In [1]:
# Dependencies
import pandas as pd
import requests as req
import os, json, csv, time

In [2]:
# Path to the source CSV, built with os.path.join so the separator matches the OS
file_path = os.path.join("assets", "data", "Canada_transforming.csv")

# Parse the CSV (decoded as latin1). read_csv already returns a DataFrame, so
# no additional pd.DataFrame(...) wrapper is required.
open_file = pd.read_csv(file_path, encoding="latin1")

# Work on an independent copy so the freshly parsed data in `open_file` is not
# affected by modifications made to `df` in later cells.
df = open_file.copy()

# Preview the DataFrame
df.head()

Unnamed: 0,id_area,areaname,id_reg,regname,country,id_dev,devname,1980,1981,1982,...,2012,2013,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48
0,935.0,Asia,5501.0,Southern Asia,Afghanistan,902.0,Developing regions,16.0,39.0,39.0,...,2635.0,2004.0,,,,,,,,
1,908.0,Europe,925.0,Southern Europe,Albania,901.0,Developed regions,1.0,0.0,0.0,...,620.0,603.0,,,,,,,,
2,903.0,Africa,912.0,Northern Africa,Algeria,902.0,Developing regions,80.0,67.0,71.0,...,3774.0,4331.0,,,,,,,,
3,909.0,Oceania,957.0,Polynesia,American Samoa,902.0,Developing regions,0.0,1.0,0.0,...,0.0,0.0,,,,,,,,
4,908.0,Europe,925.0,Southern Europe,Andorra,901.0,Developed regions,0.0,0.0,0.0,...,1.0,1.0,,,,,,,,


### Data Treatment

In [3]:
# Columns used by the project: four descriptive columns followed by one column
# per year. The year labels are strings ('1980' ... '2013'), so they are
# generated rather than typed out by hand.
id_columns = ["areaname", "regname", "country", "devname"]
year_columns = [str(year) for year in range(1980, 2014)]

# Keep only those columns and remove the blank rows at the end of the
# DataFrame (rows without a country). Chaining produces a new DataFrame instead
# of calling dropna(inplace=True) on a column selection, which can trigger
# pandas' SettingWithCopyWarning; the explicit copy() lets later cells add
# columns to `df` without that warning.
df = df[id_columns + year_columns].dropna(subset=["country"]).copy()

df.tail()

Unnamed: 0,areaname,regname,country,devname,1980,1981,1982,1983,1984,1985,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
189,Asia,South-Eastern Asia,Viet Nam,Developing regions,1191.0,1829.0,2162.0,3404.0,7583.0,5907.0,...,1816.0,1852.0,3153.0,2574.0,1784.0,2171.0,1942.0,1723.0,1731.0,2112.0
190,Africa,Northern Africa,Western Sahara,Developing regions,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
191,Asia,Western Asia,Yemen,Developing regions,1.0,2.0,1.0,6.0,0.0,18.0,...,124.0,161.0,140.0,122.0,133.0,128.0,211.0,160.0,174.0,217.0
192,Africa,Eastern Africa,Zambia,Developing regions,11.0,17.0,11.0,7.0,16.0,9.0,...,56.0,91.0,77.0,71.0,64.0,60.0,102.0,69.0,46.0,59.0
193,Africa,Eastern Africa,Zimbabwe,Developing regions,72.0,114.0,102.0,44.0,32.0,29.0,...,1450.0,615.0,454.0,663.0,611.0,508.0,494.0,434.0,437.0,407.0


### Create new fields and query the API to fill them

In [4]:
# Placeholder columns to be filled from the API, mapped to their initial value.
# Coordinates start as floats; every other field starts as an empty string.
placeholder_defaults = {
    "lat": 0.0,
    "lon": 0.0,
    "flag": "",
    "capital": "",
    "population": "",
    "countrycode": "",
}

# Add the columns one by one, in the order listed above
for column_name, default_value in placeholder_defaults.items():
    df[column_name] = default_value


In [5]:
# Base URL of the API endpoint that is queried by country name. Each response
# supplies the latitude/longitude, flag SVG URL, capital, population and
# two-letter country code (alpha2Code) stored in the columns below.
# NOTE(review): confirm this host still serves the v2 API before a full re-run.
url = "https://restcountries.eu/rest/v2/name/"

# Set to an integer (e.g. 5) to process only the first few countries while
# testing; None processes every row. A full run is slow because of the
# one-second pause before each request plus the request time itself.
MAX_RECORDS = None

# Seconds to wait for a single response before treating the request as failed,
# so one stalled connection cannot hang the whole loop.
REQUEST_TIMEOUT = 10

# Fill new fields.
print("-----------------------------")
print("Beginning Data Retrieval")
print("-----------------------------")

# Index labels of the countries that could not be resolved. They are removed
# after the loop so the DataFrame is not modified while it is being iterated.
failed_indices = []

# Loop to execute one request per country. enumerate() provides the running
# record number shown in the log.
for count, (index, row) in enumerate(df.iterrows()):
    if MAX_RECORDS is not None and count >= MAX_RECORDS:
        break

    # Add one second interval between requests to avoid overloading the API
    time.sleep(1)

    # Mount the query URL for the current country
    country = f"{row['country']}"
    query_url = f"{url}{country}?fullText=true"

    # Print the counter, the current country and the URL
    print(f"Processing Record {count} - ({country})")
    print(query_url)

    # Reset per iteration so the error message never reports the previous
    # country's response (or an undefined name) when the request itself fails.
    result = None
    try:
        # Run the request
        result = req.get(query_url, timeout=REQUEST_TIMEOUT).json()

        # Read every field before writing anything, so a missing key cannot
        # leave the row half filled.
        record = result[0]
        lat = record['latlng'][0]
        lon = record['latlng'][1]
        flag = record['flag']
        capital = record['capital']
        population = record['population']
        countrycode = record['alpha2Code']
    except (req.RequestException, ValueError, KeyError, IndexError, TypeError) as error:
        # Network/timeout errors, a non-JSON body, or a response that does not
        # have the expected shape (e.g. an error object instead of a list).
        print(f"We've got an error when processing country={country} - {result} ({error!r}).")
        # Mark the country with error for removal from the DataFrame.
        failed_indices.append(index)
        continue

    # Print a column to check the result
    print(flag)

    # Fill fields
    df.loc[index, 'lat'] = lat
    df.loc[index, 'lon'] = lon
    df.loc[index, 'flag'] = flag
    df.loc[index, 'capital'] = capital
    df.loc[index, 'population'] = population
    df.loc[index, 'countrycode'] = countrycode

# Remove the countries with errors from the DataFrame.
df = df.drop(index=failed_indices)


-----------------------------
Beginning Data Retrieval
-----------------------------
Processing Record 0 - (Afghanistan)
https://restcountries.eu/rest/v2/name/Afghanistan?fullText=true
https://restcountries.eu/data/afg.svg
Processing Record 0 - (Albania)
https://restcountries.eu/rest/v2/name/Albania?fullText=true
https://restcountries.eu/data/alb.svg
Processing Record 0 - (Algeria)
https://restcountries.eu/rest/v2/name/Algeria?fullText=true
https://restcountries.eu/data/dza.svg
Processing Record 0 - (American Samoa)
https://restcountries.eu/rest/v2/name/American Samoa?fullText=true
https://restcountries.eu/data/asm.svg
Processing Record 0 - (Andorra)
https://restcountries.eu/rest/v2/name/Andorra?fullText=true
https://restcountries.eu/data/and.svg
Processing Record 0 - (Angola)
https://restcountries.eu/rest/v2/name/Angola?fullText=true
https://restcountries.eu/data/ago.svg
Processing Record 0 - (Antigua and Barbuda)
https://restcountries.eu/rest/v2/name/Antigua and Barbuda?fullText=true

https://restcountries.eu/data/gnq.svg
Processing Record 0 - (Eritrea)
https://restcountries.eu/rest/v2/name/Eritrea?fullText=true
https://restcountries.eu/data/eri.svg
Processing Record 0 - (Estonia)
https://restcountries.eu/rest/v2/name/Estonia?fullText=true
https://restcountries.eu/data/est.svg
Processing Record 0 - (Ethiopia)
https://restcountries.eu/rest/v2/name/Ethiopia?fullText=true
https://restcountries.eu/data/eth.svg
Processing Record 0 - (Fiji)
https://restcountries.eu/rest/v2/name/Fiji?fullText=true
https://restcountries.eu/data/fji.svg
Processing Record 0 - (Finland)
https://restcountries.eu/rest/v2/name/Finland?fullText=true
https://restcountries.eu/data/fin.svg
Processing Record 0 - (France)
https://restcountries.eu/rest/v2/name/France?fullText=true
https://restcountries.eu/data/fra.svg
Processing Record 0 - (Gabon)
https://restcountries.eu/rest/v2/name/Gabon?fullText=true
https://restcountries.eu/data/gab.svg
Processing Record 0 - (Gambia)
https://restcountries.eu/rest/v

Processing Record 0 - (Nauru)
https://restcountries.eu/rest/v2/name/Nauru?fullText=true
https://restcountries.eu/data/nru.svg
Processing Record 0 - (Nepal)
https://restcountries.eu/rest/v2/name/Nepal?fullText=true
https://restcountries.eu/data/npl.svg
Processing Record 0 - (Netherlands)
https://restcountries.eu/rest/v2/name/Netherlands?fullText=true
https://restcountries.eu/data/nld.svg
Processing Record 0 - (New Caledonia)
https://restcountries.eu/rest/v2/name/New Caledonia?fullText=true
https://restcountries.eu/data/ncl.svg
Processing Record 0 - (New Zealand)
https://restcountries.eu/rest/v2/name/New Zealand?fullText=true
https://restcountries.eu/data/nzl.svg
Processing Record 0 - (Nicaragua)
https://restcountries.eu/rest/v2/name/Nicaragua?fullText=true
https://restcountries.eu/data/nic.svg
Processing Record 0 - (Niger)
https://restcountries.eu/rest/v2/name/Niger?fullText=true
https://restcountries.eu/data/ner.svg
Processing Record 0 - (Nigeria)
https://restcountries.eu/rest/v2/name/

https://restcountries.eu/data/tuv.svg
Processing Record 0 - (Uganda)
https://restcountries.eu/rest/v2/name/Uganda?fullText=true
https://restcountries.eu/data/uga.svg
Processing Record 0 - (Ukraine)
https://restcountries.eu/rest/v2/name/Ukraine?fullText=true
https://restcountries.eu/data/ukr.svg
Processing Record 0 - (United Arab Emirates)
https://restcountries.eu/rest/v2/name/United Arab Emirates?fullText=true
https://restcountries.eu/data/are.svg
Processing Record 0 - (United Kingdom of Great Britain and Northern Ireland)
https://restcountries.eu/rest/v2/name/United Kingdom of Great Britain and Northern Ireland?fullText=true
https://restcountries.eu/data/gbr.svg
Processing Record 0 - (United Republic of Tanzania)
https://restcountries.eu/rest/v2/name/United Republic of Tanzania?fullText=true
https://restcountries.eu/data/tza.svg
Processing Record 0 - (United States of America)
https://restcountries.eu/rest/v2/name/United States of America?fullText=true
https://restcountries.eu/data/us

In [6]:
# Show the first five rows to confirm the new columns were populated
df.head(5)

Unnamed: 0,areaname,regname,country,devname,1980,1981,1982,1983,1984,1985,...,2010,2011,2012,2013,lat,lon,flag,capital,population,countrycode
0,Asia,Southern Asia,Afghanistan,Developing regions,16.0,39.0,39.0,47.0,71.0,340.0,...,1758.0,2203.0,2635.0,2004.0,33.0,65.0,https://restcountries.eu/data/afg.svg,Kabul,27657145,AF
1,Europe,Southern Europe,Albania,Developed regions,1.0,0.0,0.0,0.0,0.0,0.0,...,561.0,539.0,620.0,603.0,41.0,20.0,https://restcountries.eu/data/alb.svg,Tirana,2886026,AL
2,Africa,Northern Africa,Algeria,Developing regions,80.0,67.0,71.0,69.0,63.0,44.0,...,4752.0,4325.0,3774.0,4331.0,28.0,3.0,https://restcountries.eu/data/dza.svg,Algiers,40400000,DZ
3,Oceania,Polynesia,American Samoa,Developing regions,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,-14.333333,-170.0,https://restcountries.eu/data/asm.svg,Pago Pago,57100,AS
4,Europe,Southern Europe,Andorra,Developed regions,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,42.5,1.5,https://restcountries.eu/data/and.svg,Andorra la Vella,78014,AD


In [7]:
# Destination of the exported dataset
output_path = "./assets/data/dataset.json"

# Write the DataFrame to disk as JSON
df.to_json(path_or_buf=output_path)