In [1]:
import geopandas as gpd
from shapely.ops import unary_union
from dataset_functions import *
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
df_location = pd.read_stata('./classifications_data/location.dta')
df_location.head()

Unnamed: 0,location_id,location_code,location_name_short_en,level,parent_id
0,0,ABW,Aruba,country,356.0
1,1,AFG,Afghanistan,country,353.0
2,2,AGO,Angola,country,352.0
3,3,AIA,Anguilla,country,356.0
4,4,ALB,Albania,country,355.0


In [3]:
countries_path = './classifications_data/ne_110m_admin_0_countries.geojson'
gdf = gpd.read_file(countries_path)
print(gdf.head())
print(gdf.columns)

        featurecla  scalerank  LABELRANK                   SOVEREIGNT SOV_A3  \
0  Admin-0 country          1          6                         Fiji    FJI   
1  Admin-0 country          1          3  United Republic of Tanzania    TZA   
2  Admin-0 country          1          7               Western Sahara    SAH   
3  Admin-0 country          1          2                       Canada    CAN   
4  Admin-0 country          1          2     United States of America    US1   

   ADM0_DIF  LEVEL               TYPE                        ADMIN ADM0_A3  \
0         0      2  Sovereign country                         Fiji     FJI   
1         0      2  Sovereign country  United Republic of Tanzania     TZA   
2         0      2      Indeterminate               Western Sahara     SAH   
3         0      2  Sovereign country                       Canada     CAN   
4         1      2            Country     United States of America     USA   

   ...     FCLASS_TR     FCLASS_ID     FCLASS_PL  

  aout[:] = out


Only take the colomns that are needed:
- ISO_A2
- ISO_A3
- ADMIN
- POP_EST
- GDP_MD


In [4]:
gdf_resized = gdf[['ISO_A2', 'ISO_A3', 'ADMIN', 'POP_EST', 'GDP_MD', 'geometry']]
print(gdf_resized.shape)
gdf_resized.head()

(177, 6)


Unnamed: 0,ISO_A2,ISO_A3,ADMIN,POP_EST,GDP_MD,geometry
0,FJ,FJI,Fiji,889953.0,5496,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,TZ,TZA,United Republic of Tanzania,58005463.0,63177,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,EH,ESH,Western Sahara,603253.0,907,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,CA,CAN,Canada,37589262.0,1736425,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,US,USA,United States of America,328239523.0,21433226,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."


In [5]:
gdf_resized.loc[gdf_resized['ISO_A3'] == '-99']

Unnamed: 0,ISO_A2,ISO_A3,ADMIN,POP_EST,GDP_MD,geometry
160,-99,-99,Northern Cyprus,326000.0,3600,"POLYGON ((32.73178 35.14003, 32.80247 35.14550..."
167,-99,-99,Somaliland,5096159.0,17836,"POLYGON ((48.94821 11.41062, 48.94821 11.41062..."
174,XK,-99,Kosovo,1794248.0,7926,"POLYGON ((20.59025 41.85541, 20.52295 42.21787..."


In [6]:
gdf_resized.loc[gdf_resized['ADMIN'].str.contains('Kosovo'), 'ISO_A3'] = 'XKK'

In [7]:
def merge_rows_geojson(gdf, countries_to_merge):
    """
    Merge rows of geojson combining their geometry and adding their population and GDP. 
    Merges the countries into the entry that has a valid ```ISO_A2``` and valid ```ISO_A3```.
    If there are multiple entries, then the countries are merged into ```countries_to_merge[0]```.

    Args:
        gdf: geopandas DataFrame
        countries_to_merge: List containing the countries to merge.
    """
    df_countries = gdf.loc[gdf['ADMIN'].isin(countries_to_merge)]
    df_valid_countries = df_countries[(df_countries['ISO_A2'] != '-99') & (df_countries['ISO_A3'] != '-99')]
    valid_country = df_valid_countries.iloc[0]['ADMIN'] if len(df_valid_countries.index) == 1 else countries_to_merge[0]

    polygons = [polygon for polygon in gdf_resized.loc[gdf_resized['ADMIN'].isin(countries_to_merge), 'geometry']]
    merged_polygon = gpd.GeoSeries(unary_union(polygons))
    gdf.loc[gdf_resized['ADMIN'] == valid_country, 'geometry'] = merged_polygon[0]

    for idx, country in enumerate(countries_to_merge):
        if country != valid_country:
            gdf.loc[gdf['ADMIN'] == valid_country, 'POP_EST'] += df_countries.iloc[idx]['POP_EST']
            gdf.loc[gdf['ADMIN'] == valid_country, 'GDP_MD'] += df_countries.iloc[idx]['GDP_MD']
           
    df_invalid = df_countries[(df_countries['ISO_A2'] == '-99') | (df_countries['ISO_A3'] == '-99')]

    gdf.drop(df_invalid.index, inplace=True)

In [8]:
gdf_resized.loc[gdf_resized['ADMIN'].str.contains('Somal')]

Unnamed: 0,ISO_A2,ISO_A3,ADMIN,POP_EST,GDP_MD,geometry
12,SO,SOM,Somalia,10192317.3,4719,"POLYGON ((41.58513 -1.68325, 40.99300 -0.85829..."
167,-99,-99,Somaliland,5096159.0,17836,"POLYGON ((48.94821 11.41062, 48.94821 11.41062..."


In [9]:

merge_rows_geojson(gdf_resized, ['Somalia', 'Somaliland'])

In [10]:
gdf_resized.loc[gdf_resized['ADMIN'].str.contains('Somal')]

Unnamed: 0,ISO_A2,ISO_A3,ADMIN,POP_EST,GDP_MD,geometry
12,SO,SOM,Somalia,15288476.3,22555,"POLYGON ((40.99300 -0.85829, 40.98105 2.78452,..."


In [11]:
gdf_resized.loc[gdf_resized['ADMIN'].str.contains('Cyprus')]

Unnamed: 0,ISO_A2,ISO_A3,ADMIN,POP_EST,GDP_MD,geometry
160,-99,-99,Northern Cyprus,326000.0,3600,"POLYGON ((32.73178 35.14003, 32.80247 35.14550..."
161,CY,CYP,Cyprus,1198575.0,24948,"POLYGON ((32.73178 35.14003, 32.91957 35.08783..."


In [12]:
merge_rows_geojson(gdf_resized, ['Northern Cyprus', 'Cyprus'])

In [13]:
gdf_resized.loc[gdf_resized['ADMIN'].str.contains('Cyprus')]

Unnamed: 0,ISO_A2,ISO_A3,ADMIN,POP_EST,GDP_MD,geometry
161,CY,CYP,Cyprus,1524575.0,28548,"POLYGON ((34.00488 34.97810, 32.97983 34.57187..."


### Join trade data with geojson

In [14]:
df_years = contries_table_1962_2019()

  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
  return pd.read_csv(path)
 

In [15]:
print(df_years.shape)
df_years.head()

(258, 813)


Unnamed: 0,location_code,total_trades_1962,export_value_1962,import_value_1962,percentage_exports_1962,highest_export_to_1962,percentage_imports_1962,percentage_total_1962,percentage_trade_to_exp_1962,highest_import_from_1962,...,highest_export_to_2019,percentage_imports_2019,percentage_total_2019,percentage_trade_to_exp_2019,highest_import_from_2019,trade_value_exported_2019,percentage_exp_all_trades_2019,trade_value_imported_2019,percentage_imp_all_trades_2019,percentage_trade_to_imp_2019
0,ABW,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0,-1.0,-1,...,ANS,0.008876,0.004438,46.897879,ANS,71041660.0,0.00039,528139600.0,0.002896,32.630982
1,AFG,200965233.0,81991329.0,118973904.0,0.064978,RUS,0.094286,0.047143,31.443399,RUS,...,PAK,0.049454,0.024727,40.377646,ARE,560615700.0,0.003074,1274326000.0,0.006988,14.131309
2,AGO,249036901.0,131266146.0,117770755.0,0.104028,USA,0.093333,0.046666,29.491607,PRT,...,CHN,0.051003,0.025502,62.742091,CHN,18646640000.0,0.102259,2057495000.0,0.011283,22.122846
3,AIA,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0,-1.0,-1,...,CHL,0.000971,0.000485,54.942945,USA,20585630.0,0.000113,88572810.0,0.000486,50.047787
4,ALB,11221374.0,2301674.0,8919700.0,0.001824,ITA,0.007069,0.003534,54.749978,FRA,...,ITA,0.031245,0.015623,43.613567,ITA,1242825000.0,0.006816,1517889000.0,0.008324,26.641139


In [16]:
gdf_countries = pd.concat([gdf_resized.set_index('ISO_A3'), df_years.set_index('location_code')], axis=1).reindex(gdf_resized.set_index('ISO_A3').index).reset_index()
gdf_countries.insert(len(gdf_countries.columns)-1, 'geometry', gdf_countries.pop('geometry'))
gdf_countries.head()

  gdf_countries.insert(len(gdf_countries.columns)-1, 'geometry', gdf_countries.pop('geometry'))


Unnamed: 0,ISO_A3,ISO_A2,ADMIN,POP_EST,GDP_MD,total_trades_1962,export_value_1962,import_value_1962,percentage_exports_1962,highest_export_to_1962,...,percentage_imports_2019,percentage_total_2019,percentage_trade_to_exp_2019,highest_import_from_2019,trade_value_exported_2019,percentage_exp_all_trades_2019,trade_value_imported_2019,percentage_imp_all_trades_2019,percentage_trade_to_imp_2019,geometry
0,FJI,FJ,Fiji,889953.0,5496.0,32168180.0,13169300.0,18998880.0,0.010437,GBR,...,0.013959,0.00698,24.750338,SGP,238652700.0,0.001309,397135400.0,0.002178,15.60206,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,TZA,TZ,United Republic of Tanzania,58005463.0,63177.0,-1.0,-1.0,-1.0,-1.0,-1,...,0.072466,0.036233,18.437558,CHN,700622000.0,0.003842,3814731000.0,0.02092,28.868805,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,ESH,EH,Western Sahara,603253.0,907.0,-1.0,-1.0,-1.0,-1.0,-1,...,0.000116,5.8e-05,71.323643,USA,1690018.0,9e-06,21197770.0,0.000116,100.0,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,CAN,CA,Canada,37589262.0,1736425.0,11719000000.0,6126400000.0,5592603000.0,4.85514,USA,...,2.526711,1.263356,75.160112,USA,330853800000.0,1.814409,288655200000.0,1.582991,62.650255,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,USA,US,United States of America,328239523.0,21433226.0,37608360000.0,21836180000.0,15772180000.0,17.305058,CAN,...,12.794311,6.397155,17.684965,CHN,288655200000.0,1.582991,387348500000.0,2.124227,16.602903,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."


In [17]:
gdf_countries.to_file('countries.geojson', driver="GeoJSON")

In [18]:
countries_path = './countries.geojson'
gdf_countries = gpd.read_file(countries_path)
print(gdf_countries.head())
print(gdf_countries.columns)

  ISO_A3 ISO_A2                        ADMIN      POP_EST      GDP_MD  \
0    FJI     FJ                         Fiji     889953.0      5496.0   
1    TZA     TZ  United Republic of Tanzania   58005463.0     63177.0   
2    ESH     EH               Western Sahara     603253.0       907.0   
3    CAN     CA                       Canada   37589262.0   1736425.0   
4    USA     US     United States of America  328239523.0  21433226.0   

   total_trades_1962  export_value_1962  import_value_1962  \
0       3.216818e+07       1.316930e+07       1.899888e+07   
1      -1.000000e+00      -1.000000e+00      -1.000000e+00   
2      -1.000000e+00      -1.000000e+00      -1.000000e+00   
3       1.171900e+10       6.126400e+09       5.592603e+09   
4       3.760836e+10       2.183618e+10       1.577218e+10   

   percentage_exports_1962 highest_export_to_1962  ...  \
0                 0.010437                    GBR  ...   
1                -1.000000                     -1  ...   
2             

  aout[:] = out


In [19]:
print(gdf_countries[gdf_countries.ISO_A2=='SS'])

    ISO_A3 ISO_A2        ADMIN     POP_EST   GDP_MD  total_trades_1962  \
174    SSD     SS  South Sudan  11062113.0  11998.0               -1.0   

     export_value_1962  import_value_1962  percentage_exports_1962  \
174               -1.0               -1.0                     -1.0   

    highest_export_to_1962  ...  percentage_imports_2019  \
174                     -1  ...                 0.003714   

     percentage_total_2019  percentage_trade_to_exp_2019  \
174               0.001857                     88.362864   

    highest_import_from_2019  trade_value_exported_2019  \
174                      ARE               1.259446e+09   

     percentage_exp_all_trades_2019  trade_value_imported_2019  \
174                        0.006907                248430246.0   

     percentage_imp_all_trades_2019  percentage_trade_to_imp_2019  \
174                        0.001362                     36.679985   

                                              geometry  
174  POLYGON ((30.83

#### Generate options for dropdown menu of style ```<option value="AO">Angola</option>```

In [32]:
df_countries_dropdown = gdf_countries[['ISO_A2', 'ADMIN']]
df_countries_dropdown = df_countries_dropdown.sort_values('ADMIN')

with open('countries_dropdown.txt', 'w') as f:
    for row in df_countries_dropdown.iterrows():
        country_dropdown = '<option value="{}">{}</option>'.format(row[1][0], row[1][1])
        print(country_dropdown)
        f.write(country_dropdown)

<option value="AF">Afghanistan</option>
<option value="AL">Albania</option>
<option value="DZ">Algeria</option>
<option value="AO">Angola</option>
<option value="AQ">Antarctica</option>
<option value="AR">Argentina</option>
<option value="AM">Armenia</option>
<option value="AU">Australia</option>
<option value="AT">Austria</option>
<option value="AZ">Azerbaijan</option>
<option value="BD">Bangladesh</option>
<option value="BY">Belarus</option>
<option value="BE">Belgium</option>
<option value="BZ">Belize</option>
<option value="BJ">Benin</option>
<option value="BT">Bhutan</option>
<option value="BO">Bolivia</option>
<option value="BA">Bosnia and Herzegovina</option>
<option value="BW">Botswana</option>
<option value="BR">Brazil</option>
<option value="BN">Brunei</option>
<option value="BG">Bulgaria</option>
<option value="BF">Burkina Faso</option>
<option value="BI">Burundi</option>
<option value="KH">Cambodia</option>
<option value="CM">Cameroon</option>
<option value="CA">Canada</opt