In [1]:
notebook_id = "121"

<a id="ID_top"></a>
## Country data organisation

This workflow generates (part1), maintains (part2) and exports (part3)

#### Notebook sections:
    
|| [0| Default imports](#ID_top) || [1|Part1 Reference table generation](#ID_part1) || [2|Part2 Table maintanence](#ID_part2) || [3|Part3 Table export](#ID_part3) || 

#### Import all packages that could be required

In [2]:
# %load s_package_import.py
# package library, use to ensure consistency across notebooks, refresh periodically
# general packages
import os # use with os.listdir(_path_)
import requests
import csv
import time
from datetime import datetime
from shutil import copyfile

#temp check
#from shutil import make_archive
import zipfile #notebook

# data analysis packages
import pandas as pd
pd.options.display.max_columns = None # don't truncate columns
#pd.options.display.max_rows = None

import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import descartes
import pycountry
from sklearn import preprocessing

# custom scripts
import s_file_export
import s_filepaths
import s_un_comtrade_extract as s_un
import s_adj_matrix_plot

#=== network analysis
import networkx as nx
#=== gavity modelling
import gme as gme

#=== distance datasets
import wbdata


  import pandas.util.testing as tm


#### Import module and declare path variables
`import s_filepaths.py`

In [3]:
# import ref file
import s_filepaths

# declare local variables to work with
path_raw = s_filepaths.path_raw
path_raw_dl = s_filepaths.path_raw_dl
path_store = s_filepaths.path_store
path_live = s_filepaths.path_live

****

<a id="ID_part1"></a>
### Part 1 | Import all countries and set scope
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

**Import master country reference file**

In [4]:
# View all files in live directory
sorted(list(os.listdir(f"{path_live}")))

['.DS_Store',
 '112_input_gme_data.csv.gzip',
 '113_input_bri_members.csv.gzip',
 '120_input_scope_data.csv.gzip',
 '121_input_de_matrix.csv.gzip',
 '121_input_de_min_max.csv.gzip',
 '121_input_di_matrix.csv.gzip',
 '121_input_di_min_max.csv.gzip',
 '122_input_un_com_2016_mini.csv.gzip',
 '125_input_scope_country.csv.gzip',
 '2_raw_explainer_doc.md']

In [5]:
country_file = "120_input_scope_data.csv.gzip"
df_scope = pd.read_csv(f"{path_live}{country_file}",compression="gzip")
df_scope.head()

Unnamed: 0,year,country_d,iso3_d,dynamic_code_d,landlocked_d,island_d,region_d,gdp_pwt_const_d,pop_d,lat_d,lng_d,country_o,iso3_o,dynamic_code_o,landlocked_o,island_o,region_o,pop_o,lat_o,lng_o,contiguity,distance,bri_year_d,bri_flag_d
0,2016,United Arab Emirates,ARE,ARE,0,0,middle_east,,,25.094378,55.454674,Portugal,PRT,PRT,0,0,europe,,39.310741,-10.883486,0,6154.4214,2017.0,1.0
1,2016,Azerbaijan,AZE,AZE,0,0,europe,,,40.362438,47.255833,Kuwait,KWT,KWT,0,0,middle_east,,29.279369,47.930035,0,1236.6086,2015.0,1.0
2,2016,Belgium,BEL,BEL,0,0,europe,,,50.691814,4.581812,Cambodia,KHM,KHM,0,0,south_east_asia,,12.320902,104.8744,0,9693.0625,,
3,2016,China,CHN,CHN,0,0,east_asia,,,35.389668,114.00247,Belarus,BLR,BLR,1,0,europe,,53.604687,27.802185,0,6904.3364,2013.0,1.0
4,2016,Czech Republic,CZE,CZE,1,0,europe,,,49.817062,15.696862,Holy See,VAT,VAT,1,0,europe,,41.900013,12.447808,0,925.0072,2015.0,1.0


In [6]:
# get country list
list_scope_countries = list(df_scope.iso3_d.unique())
len(list_scope_countries)

88

Check countries are present in the World Bank database.

In [7]:
def f_wb_iso3_check(p_iso3_list):
    wb_country_list = []
    # Check presence in WB database
    for iso3 in p_iso3_list:
        try:
            wbdata.api.get_country(iso3)
            wb_country_list.append(iso3)
        except:
            print(f"{iso3} gave error")

    print(f"\n{len(wb_country_list)} are included in the refined list.")
    return wb_country_list

In [8]:
list_wb_scope = f_wb_iso3_check(list_scope_countries)

GAZ gave error
VAT gave error

86 are included in the refined list.


In [9]:
# see world bank data sources
#wbdata.get_source()

# logistics at 66

<a id="ID_part2"></a>
### Part 2 | Download data
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

**This section extracts data for:**<br>
**A.** Economic distance (WDI) - source 2<br>
**B.** Institutional distance (WGI) - source 3
<br><br>
Both are sourced from the World Bank. 

In [10]:
# Master variables
target_date = datetime(2016,1,1)

**A | Economic Distance (DE) | Download**

In [11]:
# Economic distance indicators (DE) based on WDI
de_indicator = ["NY.GDP.PCAP.PP.CD"]
target_date = target_date

In [12]:
# Iterate over country and indicator to correctly flag what is available
outputs = []
list_excluded = []
df_wb_de = pd.DataFrame()

# for every country in list of available countries for our desired region
for country in list_wb_scope:
    
    # iterate through the indicators one by one
    try:
        for indicator in de_indicator:
            df_wb_de.loc[country,indicator] = wbdata.api.get_series(indicator = indicator, source = "2",
                                                     country = country,data_date=target_date)[0] 
    except:
        print(f"WARNING | Country: {country} data not available")
        list_excluded.append(country)

**B | Institutional Distance (DI) | Download**

In [13]:
# Insitutional distance indicators (DI) based on WGI
indicator_list = ["CC.EST","GE.EST","PV.EST","RL.EST","RQ.EST","VA.EST"]
target_date = target_date

In [14]:
# Iterate over country and indicator to correctly flag what is available
outputs = []
list_excluded_di = []
df_wb_di = pd.DataFrame()

# for every country in list of available countries for our desired region
for country in list_wb_scope:
    
    # iterate through the indicators one by one
    try:
        for indicator in indicator_list:
            df_wb_di.loc[country,indicator] = wbdata.api.get_series(indicator = indicator, source = "3",
                                                     country = country,data_date=target_date)[0] 
    except:
        print(f"WARNING | Country: {country} data not available")
        list_excluded_di.append(country)



**C | Logistics performance index (LPI) | Download**

In [15]:
# see indicators
wbdata.get_indicator(source = 66)
# all lpi indicators
lpi_indicator_list = ["LP.LPI.CUST.XQ","LP.LPI.INFR.XQ","LP.LPI.ITRN.XQ",
                      "LP.LPI.LOGS.XQ","LP.LPI.TIME.XQ","LP.LPI.TRAC.XQ",
                      "LP.LPI.OVRL.XQ","LP.LPI.OVRL.RK.UB","LP.LPI.OVRL.XQ.LB"]
# selected indicators
lpi_indicator_list = [lpi_indicator_list[6]]

In [16]:
# Get specific year
# Iterate over country and indicator to correctly flag what is available
outputs = []
list_excluded_lpi = []
df_wb_lpi = pd.DataFrame()

# for every country in list of available countries for our desired region
for country in list_wb_scope:
    
    # iterate through the indicators one by one
    try:
        for indicator in lpi_indicator_list:
            df_wb_lpi.loc[country,indicator] = wbdata.api.get_series(indicator = indicator, source = "66",
                                       data_date=target_date,country = country)[0] 
    except:
        print(f"WARNING | Country: {country} data not available")
        list_excluded_lpi.append(country)



In [17]:
# Get all years!
# Iterate over country and indicator to correctly flag what is available
outputs = []
list_excluded_lpi = []
df_wb_lpi_list = []

# for every country in list of available countries for our desired region
for country in list_wb_scope:
    
    # iterate through the indicators one by one
    try:
        for indicator in lpi_indicator_list:
            temp_df = pd.DataFrame(wbdata.api.get_series(indicator = indicator, source = "66",country = country))

            temp_df.columns = [country]
            temp_df = temp_df.transpose().copy()
            df_wb_lpi_list.append(temp_df)   
            
    except:
        print(f"WARNING | Country: {country} data not available")
        list_excluded_lpi.append(country)

# concatenate all columns
df_wb_lpi_all_years = pd.concat(df_wb_lpi_list)
df_wb_lpi_all_years.head()



date,2018,2016,2014,2012,2010,2007
ARE,3.956437,3.941767,3.539098,3.77844,3.63034,3.727581
AZE,,,2.448376,2.481118,2.639554,2.290998
BEL,4.039084,4.108538,4.04466,3.980262,3.942263,3.893764
CHN,3.605147,3.661104,3.531463,3.517017,3.489039,3.321935
CZE,3.68033,3.674309,3.492416,3.141498,3.506553,3.134626


In [52]:
df_wb_lpi_all_years.loc[:,"2016"]

ARE    3.941767
AZE         NaN
BEL    4.108538
CHN    3.661104
CZE    3.674309
         ...   
TKM    2.211396
TLS         NaN
TUR    3.423693
UZB    2.404525
YEM         NaN
Name: 2016, Length: 76, dtype: float64

**A+B | Merge dataframes**

In [19]:
# Merge data
df_wb_all = df_wb_di.merge(df_wb_de,left_index = True, right_index= True).copy()
df_wb_all.head()

# only keep entries with data
df_wb_all.dropna(inplace= True)
missing_country = set(list_wb_scope) - set(list(df_wb_all.index))
print(f"Original list = {len(list_wb_scope)} | Economic and Institutional data for = {len(df_wb_all)}")
print(f"Countries with no data are: {missing_country}")

# rename columns
df_wb_all.columns = ["Control of corruption","Government effectiveness","Political Stability and Absence of Violence",
                     "Rule of Law","Regulatory Quality","Voice and Accountability","GDP per capita (PPP)"]

df_wb_all.head()

Original list = 86 | Economic and Institutional data for = 77
Countries with no data are: {'YEM', 'PRK', 'FRO', 'GIB', 'AND', 'MCO', 'SMR', 'LIE', 'SYR'}


Unnamed: 0,Control of corruption,Government effectiveness,Political Stability and Absence of Violence,Rule of Law,Regulatory Quality,Voice and Accountability,GDP per capita (PPP)
ARE,1.171709,1.415928,0.564749,0.849041,0.972596,-1.050421,63968.853871
AZE,-0.838504,-0.165613,-0.803879,-0.519861,-0.282023,-1.563101,14371.022741
BEL,1.638705,1.326213,0.441367,1.390965,1.340684,1.376962,48608.957782
CHN,-0.254059,0.354875,-0.499395,-0.334415,-0.264475,-1.561287,13572.620725
CZE,0.536757,1.036739,0.976116,1.035502,0.986401,1.024769,35876.727438


**(A+B) + C | Second merging**

In [54]:
# Merge data
#df_wb_complete = df_wb_all.merge(df_wb_lpi,left_index = True, right_index= True).copy()
df_wb_complete = df_wb_all.merge(df_wb_lpi_all_years.loc[:,"2016"],left_index = True, right_index= True).copy()

#df_wb_all.head()

# only keep entries with data
df_wb_complete.dropna(inplace= True)
missing_country = set(list_wb_scope) - set(list(df_wb_complete.index))
print(f"Original list = {len(list_wb_scope)} | Economic and Institutional data for = {len(df_wb_complete)}")
print(f"Countries with no data are: {missing_country}")

# rename columns
df_wb_complete.columns = ["Control of corruption","Government effectiveness","Political Stability and Absence of Violence",
                     "Rule of Law","Regulatory Quality","Voice and Accountability","GDP per capita (PPP)","Logistics Performance Index"]

print(df_wb_complete.shape)
df_wb_complete.reset_index().head()

Original list = 86 | Economic and Institutional data for = 72
Countries with no data are: {'YEM', 'PRK', 'KSV', 'MAC', 'PSE', 'GIB', 'FRO', 'AND', 'MCO', 'TLS', 'SMR', 'LIE', 'AZE', 'SYR'}
(72, 8)


Unnamed: 0,index,Control of corruption,Government effectiveness,Political Stability and Absence of Violence,Rule of Law,Regulatory Quality,Voice and Accountability,GDP per capita (PPP),Logistics Performance Index
0,ARE,1.171709,1.415928,0.564749,0.849041,0.972596,-1.050421,63968.853871,3.941767
1,BEL,1.638705,1.326213,0.441367,1.390965,1.340684,1.376962,48608.957782,4.108538
2,CHN,-0.254059,0.354875,-0.499395,-0.334415,-0.264475,-1.561287,13572.620725,3.661104
3,CZE,0.536757,1.036739,0.976116,1.035502,0.986401,1.024769,35876.727438,3.674309
4,ESP,0.51563,1.115842,0.413188,0.978825,1.008604,1.041046,37282.442513,3.727412


In [55]:
# Save to live
filename = "wb_distance_lpi_2016"
s_file_export.f_df_export(df_wb_complete.reset_index(),filename,p_file_id=f"{notebook_id}_",p_loc2=path_raw,p_loc2_pre="raw_")

Export | ../Data/1_raw_processed_backup/121_store_wb_distance_lpi_2016_20200822_1520.csv | COMPLETE
COPY   | ../Data/0_raw/121_raw_wb_distance_lpi_2016.csv.gzip | COMPLETE


In [56]:
# Save to live
filename = "wb_distance_2016"
s_file_export.f_df_export(df_wb_all,filename,p_file_id=f"{notebook_id}_",p_loc2=path_raw,p_loc2_pre="raw_")

Export | ../Data/1_raw_processed_backup/121_store_wb_distance_2016_20200822_1520.csv | COMPLETE
COPY   | ../Data/0_raw/121_raw_wb_distance_2016.csv.gzip | COMPLETE


<a id="ID_part3"></a>
### Part 3 | Calculate martices
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

**This section calculates and manipulates data for:**<br>
**A.** Economic distance (WDI)<br>
**B.** Institutional distance (WGI)
<br><br>
Both are sourced from the World Bank. 

1. A matrix of each country vs each country is created in the order of the columns in table `df_wb_all` and stored in object `distance_matrices`

2. Matrices are separated and the final distance is calculated for each kind of measure.

In [65]:
# replace old DF with the Complete version that includes LPI
df_wb_all = df_wb_complete.copy()

In [66]:
# Basics variables for matrix to determine size and labels
matrix_size = len(df_wb_all)
matrix_index = list(df_wb_all.index)

In [68]:
# storing object for matrices
distance_matrices = []

# for every column
for column in df_wb_all.columns:
    # create zero matrix
    df_matrix_zero = pd.DataFrame(np.zeros(shape = (matrix_size, matrix_size)),columns = matrix_index,index = matrix_index)
    
    # for every country (index)
    for entry in df_wb_all.index:
        
        for entry_l2 in df_wb_all.index:
            # calculate difference
            temp_difference = df_wb_all.loc[entry,column] - df_wb_all.loc[entry_l2,column]
            
            # assign calculated value to correct position in matrix
            df_matrix_zero.loc[entry,entry_l2] = temp_difference
            
    distance_matrices.append(df_matrix_zero)

**A | Economic distance | calculation and normalisation (0-1)**

In [69]:
distance_matrices[6].head()

Unnamed: 0,ARE,BEL,CHN,CZE,ESP,GEO,HUN,IRQ,ITA,KGZ,KOR,LTU,OMN,SRB,SWE,THA,TJK,UKR,VNM,AFG,ALB,ARM,AUT,BGD,BGR,BIH,BLR,BRN,BTN,CHE,DEU,DNK,EST,FIN,FRA,GRC,HKG,HRV,IDN,IND,IRL,IRN,ISR,JOR,KAZ,KHM,KWT,LAO,LBN,LUX,LVA,MDA,MKD,MMR,MNG,MNE,MYS,NLD,NOR,NPL,PAK,POL,PRT,QAT,ROU,RUS,SAU,SGP,SVK,TKM,TUR,UZB
ARE,0.0,15359.89609,50396.233146,28092.126434,26686.411358,51110.370471,36269.222934,54071.85243,24045.945388,59287.679138,24401.837247,33043.922928,35042.715678,48234.520983,13543.699244,47350.091682,60871.651844,52820.657325,57395.757533,61839.865845,51965.921973,53264.620463,11335.599583,60119.742884,44083.072004,50978.838063,46243.101692,7672.834104,53511.65207,-1751.292658,13404.602563,11998.104071,32737.382725,19038.945094,21048.587053,36145.492027,6747.440108,39250.724189,53474.293405,58128.955282,-7952.863178,49957.308743,26124.876061,54684.9832,40150.029045,60293.815951,19283.503248,57125.292289,48481.744619,-46680.842747,37564.328185,53358.376029,48985.687397,59476.83555,53226.200779,45883.824085,38422.844643,11685.697022,5034.898095,61385.910198,59558.759118,35685.152434,32364.30756,-19133.631799,39803.101306,39843.455008,18483.194297,-25417.225529,34317.329737,50102.529153,37639.493702,57515.925934
BEL,-15359.89609,0.0,35036.337056,12732.230344,11326.515268,35750.474381,20909.326844,38711.95634,8686.049298,43927.783049,9041.941157,17684.026838,19682.819588,32874.624893,-1816.196846,31990.195592,45511.755754,37460.761235,42035.861444,46479.969755,36606.025883,37904.724374,-4024.296507,44759.846795,28723.175914,35618.941973,30883.205602,-7687.061986,38151.755981,-17111.188747,-1955.293526,-3361.792019,17377.486635,3679.049005,5688.690963,20785.595937,-8612.455982,23890.828099,38114.397315,42769.059193,-23312.759267,34597.412653,10764.979971,39325.087111,24790.132956,44933.919861,3923.607158,41765.396199,33121.848529,-62040.738837,22204.432096,37998.479939,33625.791307,44116.939461,37866.304689,30523.927996,23062.948553,-3674.199067,-10324.997994,46026.014108,44198.863028,20325.256344,17004.411471,-34493.527888,24443.205216,24483.558918,3123.298207,-40777.121618,18957.433647,34742.633064,22279.597612,42156.029844
CHN,-50396.233146,-35036.337056,0.0,-22304.106712,-23709.821788,714.137325,-14127.010212,3675.619284,-26350.287758,8891.445993,-25994.395899,-17352.310218,-15353.517468,-2161.712163,-36852.533902,-3046.141464,10475.418698,2424.424179,6999.524387,11443.632699,1569.688827,2868.387317,-39060.633563,9723.509739,-6313.161142,582.604917,-4153.131454,-42723.399042,3115.418925,-52147.525804,-36991.630582,-38398.129075,-17658.850421,-31357.288052,-29347.646093,-14250.741119,-43648.793038,-11145.508957,3078.060259,7732.722136,-58349.096323,-438.924403,-24271.357085,4288.750055,-10246.204101,9897.582805,-31112.729898,6729.059143,-1914.488527,-97077.075893,-12831.904961,2962.142883,-1410.545749,9080.602404,2829.967633,-4512.409061,-11973.388503,-38710.536124,-45361.33505,10989.677052,9162.525972,-14711.080712,-18031.925586,-69529.864944,-10593.13184,-10552.778138,-31913.038849,-75813.458674,-16078.903409,-293.703993,-12756.739444,7119.692788
CZE,-28092.126434,-12732.230344,22304.106712,0.0,-1405.715076,23018.244037,8177.0965,25979.725996,-4046.181046,31195.552705,-3690.289187,4951.796494,6950.589244,20142.394549,-14548.42719,19257.965248,32779.52541,24728.530891,29303.6311,33747.739411,23873.795539,25172.494029,-16756.526851,32027.616451,15990.94557,22886.711629,18150.975258,-20419.29233,25419.525637,-29843.419092,-14687.52387,-16094.022363,4645.256291,-9053.181339,-7043.539381,8053.365593,-21344.686326,11158.597755,25382.166971,30036.828849,-36044.989611,21865.182309,-1967.250373,26592.856767,12057.902612,32201.689517,-8808.623186,29033.165855,20389.618185,-74772.969181,9472.201751,25266.249595,20893.560963,31384.709116,25134.074345,17791.697652,10330.718209,-16406.429411,-23057.228338,33293.783764,31466.632684,7593.026,4272.181126,-47225.758232,11710.974872,11751.328574,-9608.932137,-53509.351962,6225.203303,22010.40272,9547.367268,29423.7995
ESP,-26686.411358,-11326.515268,23709.821788,1405.715076,0.0,24423.959113,9582.811576,27385.441072,-2640.465971,32601.26778,-2284.574111,6357.51157,8356.304319,21548.109625,-13142.712114,20663.680324,34185.240486,26134.245967,30709.346175,35153.454487,25279.510614,26578.209105,-15350.811775,33433.331526,17396.660646,24292.426705,19556.690334,-19013.577255,26825.240712,-28437.704016,-13281.808795,-14688.307287,6050.971367,-7647.466264,-5637.824305,9459.080669,-19938.971251,12564.312831,26787.882046,31442.543924,-34639.274536,23270.897384,-561.535298,27998.571842,13463.617687,33607.404593,-7402.90811,30438.880931,21795.333261,-73367.254105,10877.916827,26671.96467,22299.276038,32790.424192,26539.789421,19197.412727,11736.433285,-15000.714336,-21651.513263,34699.49884,32872.34776,8998.741076,5677.896202,-45820.043157,13116.689948,13157.04365,-8203.217061,-52103.636887,7630.918379,23416.117795,10953.082344,30829.514575


In [25]:
# apply eucledian distance formula for consistency

In [70]:
# light touch final processing
df_de = distance_matrices[6].copy()
matrix_de = df_de.to_numpy() # create array

matrix_de = np.square(matrix_de) # square all values
# no need to divide by the number of dimensions as there is only one
matrix_de = np.sqrt(matrix_de) # square root all values

df_de = pd.DataFrame(matrix_de,index = matrix_index,columns =matrix_index)
df_de.head()

Unnamed: 0,ARE,BEL,CHN,CZE,ESP,GEO,HUN,IRQ,ITA,KGZ,KOR,LTU,OMN,SRB,SWE,THA,TJK,UKR,VNM,AFG,ALB,ARM,AUT,BGD,BGR,BIH,BLR,BRN,BTN,CHE,DEU,DNK,EST,FIN,FRA,GRC,HKG,HRV,IDN,IND,IRL,IRN,ISR,JOR,KAZ,KHM,KWT,LAO,LBN,LUX,LVA,MDA,MKD,MMR,MNG,MNE,MYS,NLD,NOR,NPL,PAK,POL,PRT,QAT,ROU,RUS,SAU,SGP,SVK,TKM,TUR,UZB
ARE,0.0,15359.89609,50396.233146,28092.126434,26686.411358,51110.370471,36269.222934,54071.85243,24045.945388,59287.679138,24401.837247,33043.922928,35042.715678,48234.520983,13543.699244,47350.091682,60871.651844,52820.657325,57395.757533,61839.865845,51965.921973,53264.620463,11335.599583,60119.742884,44083.072004,50978.838063,46243.101692,7672.834104,53511.65207,1751.292658,13404.602563,11998.104071,32737.382725,19038.945094,21048.587053,36145.492027,6747.440108,39250.724189,53474.293405,58128.955282,7952.863178,49957.308743,26124.876061,54684.9832,40150.029045,60293.815951,19283.503248,57125.292289,48481.744619,46680.842747,37564.328185,53358.376029,48985.687397,59476.83555,53226.200779,45883.824085,38422.844643,11685.697022,5034.898095,61385.910198,59558.759118,35685.152434,32364.30756,19133.631799,39803.101306,39843.455008,18483.194297,25417.225529,34317.329737,50102.529153,37639.493702,57515.925934
BEL,15359.89609,0.0,35036.337056,12732.230344,11326.515268,35750.474381,20909.326844,38711.95634,8686.049298,43927.783049,9041.941157,17684.026838,19682.819588,32874.624893,1816.196846,31990.195592,45511.755754,37460.761235,42035.861444,46479.969755,36606.025883,37904.724374,4024.296507,44759.846795,28723.175914,35618.941973,30883.205602,7687.061986,38151.755981,17111.188747,1955.293526,3361.792019,17377.486635,3679.049005,5688.690963,20785.595937,8612.455982,23890.828099,38114.397315,42769.059193,23312.759267,34597.412653,10764.979971,39325.087111,24790.132956,44933.919861,3923.607158,41765.396199,33121.848529,62040.738837,22204.432096,37998.479939,33625.791307,44116.939461,37866.304689,30523.927996,23062.948553,3674.199067,10324.997994,46026.014108,44198.863028,20325.256344,17004.411471,34493.527888,24443.205216,24483.558918,3123.298207,40777.121618,18957.433647,34742.633064,22279.597612,42156.029844
CHN,50396.233146,35036.337056,0.0,22304.106712,23709.821788,714.137325,14127.010212,3675.619284,26350.287758,8891.445993,25994.395899,17352.310218,15353.517468,2161.712163,36852.533902,3046.141464,10475.418698,2424.424179,6999.524387,11443.632699,1569.688827,2868.387317,39060.633563,9723.509739,6313.161142,582.604917,4153.131454,42723.399042,3115.418925,52147.525804,36991.630582,38398.129075,17658.850421,31357.288052,29347.646093,14250.741119,43648.793038,11145.508957,3078.060259,7732.722136,58349.096323,438.924403,24271.357085,4288.750055,10246.204101,9897.582805,31112.729898,6729.059143,1914.488527,97077.075893,12831.904961,2962.142883,1410.545749,9080.602404,2829.967633,4512.409061,11973.388503,38710.536124,45361.33505,10989.677052,9162.525972,14711.080712,18031.925586,69529.864944,10593.13184,10552.778138,31913.038849,75813.458674,16078.903409,293.703993,12756.739444,7119.692788
CZE,28092.126434,12732.230344,22304.106712,0.0,1405.715076,23018.244037,8177.0965,25979.725996,4046.181046,31195.552705,3690.289187,4951.796494,6950.589244,20142.394549,14548.42719,19257.965248,32779.52541,24728.530891,29303.6311,33747.739411,23873.795539,25172.494029,16756.526851,32027.616451,15990.94557,22886.711629,18150.975258,20419.29233,25419.525637,29843.419092,14687.52387,16094.022363,4645.256291,9053.181339,7043.539381,8053.365593,21344.686326,11158.597755,25382.166971,30036.828849,36044.989611,21865.182309,1967.250373,26592.856767,12057.902612,32201.689517,8808.623186,29033.165855,20389.618185,74772.969181,9472.201751,25266.249595,20893.560963,31384.709116,25134.074345,17791.697652,10330.718209,16406.429411,23057.228338,33293.783764,31466.632684,7593.026,4272.181126,47225.758232,11710.974872,11751.328574,9608.932137,53509.351962,6225.203303,22010.40272,9547.367268,29423.7995
ESP,26686.411358,11326.515268,23709.821788,1405.715076,0.0,24423.959113,9582.811576,27385.441072,2640.465971,32601.26778,2284.574111,6357.51157,8356.304319,21548.109625,13142.712114,20663.680324,34185.240486,26134.245967,30709.346175,35153.454487,25279.510614,26578.209105,15350.811775,33433.331526,17396.660646,24292.426705,19556.690334,19013.577255,26825.240712,28437.704016,13281.808795,14688.307287,6050.971367,7647.466264,5637.824305,9459.080669,19938.971251,12564.312831,26787.882046,31442.543924,34639.274536,23270.897384,561.535298,27998.571842,13463.617687,33607.404593,7402.90811,30438.880931,21795.333261,73367.254105,10877.916827,26671.96467,22299.276038,32790.424192,26539.789421,19197.412727,11736.433285,15000.714336,21651.513263,34699.49884,32872.34776,8998.741076,5677.896202,45820.043157,13116.689948,13157.04365,8203.217061,52103.636887,7630.918379,23416.117795,10953.082344,30829.514575


5184

In [77]:
# rescale (0-1)
scaler=preprocessing.MinMaxScaler() # define scaler
# fit data on reshaped one dimensional array (this preserves matching distances between pairs)
print(scaler.fit(matrix_de.reshape((matrix_de.shape[0]*matrix_de.shape[1],1)))) 
matrix_de_minmax = scaler.transform(matrix_de.reshape((matrix_de.shape[0]*matrix_de.shape[1],1))) # save as array

# back to data frame
df_de_minmax = pd.DataFrame(matrix_de_minmax.reshape((matrix_de.shape[0],matrix_de.shape[1])),index = matrix_index,columns =matrix_index)
df_de_minmax.head()

MinMaxScaler(copy=True, feature_range=(0, 1))


Unnamed: 0,ARE,BEL,CHN,CZE,ESP,GEO,HUN,IRQ,ITA,KGZ,KOR,LTU,OMN,SRB,SWE,THA,TJK,UKR,VNM,AFG,ALB,ARM,AUT,BGD,BGR,BIH,BLR,BRN,BTN,CHE,DEU,DNK,EST,FIN,FRA,GRC,HKG,HRV,IDN,IND,IRL,IRN,ISR,JOR,KAZ,KHM,KWT,LAO,LBN,LUX,LVA,MDA,MKD,MMR,MNG,MNE,MYS,NLD,NOR,NPL,PAK,POL,PRT,QAT,ROU,RUS,SAU,SGP,SVK,TKM,TUR,UZB
ARE,0.0,0.141539,0.464393,0.258864,0.245911,0.470973,0.334215,0.498263,0.221579,0.546326,0.224859,0.304494,0.322913,0.444473,0.124803,0.436323,0.560922,0.486733,0.528892,0.569844,0.478857,0.490824,0.104456,0.553993,0.406218,0.469761,0.426122,0.070704,0.493101,0.016138,0.123521,0.110561,0.301669,0.175441,0.193959,0.333075,0.062177,0.361689,0.492757,0.535649,0.073284,0.460348,0.240736,0.503913,0.369976,0.555597,0.177694,0.5264,0.446751,0.430156,0.346149,0.491688,0.451395,0.548069,0.49047,0.422812,0.35406,0.107682,0.046396,0.565661,0.548824,0.328833,0.298232,0.176313,0.366779,0.367151,0.17032,0.234215,0.316228,0.461686,0.346842,0.53
BEL,0.141539,0.0,0.322854,0.117325,0.104372,0.329435,0.192676,0.356724,0.08004,0.404787,0.08332,0.162955,0.181374,0.302934,0.016736,0.294784,0.419383,0.345195,0.387353,0.428305,0.337318,0.349286,0.037083,0.412454,0.264679,0.328223,0.284584,0.070835,0.351562,0.157677,0.018018,0.030978,0.160131,0.033902,0.05242,0.191536,0.079362,0.22015,0.351218,0.39411,0.214823,0.318809,0.099197,0.362374,0.228437,0.414058,0.036155,0.384861,0.305212,0.571695,0.20461,0.35015,0.309856,0.40653,0.348932,0.281273,0.212521,0.033857,0.095143,0.424122,0.407285,0.187294,0.156693,0.317852,0.22524,0.225612,0.028781,0.375754,0.17469,0.320147,0.205303,0.388461
CHN,0.464393,0.322854,0.0,0.205529,0.218482,0.006581,0.130178,0.03387,0.242813,0.081933,0.239534,0.159899,0.14148,0.01992,0.33959,0.02807,0.096529,0.022341,0.064499,0.105451,0.014464,0.026432,0.359937,0.0896,0.058175,0.005369,0.03827,0.393689,0.028708,0.480531,0.340872,0.353832,0.162723,0.288952,0.270434,0.131318,0.402216,0.102704,0.028364,0.071256,0.537677,0.004045,0.223656,0.03952,0.094417,0.091205,0.286699,0.062007,0.017642,0.894549,0.118244,0.027296,0.012998,0.083676,0.026078,0.041581,0.110333,0.356711,0.417997,0.101268,0.084431,0.13556,0.166161,0.640706,0.097614,0.097242,0.294073,0.698608,0.148164,0.002706,0.117551,0.065607
CZE,0.258864,0.117325,0.205529,0.0,0.012953,0.212109,0.075351,0.239399,0.037285,0.287462,0.034005,0.04563,0.064049,0.185609,0.134061,0.177459,0.302058,0.227869,0.270028,0.31098,0.219993,0.23196,0.154409,0.295129,0.147354,0.210897,0.167258,0.18816,0.234237,0.275002,0.135343,0.148304,0.042805,0.083424,0.064905,0.07421,0.196688,0.102825,0.233892,0.276784,0.332148,0.201484,0.018128,0.245049,0.111112,0.296733,0.08117,0.267536,0.187887,0.68902,0.087285,0.232824,0.192531,0.289205,0.231606,0.163947,0.095196,0.151182,0.212468,0.306797,0.28996,0.069968,0.039367,0.435177,0.107915,0.108287,0.088545,0.49308,0.057364,0.202822,0.087977,0.271135
ESP,0.245911,0.104372,0.218482,0.012953,0.0,0.225063,0.088304,0.252352,0.024331,0.300415,0.021052,0.058583,0.077002,0.198562,0.121108,0.190412,0.315011,0.240823,0.282981,0.323933,0.232946,0.244914,0.141455,0.308083,0.160307,0.223851,0.180212,0.175207,0.24719,0.262049,0.12239,0.13535,0.055759,0.07047,0.051952,0.087164,0.183734,0.115778,0.246846,0.289738,0.319195,0.214437,0.005174,0.258002,0.124065,0.309687,0.068217,0.280489,0.20084,0.676067,0.100238,0.245778,0.205484,0.302158,0.24456,0.176901,0.108149,0.138229,0.199515,0.31975,0.302913,0.082922,0.052321,0.422224,0.120868,0.12124,0.075591,0.480126,0.070318,0.215776,0.100931,0.284089


**B | Institutional distance | calculation**

Nice explanation of eucledian distance and formulae, [here](https://www.pbarrett.net/techpapers/euclid.pdf)
1. take difference for one dimension / column / matrix
2. square (1)
3. divide (2) by variance (to standardise the data)
4. sum across dimensions
5. **square root the results**



Equation from Wang et al. (2020) (based in KSI)
1. take difference for one variable
2. square (1)
3. divide (2) variance (to standardise the data)
4. sum across dimensions
5. **divide by number of dimensions**

In [78]:
# list of matrices
list_processed_matrix = []
institutional_variables = distance_matrices[0:6]

# we have five spatial weight matrices which need to be treated together
for index,matrix in enumerate(institutional_variables):
    # Step 1 | take matrix which has step 1 done
    
    # Step 2 | square values
    matrix_square = np.square(distance_matrices[0].to_numpy())

    # Step 3a | calculate variance
    matrix_var = distance_matrices[0].to_numpy().var()

    # Step 3b | step 2 divided by var
    matrix_processed = matrix_square / matrix_var
    
    # Step 4a | add all array into a list for easier summing
    list_processed_matrix.append(matrix_processed)

# Step 4b | Sum all
matrix_di = sum(list_processed_matrix)

# Step 5 | Square root
matrix_di = np.sqrt(matrix_di)
# Step 5 | incorrect step(?) from Wang et al (2020) based on the standard practice
#matrix_di = matrix_di / len(institutional_variables) 

# create df
df_di = pd.DataFrame(matrix_di,index = matrix_index,columns=matrix_index)
df_di.head()

Unnamed: 0,ARE,BEL,CHN,CZE,ESP,GEO,HUN,IRQ,ITA,KGZ,KOR,LTU,OMN,SRB,SWE,THA,TJK,UKR,VNM,AFG,ALB,ARM,AUT,BGD,BGR,BIH,BLR,BRN,BTN,CHE,DEU,DNK,EST,FIN,FRA,GRC,HKG,HRV,IDN,IND,IRL,IRN,ISR,JOR,KAZ,KHM,KWT,LAO,LBN,LUX,LVA,MDA,MKD,MMR,MNG,MNE,MYS,NLD,NOR,NPL,PAK,POL,PRT,QAT,ROU,RUS,SAU,SGP,SVK,TKM,TUR,UZB
ARE,0.0,0.765912,2.338378,1.041373,1.076023,0.797663,1.762212,4.196436,1.783265,3.691852,1.172325,0.750949,1.364918,2.450591,1.673099,2.559666,3.802116,3.256891,2.663789,4.425043,2.586369,2.859752,0.618029,3.327397,2.19796,2.670325,2.343316,0.989444,0.07414,1.33937,1.101658,1.736654,0.165051,1.751311,0.374651,2.072728,0.639596,1.58822,2.576628,2.384625,0.675135,3.088435,0.024765,1.486597,3.261183,4.00939,2.361582,3.483791,3.504709,1.517314,1.214597,3.482549,2.389581,2.940419,2.720753,2.08443,1.764168,1.21596,1.680878,3.233619,3.357534,0.709506,0.403792,0.444348,1.952393,3.270976,1.542997,1.50296,1.545865,4.412371,2.235875,3.838777
BEL,0.765912,0.0,3.10429,1.807285,1.841935,1.563575,2.528124,4.962348,2.549177,4.457764,1.938237,1.516861,2.13083,3.216503,0.907187,3.325578,4.568028,4.022803,3.429701,5.190955,3.352281,3.625664,0.147883,4.093309,2.963872,3.436237,3.109228,1.755356,0.840052,0.573458,0.335746,0.970742,0.600861,0.985399,0.391261,2.83864,0.126316,2.354132,3.34254,3.150537,0.090777,3.854347,0.741147,2.252508,4.027095,4.775302,3.127494,4.249703,4.270621,0.751402,1.980509,4.248461,3.155493,3.706331,3.486664,2.850342,2.53008,0.450048,0.914966,3.999531,4.123446,1.475418,1.169704,1.21026,2.718305,4.036888,2.308909,0.737048,2.311777,5.178283,3.001787,4.604689
CHN,2.338378,3.10429,0.0,1.297004,1.262355,1.540714,0.576166,1.858059,0.555112,1.353474,1.166053,1.587428,0.97346,0.112213,4.011477,0.221288,1.463739,0.918513,0.325411,2.086665,0.247991,0.521374,2.956407,0.989019,0.140417,0.331947,0.004938,1.348934,2.264238,3.677748,3.440036,4.075032,2.503429,4.089689,2.713028,0.26565,2.977974,0.750158,0.23825,0.046247,3.013513,0.750057,2.363143,0.851781,0.922805,1.671012,0.023204,1.145413,1.166331,3.855692,1.123781,1.144171,0.051203,0.602041,0.382375,0.253948,0.57421,3.554338,4.019256,0.895242,1.019156,1.628872,1.934586,1.894029,0.385984,0.932598,0.795381,3.841338,0.792513,2.073994,0.102503,1.500399
CZE,1.041373,1.807285,1.297004,0.0,0.03465,0.24371,0.720839,3.155063,0.741892,2.650479,0.130952,0.290424,0.323545,1.409218,2.714472,1.518293,2.760743,2.215518,1.622416,3.383669,1.544996,1.818379,1.659402,2.286024,1.156587,1.628952,1.301942,0.051929,0.967233,2.380743,2.143032,2.778027,1.206425,2.792685,1.416024,1.031354,1.680969,0.546847,1.535255,1.343252,1.716508,2.047062,1.066139,0.445223,2.21981,2.968017,1.320208,2.442418,2.463336,2.558687,0.173224,2.441175,1.348208,1.899045,1.679379,1.043057,0.722795,2.257334,2.722251,2.192246,2.316161,0.331867,0.637582,0.597025,0.91102,2.229602,0.501624,2.544333,0.504492,3.370998,1.194502,2.797404
ESP,1.076023,1.841935,1.262355,0.03465,0.0,0.27836,0.686189,3.120414,0.707242,2.615829,0.096302,0.325074,0.288895,1.374568,2.749122,1.483643,2.726093,2.180868,1.587766,3.34902,1.510346,1.783729,1.694052,2.251374,1.121937,1.594302,1.267293,0.086579,1.001883,2.415393,2.177681,2.812677,1.241074,2.827334,1.450673,0.996705,1.715619,0.512197,1.500605,1.308602,1.751158,2.012412,1.100788,0.410574,2.18516,2.933367,1.285559,2.407768,2.428686,2.593337,0.138574,2.406526,1.313558,1.864396,1.64473,1.008407,0.688145,2.291983,2.756901,2.157597,2.281511,0.366517,0.672231,0.631674,0.876371,2.194953,0.466974,2.578983,0.469842,3.336348,1.159852,2.762754


In [107]:
matrix_di

array([[0.        , 0.76591196, 2.33837779, ..., 4.41237133, 2.23587493,
        3.83877708],
       [0.76591196, 0.        , 3.10428975, ..., 5.17828329, 3.00178689,
        4.60468904],
       [2.33837779, 3.10428975, 0.        , ..., 2.07399354, 0.10250286,
        1.50039929],
       ...,
       [4.41237133, 5.17828329, 2.07399354, ..., 0.        , 2.1764964 ,
        0.57359425],
       [2.23587493, 3.00178689, 0.10250286, ..., 2.1764964 , 0.        ,
        1.60290215],
       [3.83877708, 4.60468904, 1.50039929, ..., 0.57359425, 1.60290215,
        0.        ]])

In [114]:
# rescale (0-1)
scaler=preprocessing.MinMaxScaler() # define scaler
# fit data on reshaped one dimensional array (this preserves matching distances between pairs)
print(scaler.fit(matrix_di.reshape((matrix_di.shape[0]*matrix_di.shape[1],1)))) 
matrix_di_minmax = scaler.transform(matrix_di.reshape((matrix_di.shape[0]*matrix_di.shape[1],1))) # save as array

# back to data frame
df_di_minmax = pd.DataFrame(matrix_di_minmax.reshape((matrix_di.shape[0],matrix_di.shape[1])),index = matrix_index,columns =matrix_index)
df_di_minmax.head()

MinMaxScaler(copy=True, feature_range=(0, 1))


Unnamed: 0,ARE,BEL,CHN,CZE,ESP,GEO,HUN,IRQ,ITA,KGZ,KOR,LTU,OMN,SRB,SWE,THA,TJK,UKR,VNM,AFG,ALB,ARM,AUT,BGD,BGR,BIH,BLR,BRN,BTN,CHE,DEU,DNK,EST,FIN,FRA,GRC,HKG,HRV,IDN,IND,IRL,IRN,ISR,JOR,KAZ,KHM,KWT,LAO,LBN,LUX,LVA,MDA,MKD,MMR,MNG,MNE,MYS,NLD,NOR,NPL,PAK,POL,PRT,QAT,ROU,RUS,SAU,SGP,SVK,TKM,TUR,UZB
ARE,0.0,0.124007,0.378602,0.168606,0.174217,0.129148,0.285316,0.679436,0.288725,0.59774,0.189809,0.121585,0.220991,0.39677,0.270888,0.41443,0.615592,0.527316,0.431288,0.716449,0.418753,0.463016,0.100064,0.538732,0.355867,0.432347,0.379401,0.160199,0.012004,0.216854,0.178367,0.281178,0.026723,0.283551,0.060659,0.335591,0.103556,0.257145,0.417176,0.386089,0.10931,0.500042,0.00401,0.240692,0.528011,0.649152,0.382359,0.564053,0.56744,0.245665,0.196653,0.563852,0.386892,0.476077,0.440511,0.337486,0.285633,0.196874,0.272147,0.523548,0.543611,0.114875,0.065377,0.071943,0.316108,0.529597,0.249823,0.243341,0.250288,0.714397,0.362006,0.621528
BEL,0.124007,0.0,0.502609,0.292614,0.298224,0.253155,0.409323,0.803443,0.412732,0.721747,0.313816,0.245592,0.344998,0.520777,0.146881,0.538437,0.7396,0.651323,0.555295,0.840456,0.542761,0.587023,0.023943,0.662739,0.479874,0.556354,0.503408,0.284206,0.136011,0.092847,0.05436,0.157171,0.097284,0.159544,0.063348,0.459598,0.020452,0.381152,0.541183,0.510097,0.014697,0.624049,0.119997,0.364699,0.652018,0.773159,0.506366,0.68806,0.691447,0.121658,0.32066,0.687859,0.510899,0.600084,0.564518,0.461493,0.40964,0.072866,0.14814,0.647555,0.667618,0.238882,0.189384,0.195951,0.440115,0.653604,0.37383,0.119334,0.374295,0.838405,0.486013,0.745535
CHN,0.378602,0.502609,0.0,0.209995,0.204385,0.249454,0.093286,0.300834,0.089877,0.219138,0.188793,0.257017,0.157611,0.018168,0.649489,0.035828,0.236991,0.148714,0.052687,0.337847,0.040152,0.084415,0.478665,0.16013,0.022735,0.053745,0.000799,0.218403,0.366598,0.595456,0.556969,0.659779,0.405325,0.662153,0.439261,0.043011,0.482157,0.121456,0.038575,0.007488,0.487911,0.12144,0.382611,0.13791,0.149409,0.27055,0.003757,0.185451,0.188838,0.624267,0.181949,0.18525,0.00829,0.097475,0.061909,0.041116,0.092969,0.575475,0.650749,0.144947,0.165009,0.263727,0.313225,0.306658,0.062494,0.150995,0.128778,0.621943,0.128314,0.335796,0.016596,0.242926
CZE,0.168606,0.292614,0.209995,0.0,0.00561,0.039459,0.116709,0.510829,0.120118,0.429133,0.021202,0.047022,0.052384,0.228163,0.439494,0.245823,0.446986,0.35871,0.262682,0.547843,0.250147,0.29441,0.26867,0.370125,0.18726,0.26374,0.210795,0.008408,0.156603,0.385461,0.346974,0.449784,0.19533,0.452157,0.229265,0.166984,0.272162,0.088539,0.24857,0.217483,0.277916,0.331435,0.172616,0.072085,0.359405,0.480545,0.213752,0.395447,0.398833,0.414272,0.028046,0.395245,0.218285,0.30747,0.271905,0.168879,0.117026,0.36548,0.440754,0.354942,0.375005,0.053732,0.103229,0.096663,0.147501,0.36099,0.081217,0.411947,0.081681,0.545791,0.193399,0.452922
ESP,0.174217,0.298224,0.204385,0.00561,0.0,0.045069,0.111099,0.505219,0.114508,0.423523,0.015592,0.052632,0.046774,0.222553,0.445104,0.240213,0.441376,0.3531,0.257072,0.542232,0.244537,0.2888,0.27428,0.364515,0.18165,0.25813,0.205185,0.014018,0.162213,0.391071,0.352584,0.455394,0.20094,0.457768,0.234875,0.161374,0.277772,0.082929,0.24296,0.211873,0.283526,0.325825,0.178226,0.066475,0.353794,0.474935,0.208142,0.389836,0.393223,0.419882,0.022436,0.389635,0.212675,0.30186,0.266295,0.163269,0.111416,0.37109,0.446364,0.349332,0.369395,0.059342,0.108839,0.102273,0.141891,0.35538,0.075607,0.417558,0.076071,0.540181,0.187789,0.447312


**C | LPI | calculation**

In [81]:
# Basics variables for matrix to determine size and labels
matrix_size = len(df_wb_complete)
matrix_index = list(df_wb_complete.index)

In [143]:
# recalculate LPI # turn into array
matrix_lpi_raw = df_wb_complete.loc[:,'Logistics Performance Index'].to_numpy()
# rescale
matrix_lpi_adjusted = abs(((matrix_lpi_raw - 1) / 4) -1)+1

In [145]:
df_wb_complete["LPI_adjusted"] = matrix_lpi_adjusted

In [146]:
df_wb_complete

Unnamed: 0,Control of corruption,Government effectiveness,Political Stability and Absence of Violence,Rule of Law,Regulatory Quality,Voice and Accountability,GDP per capita (PPP),Logistics Performance Index,LPI_adjusted
ARE,1.171709,1.415928,0.564749,0.849041,0.972596,-1.050421,63968.853871,3.941767,1.264558
BEL,1.638705,1.326213,0.441367,1.390965,1.340684,1.376962,48608.957782,4.108538,1.222865
CHN,-0.254059,0.354875,-0.499395,-0.334415,-0.264475,-1.561287,13572.620725,3.661104,1.334724
CZE,0.536757,1.036739,0.976116,1.035502,0.986401,1.024769,35876.727438,3.674309,1.331423
ESP,0.515630,1.115842,0.413188,0.978825,1.008604,1.041046,37282.442513,3.727412,1.318147
...,...,...,...,...,...,...,...,...,...
SGP,2.088102,2.206245,1.495759,1.825243,2.180612,-0.150454,89386.079400,4.143632,1.214092
SVK,0.229156,0.894068,0.721171,0.646802,0.889052,0.960773,29651.524135,3.336895,1.415776
TKM,-1.518626,-1.126611,-0.189692,-1.489825,-2.090988,-2.170887,13866.324718,2.211396,1.697151
TUR,-0.191561,0.048478,-2.009063,-0.208644,0.196142,-0.608912,26329.360169,3.423693,1.394077


In [None]:
# Save to live
filename = "wb_distance_2016_extra"
s_file_export.f_df_export(df_wb_all,filename,p_file_id=f"{notebook_id}_",p_loc2=path_raw,p_loc2_pre="raw_")

In [154]:
# storing object for matrices
distance_matrices = []

column = 'Logistics Performance Index' #df_wb_complete.columns[7]
#column = "LPI_adjusted"

# for every column

# create zero matrix
df_matrix_zero = pd.DataFrame(np.zeros(shape = (matrix_size, matrix_size)),columns = matrix_index,index = matrix_index)

# for every country (index)
for entry in df_wb_complete.index:

    for entry_l2 in df_wb_complete.index:
        # calculate difference
        temp_difference = (df_wb_complete.loc[entry,column] + df_wb_complete.loc[entry_l2,column]) / 2

        # assign calculated value to correct position in matrix
        df_matrix_zero.loc[entry,entry_l2] = temp_difference

#distance_matrices.append(df_matrix_zero)
df_lpi = df_matrix_zero.copy()

In [153]:
# for all intense and purposes the two dfs are equivalent, no error made
#(df_lpi.to_numpy() - df_lip_transformed.to_numpy()).min()

-2.220446049250313e-16

In [155]:
# transform LPI into a penalty value to be multiplied against distance
# take lpi dataframe and create a matrix (for easier manipulation)
matrix_lpi = df_lpi.to_numpy().copy()
matrix_lpi_transformed = abs(((matrix_lpi - 1) / 4) -1)+1

# back to data frame
df_lip_transformed = pd.DataFrame(matrix_lpi_transformed,index = matrix_index,columns =matrix_index)
df_lip_transformed.head()

Unnamed: 0,ARE,BEL,CHN,CZE,ESP,GEO,HUN,IRQ,ITA,KGZ,KOR,LTU,OMN,SRB,SWE,THA,TJK,UKR,VNM,AFG,ALB,ARM,AUT,BGD,BGR,BIH,BLR,BRN,BTN,CHE,DEU,DNK,EST,FIN,FRA,GRC,HKG,HRV,IDN,IND,IRL,IRN,ISR,JOR,KAZ,KHM,KWT,LAO,LBN,LUX,LVA,MDA,MKD,MMR,MNG,MNE,MYS,NLD,NOR,NPL,PAK,POL,PRT,QAT,ROU,RUS,SAU,SGP,SVK,TKM,TUR,UZB
ARE,1.264558,1.243712,1.299641,1.297991,1.291353,1.463153,1.328658,1.488541,1.287852,1.487788,1.292638,1.303318,1.352991,1.411957,1.231705,1.350392,1.499436,1.415194,1.385201,1.489619,1.455717,1.48159,1.245031,1.424291,1.406319,1.432747,1.457375,1.398468,1.4671,1.258884,1.229033,1.280305,1.336843,1.267186,1.26966,1.35234,1.248641,1.362175,1.384212,1.329774,1.282918,1.432122,1.299738,1.387703,1.413279,1.407205,1.363327,1.498872,1.417622,1.229853,1.341391,1.430495,1.443496,1.449958,1.444022,1.459777,1.328991,1.233838,1.290759,1.460181,1.391877,1.329044,1.331108,1.307375,1.383139,1.435921,1.362732,1.239325,1.340167,1.480855,1.329317,1.456713
BEL,1.243712,1.222865,1.278795,1.277144,1.270506,1.442306,1.307812,1.467695,1.267006,1.466941,1.271792,1.282472,1.332144,1.391111,1.210859,1.329545,1.47859,1.394347,1.364354,1.468773,1.43487,1.460744,1.224185,1.403445,1.385472,1.411901,1.436529,1.377621,1.446253,1.238038,1.208187,1.259459,1.315997,1.24634,1.248814,1.331493,1.227795,1.341329,1.363366,1.308927,1.262072,1.411276,1.278891,1.366856,1.392433,1.386359,1.34248,1.478026,1.396776,1.209007,1.320544,1.409649,1.42265,1.429111,1.423176,1.438931,1.308144,1.212992,1.269912,1.439335,1.37103,1.308198,1.310262,1.286529,1.362293,1.415075,1.341886,1.218479,1.319321,1.460008,1.308471,1.435867
CHN,1.299641,1.278795,1.334724,1.333073,1.326436,1.498235,1.363741,1.523624,1.322935,1.522871,1.327721,1.338401,1.388073,1.44704,1.266788,1.385474,1.534519,1.450277,1.420283,1.524702,1.4908,1.516673,1.280114,1.459374,1.441401,1.46783,1.492458,1.43355,1.502182,1.293967,1.264116,1.315388,1.371926,1.302269,1.304743,1.387423,1.283724,1.397258,1.419295,1.364857,1.318001,1.467205,1.334821,1.422786,1.448362,1.442288,1.39841,1.533955,1.452705,1.264936,1.376474,1.465578,1.478579,1.485041,1.479105,1.49486,1.364074,1.268921,1.325842,1.495264,1.42696,1.364127,1.366191,1.342458,1.418222,1.471004,1.397815,1.274408,1.37525,1.515937,1.3644,1.491796
CZE,1.297991,1.277144,1.333073,1.331423,1.324785,1.496585,1.36209,1.521974,1.321285,1.52122,1.326071,1.33675,1.386423,1.445389,1.265137,1.383824,1.532868,1.448626,1.418633,1.523051,1.489149,1.515022,1.278463,1.457724,1.439751,1.46618,1.490807,1.4319,1.500532,1.292317,1.262466,1.313737,1.370275,1.300618,1.303092,1.385772,1.282074,1.395608,1.417644,1.363206,1.316351,1.465555,1.33317,1.421135,1.446712,1.440638,1.396759,1.532305,1.451054,1.263285,1.374823,1.463928,1.476929,1.48339,1.477454,1.493209,1.362423,1.26727,1.324191,1.493613,1.425309,1.362477,1.36454,1.340807,1.416571,1.469353,1.396164,1.272757,1.3736,1.514287,1.36275,1.490146
ESP,1.291353,1.270506,1.326436,1.324785,1.318147,1.489947,1.355452,1.515336,1.314647,1.514582,1.319433,1.330113,1.379785,1.438751,1.258499,1.377186,1.526231,1.441988,1.411995,1.516413,1.482511,1.508384,1.271825,1.451086,1.433113,1.459542,1.484169,1.425262,1.493894,1.285679,1.255828,1.307099,1.363637,1.29398,1.296454,1.379134,1.275436,1.38897,1.411006,1.356568,1.309713,1.458917,1.326532,1.414497,1.440074,1.434,1.390121,1.525667,1.444417,1.256647,1.368185,1.45729,1.470291,1.476752,1.470816,1.486571,1.355785,1.260632,1.317553,1.486976,1.418671,1.355839,1.357903,1.33417,1.409933,1.462716,1.389527,1.266119,1.366962,1.507649,1.356112,1.483508


<a id="ID_part4"></a>
### Part 4 | Export
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

In [87]:
# Economic distance matrix
file_name = f"de_matrix"
s_file_export.f_df_export(df_de.reset_index(),file_name,p_file_id=f"{notebook_id}_")
#s_file_export.f_df_export(df_de_minmax.reset_index(),"de_min_max",p_file_id=f"{notebook_id}_")

Export | ../Data/1_raw_processed_backup/121_store_de_matrix_20200822_1545.csv | COMPLETE
COPY   | ../Data/2_raw_processed_input/121_input_de_matrix.csv.gzip | COMPLETE


In [88]:
# Economic distance matrix
file_name = f"di_matrix"
s_file_export.f_df_export(df_di.reset_index(),file_name,p_file_id=f"{notebook_id}_")
#s_file_export.f_df_export(df_di_minmax.reset_index(),"di_min_max",p_file_id=f"{notebook_id}_")

Export | ../Data/1_raw_processed_backup/121_store_di_matrix_20200822_1545.csv | COMPLETE
COPY   | ../Data/2_raw_processed_input/121_input_di_matrix.csv.gzip | COMPLETE


In [138]:
# Logistics Performance Index matrix
# Economic distance matrix
file_name = f"lpi_matrix"
s_file_export.f_df_export(df_lip_transformed.reset_index(),file_name,p_file_id=f"{notebook_id}_")
#s_file_export.f_df_export(df_di_minmax.reset_index(),"di_min_max",p_file_id=f"{notebook_id}_")

Export | ../Data/1_raw_processed_backup/121_store_lpi_matrix_20200822_1703.csv | COMPLETE
COPY   | ../Data/2_raw_processed_input/121_input_lpi_matrix.csv.gzip | COMPLETE


<a id="ID_part5"></a>
### Part 5
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||