In [1]:
# Identifier of this notebook; used further down as the prefix of the exported file name
notebook_id = "131"

<a id="ID_top"></a>
## Country data organisation

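This workflow loads the joined gravity-model trade flow data together with the economic and institutional distance matrices (part 1), attaches both distance measures to every origin–destination country pair, and exports the result (part 2).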

#### Notebook sections:
    
|| [0| Default imports](#ID_top) || [1|Part1 Reference table generation](#ID_part1) || [2|Part2 Table maintenance](#ID_part2) || [3|Part3 Table export](#ID_part3) || 

#### Import all packages that could be required

In [4]:
# %load s_package_import.py
# package library, use to ensure consistency across notebooks, refresh periodically
# general packages
import os # use with os.listdir(_path_)
import requests
import csv
import time
from datetime import datetime
from shutil import copyfile

#temp check
#from shutil import make_archive
import zipfile #notebook

# data analysis packages
import pandas as pd
pd.options.display.max_columns = None # don't truncate columns
#pd.options.display.max_rows = None

import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import descartes
import pycountry

# custom scripts
import s_file_export
import s_filepaths
import s_un_comtrade_extract as s_un
import s_adj_matrix_plot

#=== network analysis
import networkx as nx
#=== gravity modelling
import gme as gme

#=== distance datasets
import wbdata


#### Import module and declare path variables
`import s_filepaths` (module defined in `s_filepaths.py`)

In [5]:
# Directory locations are kept in the project-local s_filepaths module
import s_filepaths

# Bind the module's path settings to short notebook-level names in one step
path_raw, path_raw_dl, path_store, path_live = (
    s_filepaths.path_raw,
    s_filepaths.path_raw_dl,
    s_filepaths.path_store,
    s_filepaths.path_live,
)

<a id="ID_part1"></a>
### Part 1 | Load and Merge
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

**Load the gravity-model flow data and the distance matrices**

In [6]:
# Show which input files are currently available in the live directory
live_dir_contents = os.listdir(f"{path_live}")
print(live_dir_contents)

['input_101_master_country_table.csv.csv.gzip', 'input_test.csv.gzip', 'input_un_com_2013.csv.gzip', 'input_un_com_2012.csv.gzip', 'input_un_com_2006-2009.csv.gzip', '.DS_Store', 'input_101_master_country_table.csv.gzip', 'input_un_codes_ref.csv.gzip', 'input_111_di_matrix.csv.gzip', 'input_121_gme_data_joined.csv.gzip', 'input_bri_countries_manual_2020.csv.gzip', 'input_un_com_2016-2019.csv.gzip', 'input_un_com_2002-2005.csv.gzip', 'input_un_com_2014.csv.gzip', 'input_un_com_2015.csv.gzip', 'input_bri_countries_Dumor_Yao.csv.gzip', '2_raw_explainer_doc.md', 'input_dynamic_gravity.csv.gzip', 'input_un_com_2010_merged_ref.csv.gzip', 'input_111_de_matrix.csv.gzip', 'input_gme_data_joined.csv.gzip', 'input_un_sample.csv.gzip']


In [8]:
# Load the joined gravity-model / trade-flow table produced by notebook 121.
# NOTE: the file carries its previously saved index as an "Unnamed: 0" column
# (visible in the preview below); it is kept as-is here.
gme_flow_file = f"{path_live}/input_121_gme_data_joined.csv.gzip"
df_grav_flow = pd.read_csv(gme_flow_file, compression="gzip")
df_grav_flow.head()

Unnamed: 0,year,country_o,iso3_o,country_d,iso3_d,distance,gdp_wdi_const_o,gdp_wdi_const_d,common_language,contiguity,agree_pta_goods,agree_cu,sanction_imposition,rtCode,rt3ISO,rtTitle,ptCode,pt3ISO,ptTitle,period,rgDesc,yr,rgCode,cmdCode,TradeValue,periodDesc,pfCode,cmdDescE
0,2009,Philippines,PHL,Aruba,ABW,16904.596,185437700000.0,,1,0,0,0,0.0,608,PHL,Philippines,533,ABW,Aruba,2009,Import,2009,1,TOTAL,72162,2009,H2,ALL COMMODITIES
1,2009,Philippines,PHL,Aruba,ABW,16904.596,185437700000.0,,1,0,0,0,0.0,608,PHL,Philippines,533,ABW,Aruba,2009,Export,2009,2,TOTAL,149587,2009,H2,ALL COMMODITIES
2,2009,Romania,ROU,Afghanistan,AFG,1883.9504,169350300000.0,14697330000.0,0,0,0,0,0.0,642,ROU,Romania,4,AFG,Afghanistan,2009,Import,2009,1,TOTAL,1688,2009,H3,All Commodities
3,2009,Romania,ROU,Afghanistan,AFG,1883.9504,169350300000.0,14697330000.0,0,0,0,0,0.0,642,ROU,Romania,4,AFG,Afghanistan,2009,Export,2009,2,TOTAL,15843818,2009,H3,All Commodities
4,2010,Denmark,DNK,Afghanistan,AFG,4835.0132,321993900000.0,15936800000.0,0,0,0,0,0.0,208,DNK,Denmark,4,AFG,Afghanistan,2010,Import,2010,1,TOTAL,5267969,2010,H3,All Commodities


In [14]:
# Load the two distance matrices written by notebook 111.
# Both files are gzip-compressed CSVs whose "index" column holds the row labels.
distance_read_opts = dict(compression="gzip", index_col="index")

df_de = pd.read_csv(f"{path_live}/input_111_de_matrix.csv.gzip", **distance_read_opts)
df_di = pd.read_csv(f"{path_live}/input_111_di_matrix.csv.gzip", **distance_read_opts)

In [30]:
# Attach the economic distance to every (iso3_o, iso3_d) pair of the gravity data.
# df_de is used as a lookup matrix: the row label is the origin ISO3 code and the
# column label is the destination ISO3 code.
paired_values = []

# Walk the two code columns directly rather than calling df.iloc[index_label]:
# this does not rely on df_grav_flow having a default 0..n-1 index, and it avoids
# materialising a full row Series twice per record.
for iso3_o, iso3_d in zip(df_grav_flow["iso3_o"], df_grav_flow["iso3_d"]):
    try:
        paired_values.append(df_de.loc[iso3_o, iso3_d])
    except KeyError:
        # Pair is not covered by the distance matrix -> record it as missing.
        # Only KeyError is caught so that genuine errors (and Ctrl-C) still surface.
        # np.nan is used because the np.NaN alias no longer exists in NumPy 2.x.
        paired_values.append(np.nan)

# One value per row, in row order, so a positional list assignment is sufficient
df_grav_flow["economic_distance"] = paired_values

In [36]:
# Attach the institutional distance to every (iso3_o, iso3_d) pair of the gravity data.
# df_di is used as a lookup matrix: the row label is the origin ISO3 code and the
# column label is the destination ISO3 code.
paired_values = []

# Walk the two code columns directly rather than calling df.iloc[index_label]:
# this does not rely on df_grav_flow having a default 0..n-1 index, and it avoids
# materialising a full row Series twice per record.
for iso3_o, iso3_d in zip(df_grav_flow["iso3_o"], df_grav_flow["iso3_d"]):
    try:
        paired_values.append(df_di.loc[iso3_o, iso3_d])
    except KeyError:
        # Pair is not covered by the distance matrix -> record it as missing.
        # Only KeyError is caught so that genuine errors (and Ctrl-C) still surface.
        # np.nan is used because the np.NaN alias no longer exists in NumPy 2.x.
        paired_values.append(np.nan)

# One value per row, in row order, so a positional list assignment is sufficient
df_grav_flow["institutional_distance"] = paired_values

<a id="ID_part2"></a>
### Part 2 | Export
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

In [37]:
# Export the gravity flow table, which now carries the economic_distance and
# institutional_distance columns, under a name prefixed with this notebook's id.
# f_df_export is a project helper; judging by the recorded output it writes a
# timestamped "store_" CSV and an "input_" .csv.gzip copy (verify in s_file_export).
file_name = f"{notebook_id}_gme_flow_distance"
s_file_export.f_df_export(df_grav_flow,file_name)

Export | ../Data/1_raw_processed_backup/store_131_gme_flow_distance_20200808_1722.csv | COMPLETE
COPY   | ../Data/2_raw_processed_input/input_131_gme_flow_distance.csv.gzip | COMPLETE


***

<a id="ID_part3"></a>
### Part 3
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

<a id="ID_part4"></a>
### Part 4
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

<a id="ID_part5"></a>
### Part 5
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||