In [1]:
import pandas as pd
import json

In [2]:
# Load the location and SITC product classification tables (Stata files) that the
# later cells use to resolve country ids and product hierarchies.
classification_dir = './dataset/classifications_data'
df_location = pd.read_stata(f'{classification_dir}/location.dta')
df_data_class = pd.read_stata(f'{classification_dir}/sitc_product.dta')

In [3]:
# Preview the first 15 rows of the SITC product classification table.
df_data_class.head(15)

Unnamed: 0,product_id,sitc_product_code,sitc_product_name_short_en,level,parent_id
0,0,0,Food,section,
1,1,1,Beverages,section,
2,2,2,Crude materials,section,
3,3,3,Fuels,section,
4,4,4,Vegetable oils,section,
5,5,5,Chemicals,section,
6,6,6,Material manufacturers,section,
7,7,7,Machinery and vehicles,section,
8,8,8,Other manufacturers,section,
9,9,9,Unspecified,section,


In [4]:
# Directory holding the trade CSV files, followed by the file name of each dataset.
folder_path = './dataset/dataverse_files/'

# Country <-> partner trade flows; only the 4-digit file is split by year ({} = year).
country_partner_sitc_4digit = 'country_partner_sitcproduct4digit_year_{}.csv'
country_partner_sitc_2digit = 'country_partner_sitcproduct2digit_year.csv'
country_partner_sitc_section = 'country_partner_sitcproductsection_year.csv'

# Per-country totals (no partner dimension).
country_sitc_2digit = 'country_sitcproduct2digit_year.csv'
country_sitc_4digit = 'country_sitcproduct4digit_year.csv'
country_sitc_section = 'country_sitcproductsection_year.csv'

# Values of `sitc_digit` that select the 2-digit / 4-digit datasets.
sitc_2digit=2
sitc_4digit=4


def get_data(country_partner=True, sitc_digit=4, year=2019):
    """
    Read one of the SITC trade CSV files into a DataFrame.

    Args:
        country_partner: When truthy, pick the country/partner files; otherwise
            the per-country files.
        sitc_digit: 4 selects the 4-digit product file, 2 the 2-digit product
            file; any other value selects the product-section file.
        year: Substituted into the file name of the country/partner 4-digit
            dataset. It is not used for any other dataset.
    Returns:
        The DataFrame produced by ``pd.read_csv`` for the selected file.
    """
    # Line up the three candidate file names for the requested family of datasets.
    if country_partner:
        candidates = (
            country_partner_sitc_4digit.format(year),
            country_partner_sitc_2digit,
            country_partner_sitc_section,
        )
    else:
        candidates = (country_sitc_4digit, country_sitc_2digit, country_sitc_section)
    four_digit_file, two_digit_file, section_file = candidates

    # Anything that is neither the 4-digit nor the 2-digit marker means "section".
    if sitc_digit == sitc_4digit:
        chosen = four_digit_file
    elif sitc_digit == sitc_2digit:
        chosen = two_digit_file
    else:
        chosen = section_file

    return pd.read_csv(folder_path + chosen)

In [14]:
# Load the country GeoJSON (ne_110m_admin_0_countries) into `data` as a dictionary.
# The context manager closes the file even if parsing fails.
with open('./dataset/classifications_data/ne_110m_admin_0_countries.geojson', encoding="utf8") as f:
    data = json.load(f)

In [8]:
def fix_country_codes(name):
    """
    Return a two-element list of country codes for a country name.

    Only France, Norway and Kosovo have explicit codes here. Every other name
    yields the placeholder pair ["-99", "-99"]; such names are printed so they
    can be noticed, except "Northern Cyprus" and "Somaliland", which are
    skipped silently. Namibia is deliberately not handled by this function.

    Args:
        name: Country name to look up.
    Returns:
        A new list [two-letter code, three-letter code].
    """
    known_codes = {
        "France": ("FR", "FRA"),
        "Norway": ("NO", "NOR"),
        "Kosovo": ("XK", "XKX"),
    }
    if name in known_codes:
        return list(known_codes[name])

    # Unknown name: report it unless it is one of the expected gaps.
    if name not in ("Northern Cyprus", "Somaliland"):
        print(name)
    return ["-99", "-99"]

In [43]:
import warnings
import numpy as np
warnings.filterwarnings('ignore')
#pd.set_option('display.max_rows', 80)
product_dict = {-1:"all", 0:"food", 1:"beverage", 2:"crude_materials" , 3:"fuels", 4:"vegetable_oil", 5:"chemicals", 6:"material_manufacturers", 7:"machinery", 8:"other_manufacturers", 9:"unspecified"}

class NpEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    The standard encoder raises TypeError for NumPy integer, boolean and most
    floating scalar types, as well as for ``ndarray``. This subclass converts
    them to the equivalent built-in Python values first. Pass it through the
    ``cls`` argument of ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: A value the base encoder could not serialize on its own.
        Returns:
            ``bool``, ``int``, ``float`` or ``list`` for NumPy booleans,
            integers, floats and arrays respectively.
        Raises:
            TypeError: (from the base class) for any other unsupported type.
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

def _top_level_ancestors(classification):
    """Map every ``product_id`` in ``classification`` to its top-level ancestor.

    ``parent_id`` links are followed until a row without a parent is reached.
    The walk ends on "no parent" rather than on an id threshold, so products
    whose chain passes through a low-numbered intermediate id still resolve all
    the way up to a section.

    Args:
        classification: DataFrame with ``product_id`` and ``parent_id`` columns.
    Returns:
        dict of product_id -> id of the root of its parent chain.
    """
    parent_of = dict(zip(classification.product_id, classification.parent_id))
    ancestors = {}
    for product_id in parent_of:
        current = product_id
        parent = parent_of[current]
        # parent ids may be stored as floats (NaN marks "no parent"); int() keeps
        # the resolved id comparable with the integer section ids.
        while not pd.isna(parent) and parent in parent_of:
            current = int(parent)
            parent = parent_of[current]
        ancestors[product_id] = current
    return ancestors


def _build_country_table(geojson, locations):
    """Build the table of countries that exist in all three reference sources.

    Starts from countries.csv, attaches the three-letter code found in the
    GeoJSON features (via the two-letter code), drops countries without one,
    then attaches ``location_id`` and the short English name from ``locations``.

    Args:
        geojson: Parsed GeoJSON dict; each feature's ``properties`` must provide
            ``ISO_A2``, ``ISO_A3`` and ``ADMIN``.
        locations: DataFrame with ``location_code``, ``location_id`` and
            ``location_name_short_en`` columns.
    Returns:
        DataFrame with (at least) ``ISO_A2``, ``ISO_A3``, ``name`` and
        ``location_id`` columns; ``location_id`` is -1 where ``locations`` has no
        row for the country's three-letter code.
    """
    countries = pd.read_csv('./dataset/classifications_data/countries.csv')
    # Namibia's code is restored by hand -- presumably because read_csv parses
    # the literal "NA" as a missing value; TODO confirm against the CSV.
    countries.loc[countries.name == "Namibia", "country"] = "NA"
    countries = countries.rename(columns={"country": "ISO_A2"})

    # Two-letter -> three-letter code; later features override earlier ones.
    iso3_by_iso2 = {}
    for feature in geojson["features"]:
        props = feature["properties"]
        if props["ISO_A3"] == "-99":
            # The GeoJSON has no usable code for this feature; use the manual table.
            a2, a3 = fix_country_codes(props["ADMIN"])
        else:
            a2, a3 = props["ISO_A2"], props["ISO_A3"]
        iso3_by_iso2[a2] = a3

    countries["ISO_A3"] = countries["ISO_A2"].map(iso3_by_iso2)
    countries = countries.dropna(subset=["ISO_A3"]).copy()

    id_by_iso3 = dict(zip(locations.location_code, locations.location_id))
    name_by_iso3 = dict(zip(locations.location_code, locations.location_name_short_en))
    countries["location_id"] = countries["ISO_A3"].map(lambda code: id_by_iso3.get(code, -1))
    # Prefer the short name from `locations`; keep the CSV name where there is none.
    known = countries["ISO_A3"].isin(list(name_by_iso3))
    countries.loc[known, "name"] = countries.loc[known, "ISO_A3"].map(name_by_iso3)
    return countries


def _summarise_top_partners(pairs, code_by_id, name_by_id, top_n=20):
    """Summarise the largest import and export partners of every country.

    Args:
        pairs: DataFrame with one row per (location_id, partner_id) holding
            ``import_value`` and ``export_value``.
        code_by_id: dict of location id -> country code used as the output key.
        name_by_id: dict of location id -> country name.
        top_n: Number of partners kept per country and direction.
    Returns:
        dict of country code -> dict with, per direction, the partner rows
        (``import_value`` / ``export_value``: lists of
        [value, main_code, main_name, partner_code, partner_name, percentage]),
        the summed value (``total_imports`` / ``total_exports``) and that sum's
        share of all countries' sums (``percentage_imports`` /
        ``percentage_exports``).

    NOTE(review): totals and percentages are computed over the retained top_n
    partners only, not over all partners. This mirrors the previous behaviour;
    confirm it is what the consumer of the JSON expects.
    """
    summary = {}
    for mode, other, label in (
        ("import_value", "export_value", "imports"),
        ("export_value", "import_value", "exports"),
    ):
        # Top-N rows per country: stable descending sort, first N of each group,
        # then regroup by country. This avoids groupby.apply, which depends on
        # the grouping column being passed through to the applied function.
        top = (
            pairs.sort_values(mode, ascending=False, kind="mergesort")
            .groupby("location_id")
            .head(top_n)
            .sort_values("location_id", kind="mergesort")
            .reset_index(drop=True)
            .drop(other, axis=1)
        )

        # Ids are looked up with their original numeric type on both sides.
        # Previously the main-country code was matched against a string-cast
        # column using an un-cast id, which could never match.
        top["main_code"] = top["location_id"].map(lambda key: code_by_id.get(key, -1))
        top["main_name"] = top["location_id"].map(lambda key: name_by_id.get(key, -1))
        top["partner_code"] = top["partner_id"].map(lambda key: code_by_id.get(key, -1))
        top["partner_name"] = top["partner_id"].map(lambda key: name_by_id.get(key, -1))
        top = top.drop(["location_id", "partner_id"], axis=1)

        # Share of each partner within its country's retained rows (for opacity).
        country_sum = top.groupby("main_code")[mode].transform("sum")
        top["percentage"] = 100 * top[mode] / country_sum
        top = top.fillna(-1)  # -1 marks "not available", e.g. a 0/0 share

        total_key = f"total_{label}"
        share_key = f"percentage_{label}"
        world_total = 0
        codes_seen = []
        for code, rows in top.groupby("main_code", sort=False):
            entry = summary.setdefault(code, {})
            entry[mode] = rows.to_numpy().tolist()
            entry[total_key] = rows[mode].sum()
            world_total += entry[total_key]
            codes_seen.append(code)

        for code in codes_seen:
            entry = summary[code]
            # A zero world total would put NaN/inf into the JSON; use the same
            # -1 sentinel as the per-partner percentage instead.
            entry[share_key] = 100 * entry[total_key] / world_total if world_total else -1
    return summary


# These lookups do not depend on the product or the year, so build them once
# instead of once per (product, year) iteration.
product_parents = _top_level_ancestors(df_data_class)
countries = _build_country_table(data, df_location)
known_ids = countries["location_id"].astype(int)
code_by_id = dict(zip(known_ids, countries["ISO_A2"]))
name_by_id = dict(zip(known_ids, countries["name"]))

for Product_id in range(10): #Change this variable to filter based on product types
    result = {}
    print(product_dict[Product_id])
    for YEAR in range(1962, 2020):
        # NOTE(review): each yearly CSV is re-read for every product section.
        df_year = get_data(year=YEAR)

        # Keep only the rows whose product belongs to the current section.
        in_section = df_year.product_id.map(product_parents) == Product_id
        data_df = df_year[in_section].drop(
            ["product_id","year","sitc_eci","sitc_coi","location_code","partner_code","sitc_product_code"],
            axis = 1,
        )
        data_df = data_df.groupby(["location_id","partner_id"], sort=True).sum().reset_index()

        # Drop flows where either side is not in the country table.
        both_known = data_df.location_id.isin(known_ids) & data_df.partner_id.isin(known_ids)
        data_df = data_df[both_known]

        result[YEAR] = _summarise_top_partners(data_df, code_by_id, name_by_id)
        if YEAR % 10 == 0:
            print(YEAR)

    # One file per product section; NpEncoder converts the NumPy scalars in `result`.
    with open(f'trade_data_{product_dict[Product_id]}.json', "w") as outfile:
        json.dump(result, outfile, cls=NpEncoder)
            
            

food
1970
1980
1990
2000
2010
beverage
1970
1980
1990
2000
2010
crude_materials
1970
1980
1990
2000
2010
fuels
1970
1980
1990
2000
2010
vegetable_oil
1970
1980
1990
2000
2010
chemicals
1970
1980
1990
2000
2010
material_manufacturers
1970
1980
1990
2000
2010
machinery
1970
1980
1990
2000
2010
other_manufacturers
1970
1980
1990
2000
2010
unspecified
1970
1980
1990
2000
2010


In [None]:
# Reload the country GeoJSON into `data`. The file does not depend on the year,
# so it is read once rather than inside a per-year loop.
with open('./dataset/classifications_data/ne_110m_admin_0_countries.geojson', encoding="utf8") as f:
    data = json.load(f)

# Re-write the result of the most recently processed product section.
with open(f'trade_data_{product_dict[Product_id]}.json', "w") as outfile:
    json.dump(result, outfile, cls=NpEncoder)

In [None]:
# Load geo_export.json into `data` as a dictionary; the context manager closes
# the file even if parsing fails.
# NOTE: this rebinds `data`, which an earlier cell set to the country GeoJSON.
with open("geo_export.json") as f:
    data = json.load(f)

In [None]:
# Write `result` for the most recently processed product section. NpEncoder is
# required because `result` can contain NumPy scalars (e.g. the summed totals),
# which the default encoder rejects after the file has already been truncated.
with open(f'trade_data_{product_dict[Product_id]}.json', "w") as outfile:
    json.dump(result, outfile, cls=NpEncoder)

In [None]:
# Show the top-level keys of `data` (whichever JSON file was loaded into it last).
data.keys()

In [None]:
# Load trade_data.json into `data2` as a dictionary; the context manager closes
# the file even if parsing fails. Then show its top-level keys.
with open("trade_data.json") as f:
    data2 = json.load(f)
data2.keys()

In [None]:
# Show the keys stored under the "2019" entry of `data2`.
data2["2019"].keys()

In [None]:
# Check whether the 2019 "export_value" rows stored for "AFG" in `data2` equal
# the "AFG" entry of `data`.
data2["2019"]["AFG"]["export_value"] == data["AFG"]

In [None]:
# Show the first 2019 "export_value" row stored for "AFG" in `data2`.
data2["2019"]["AFG"]["export_value"][0]

In [None]:
# Show the first element of the "AFG" entry of `data`, for comparison.
data["AFG"][0]

In [None]:
# Print the value at position 32150 of the "2000" entry of `data2`, if it exists.
for position, entry in enumerate(data2["2000"].values()):
    if position != 32150:
        continue
    print(entry)

In [None]:
# Number of entries stored under the "2000" key of `data2`.
len(data2["2000"])

In [None]:
# Read trade_data.json as raw text to inspect a single character position.
# The context manager closes the file, which the previous version never did.
with open("trade_data.json", "r") as f:
    jsonstr = f.read()
jsonstr[32151]

In [None]:
# Show the raw text surrounding character position 32151.
jsonstr[32000:32160]