In [1]:
import pandas as pd
import json

In [2]:
df_location = pd.read_stata('../dataset/classifications_data/location.dta')
df_data_class = pd.read_stata('../dataset/classifications_data/sitc_product.dta')

In [3]:
folder_path = '../dataset/dataverse_files/'
country_partner_sitc_4digit = 'country_partner_sitcproduct4digit_year_{}.csv'
country_partner_sitc_2digit = 'country_partner_sitcproduct2digit_year.csv'
country_partner_sitc_section = 'country_partner_sitcproductsection_year.csv'
country_sitc_2digit = 'country_sitcproduct2digit_year.csv'
country_sitc_4digit = 'country_sitcproduct4digit_year.csv'
country_sitc_section = 'country_sitcproductsection_year.csv'
sitc_2digit=2
sitc_4digit=4

def get_data(country_partner=True, sitc_digit=4, year=2019):
    """
    Creates a Dataframe for a specified SITC dataset

    Args:
        country_partner: If True, dataset with trades between countries and partners are selected
        sitc_digit: 4 for SITC-4 digit products, 2 for SITC-2 digit products, otw SITC product section
        year: Year between 1962 and 2019 for the country-partner SITC-4 digit products
    Returns:
        Dataframe of the selected dataset
    """
    path = folder_path
    if country_partner:
        if sitc_digit==sitc_4digit:
            path += country_partner_sitc_4digit.format(year)
        elif sitc_digit==sitc_2digit:
            path += country_partner_sitc_2digit
        else:
            path += country_partner_sitc_section
    else:
        if sitc_digit==sitc_4digit:
            path += country_sitc_4digit
        elif sitc_digit==sitc_2digit:
            path += country_sitc_2digit
        else:
            path += country_sitc_section

    return pd.read_csv(path)

In [4]:
# Opening JSON file
f = open('../dataset/classifications_data/ne_110m_admin_0_countries.geojson')
 
# returns JSON object as
# a dictionary
data = json.load(f)
f.close()

In [5]:
def fix_country_codes(name):
    if name == "France":
        return ["FR","FRA"]
    elif name == "Norway":
        return ["NO","NOR"]
    elif name == "Kosovo":
        return ["XK","XKX"]        
    else:
        if name != "Northern Cyprus" and name != "Somaliland":
            print(name)
        return ["-99","-99"]

In [6]:
result = {}
for YEAR in range(1962, 2020):
    df_2019 = get_data(year=YEAR)
    countries = pd.read_csv('../dataset/classifications_data/countries.csv') 
    countries = countries.rename(columns={"country": "ISO_A2"})
    countries["ISO_A3"] = 0
    for i in data["features"]: 
        if i["properties"]["ISO_A3"] == "-99":
            a2, a3 = fix_country_codes(i["properties"]["ADMIN"])
            countries.loc[countries.ISO_A2 == a2, 'ISO_A3'] = a3        
        else:
            countries.loc[countries.ISO_A2 == i["properties"]["ISO_A2"], 'ISO_A3'] = i["properties"]["ISO_A3"]
    countries = countries.drop(countries[countries.ISO_A3 == 0].index)
    countries["location_id"] = -1
    for index, row in df_location.iterrows():    
        countries.loc[countries.ISO_A3 == row['location_code'], 'location_id'] = row['location_id']
        countries.loc[countries.ISO_A3 == row['location_code'], 'name'] = row['location_name_short_en']
    countries[countries.location_id == -1]
    data_df = df_2019.drop(["product_id","year","sitc_eci","sitc_coi","location_code","partner_code","sitc_product_code"],axis = 1)
    data_df = data_df.groupby(["location_id","partner_id"], sort=True).sum().reset_index()
    data_df = data_df.drop(data_df[~data_df.location_id.isin(countries.location_id)].index)
    data_df = data_df.drop(data_df[~data_df.partner_id.isin(countries.location_id)].index)
    
    temp = {}
    for mode in ["import_value","export_value"]:
        dummy = data_df.groupby('location_id').apply(lambda x : x.nlargest(20, mode)).reset_index(drop = True)
        if mode == "import_value":
            dummy = dummy.drop("export_value",axis = 1)
        else:
            dummy = dummy.drop("import_value",axis = 1)
        dummy = dummy.astype({"location_id": str})
        dummy = dummy.astype({"partner_id": str})
        dummy["main_code"] = -1
        dummy["main_name"] = -1
#         dummy["main_lat"] = -1
#         dummy["main_lon"] = -1
        dummy["partner_code"] = -1
        dummy["partner_name"] = -1
#         dummy["partner_lat"] = -1
#         dummy["partner_lon"] = -1

        for index, row in countries.iterrows():    
            dummy.loc[dummy.location_id == row['location_id'], 'main_code'] = row['ISO_A2']
#             dummy.loc[dummy.location_id == str(row['location_id']), 'main_lat'] = row['latitude']
#             dummy.loc[dummy.location_id == str(row['location_id']), 'main_lon'] = row['longitude']
            dummy.loc[dummy.location_id == str(row['location_id']), 'main_name'] = row['name']

            dummy.loc[dummy.partner_id == str(row['location_id']), 'partner_code'] = row['ISO_A2']
#             dummy.loc[dummy.partner_id == str(row['location_id']), 'partner_lat'] = row['latitude']
#             dummy.loc[dummy.partner_id == str(row['location_id']), 'partner_lon'] = row['longitude']
            dummy.loc[dummy.partner_id == str(row['location_id']), 'partner_name'] = row['name']

        dummy = dummy.drop(["location_id","partner_id"],axis = 1)  
        #Add percentage for opacity
        sums = dummy.groupby("main_code")[mode].sum()
        dummy["percentage"] = -1
        for index, row in dummy.iterrows():
             dummy.loc[index, "percentage"] = row[mode] / sums[row["main_code"]]
          
        dummy = dummy.fillna(0)
        for code in dummy.main_code.unique():
            if  code not in temp:
                temp[code] = {}
            temp[code][mode] = dummy[dummy.main_code == code].to_numpy().tolist()
    
    result[YEAR] = temp
    if YEAR % 10 == 0:
        print(YEAR)



1970


  if (await self.run_code(code, result,  async_=asy)):


1980
1990
2000
2010


In [7]:
with open("trade_data.json", "w") as outfile:
    json.dump(result, outfile)

In [8]:
# Opening JSON file
f = open("geo_export.json")
 
# returns JSON object as
# a dictionary
data = json.load(f)
f.close() 

In [9]:
data.keys()

dict_keys(['AFG', 'AGO', 'ALB', 'ARE', 'ARG', 'ARM', 'ATA', 'ATF', 'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BOL', 'BRA', 'BRN', 'BTN', 'BWA', 'CAF', 'CAN', 'CHE', 'CHL', 'CHN', 'CIV', 'CMR', 'COD', 'COG', 'COL', 'CRI', 'CUB', 'CYP', 'CZE', 'DEU', 'DJI', 'DNK', 'DOM', 'DZA', 'ECU', 'EGY', 'ERI', 'ESH', 'ESP', 'EST', 'ETH', 'FIN', 'FJI', 'FLK', 'FRA', 'GAB', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB', 'GNB', 'GNQ', 'GRC', 'GRL', 'GTM', 'GUY', 'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ', 'KHM', 'KOR', 'KWT', 'LAO', 'LBN', 'LBR', 'LBY', 'LKA', 'LSO', 'LTU', 'LUX', 'LVA', 'MAR', 'MDA', 'MDG', 'MEX', 'MKD', 'MLI', 'MMR', 'MNE', 'MNG', 'MOZ', 'MRT', 'MWI', 'MYS', 'NCL', 'NER', 'NGA', 'NIC', 'NLD', 'NOR', 'NPL', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PNG', 'POL', 'PRK', 'PRT', 'PRY', 'PSE', 'QAT', 'ROU', 'RUS', 'RWA', 'SAU', 'SDN', 'SEN', 'SLB', 'SLE', 'SLV', 'SOM', 'S

In [10]:
# Opening JSON file
f = open("trade_data.json")
 
# returns JSON object as
# a dictionary
data2 = json.load(f)
f.close() 
data2.keys()

dict_keys(['1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019'])

In [11]:
data2["2019"].keys()

dict_keys(['AF', 'AO', 'AL', 'AE', 'AR', 'AM', 'AQ', 'TF', 'AU', 'AT', 'AZ', 'BI', 'BE', 'BJ', 'BF', 'BD', 'BG', 'BS', 'BA', 'BY', 'BZ', 'BO', 'BR', 'BN', 'BT', 'BW', 'CF', 'CA', 'CH', 'CL', 'CN', 'CI', 'CM', 'CD', 'CG', 'CO', 'CR', 'CU', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DO', 'DZ', 'EC', 'EG', 'ER', 'EH', 'ES', 'EE', 'ET', 'FI', 'FJ', 'FK', 'FR', 'GA', 'GB', 'GE', 'GH', 'GN', 'GM', 'GW', 'GQ', 'GR', 'GL', 'GT', 'GY', 'HN', 'HR', 'HT', 'HU', 'ID', 'IN', 'IE', 'IR', 'IQ', 'IS', 'IL', 'IT', 'JM', 'JO', 'JP', 'KZ', 'KE', 'KG', 'KH', 'KR', 'KW', 'LA', 'LB', 'LR', 'LY', 'LK', 'LS', 'LT', 'LU', 'LV', 'MA', 'MD', 'MG', 'MX', 'MK', 'ML', 'MM', 'ME', 'MN', 'MZ', 'MR', 'MW', 'MY', 'NC', 'NE', 'NG', 'NI', 'NL', 'NO', 'NP', 'NZ', 'OM', 'PK', 'PA', 'PE', 'PH', 'PG', 'PL', 'KP', 'PT', 'PY', 'PS', 'QA', 'RO', 'RU', 'RW', 'SA', 'SD', 'SN', 'SB', 'SL', 'SV', 'SO', 'RS', 'SR', 'SK', 'SI', 'SE', 'SZ', 'SY', 'TD', 'TG', 'TH', 'TJ', 'TM', 'TL', 'TT', 'TN', 'TR', 'TZ', 'UG', 'UA', 'UY', 'US', 'UZ', 'VE', 'VN',

In [12]:
data2["2019"]["AFG"]["export_value"] == data["AFG"]

KeyError: 'AFG'

In [None]:
data2["2019"]["AFG"]["export_value"][0]

In [None]:
data["AFG"][0]

In [None]:
for idx,i in enumerate(data2["2000"].values()):
    if idx == 32150:
        print(i)

In [None]:
len(data2["2000"].values())

In [None]:
f = open("trade_data.json", "r")
jsonstr = f.read()
jsonstr[32151]

In [None]:
jsonstr[32000:32160]