[The Atlas of Economic Complexity - Harvard](https://intl-atlas-downloads.s3.amazonaws.com/data_17_0.h5)

[Gravity Portal: Dynamic Gravity Dataset](https://www.usitc.gov/data/gravity/dataset.htm)

[WTO - Bulk download of trade datasets](https://www.wto.org/english/res_e/statis_e/trade_datasets_e.htm)

In [1]:
import tables as tb
import pandas as pd

In [2]:
# Source HDF5 file (the Atlas download linked at the top of the notebook)
# and the file the re-written tables are saved to.
in_f = "../data/data_17_0.h5"
out_f = "../data/transformed_data.h5"

# Aggregation levels shared by the two families of product/year tables.
_HS_LEVELS = ("2digit", "4digit", "6digit", "section")

# Maps each HDF5 key to the list of columns that is passed as
# `data_columns` when that table is written to `out_f`.
keys_columns = {
    "/classifications/hs_product": [
        "code", "name", "level", "name_en", "name_short_en", "parent_id",
    ],
    "/classifications/location": [
        "code", "level", "name_en", "name_short_en", "iso2", "parent_id", "name",
    ],
    "/country": [
        "location_id",
        "export_value",
        "import_value",
        "gdp_year",
        "gdppc_year",
        "gdp_ppp_year",
        "gdppc_ppp_year",
        "population_year",
    ],
    # /country_hsproduct<level>_year tables, one per aggregation level.
    **{
        f"/country_hsproduct{level}_year": ["location_id", "product_id", "year"]
        for level in _HS_LEVELS
    },
    # /country_partner_hsproduct<level>_year tables add the partner column.
    **{
        f"/country_partner_hsproduct{level}_year": [
            "location_id",
            "product_id",
            "year",
            "partner_id",
        ]
        for level in _HS_LEVELS
    },
}

In [5]:
def get_columns(store, key):
    """Return the column index of the table stored under `key`.

    `store.select` is called with ``stop=1`` and the `.columns` of whatever
    it returns is handed back.
    """
    preview = store.select(key, stop=1)
    return preview.columns


def transform(in_f, out_f, key, data_columns):
    """Copy one table from `in_f` to `out_f`, writing it in "table" format.

    Parameters
    ----------
    in_f : str
        Path of the source HDF5 file. It is opened read-only.
    out_f : str
        Path of the destination HDF5 file. It is opened in append mode
        (``mode="a"``), so the file is created if missing and reused otherwise.
    key : str
        HDF5 key of the table to read; the same key is used in `out_f`.
    data_columns : list of str
        Passed through unchanged as `data_columns` to `DataFrame.to_hdf`.

    Returns
    -------
    None

    Notes
    -----
    The whole table is loaded into memory before it is written out.
    """
    # The source is only read, so open it read-only: this works on
    # write-protected files and cannot modify the original download.
    with pd.HDFStore(in_f, mode="r") as store:
        df = store.select(key)

    # The `with` block has already closed the store. `key` is passed by
    # keyword because newer pandas deprecates passing it positionally.
    df.to_hdf(out_f, key=key, format="table", data_columns=data_columns, mode="a")

In [6]:
# Run the transform for every configured key. A failure is recorded as a
# (key, exception) pair in `failed` and the loop moves on to the next key.
failed = []
for key, columns in keys_columns.items():
    print(f"Transforming {key}")
    try:
        transform(in_f, out_f, key, columns)
    except Exception as exc:
        failed.append((key, exc))
        print(f"error with {key}")

Transforming /classifications/hs_product
Transforming /classifications/location
Transforming /country
Transforming /country_hsproduct2digit_year
Transforming /country_hsproduct4digit_year
Transforming /country_hsproduct6digit_year
Transforming /country_hsproductsection_year
Transforming /country_partner_hsproduct2digit_year
Transforming /country_partner_hsproduct4digit_year


  expected_mb = (expectedrows * rowsize) // MB


Transforming /country_partner_hsproduct6digit_year
Transforming /country_partner_hsproductsection_year


In [None]:
# Display the (key, exception) pairs collected by the transform loop;
# an empty list means every table was transformed without an exception.
failed

In [2]:
# Load the release CSV into a DataFrame.
# NOTE(review): presumably the USITC Dynamic Gravity Dataset linked at the
# top of the notebook (file name suggests release 1.0, 2005-2016) — confirm.
d = pd.read_csv("../data/release_1.0_2005_2016.csv")

In [5]:
# List the column names of the loaded CSV.
d.columns

Index(['year', 'country_d', 'iso3_d', 'dynamic_code_d', 'landlocked_d',
       'island_d', 'region_d', 'gdp_pwt_const_d', 'pop_d', 'gdp_pwt_cur_d',
       'capital_cur_d', 'capital_const_d', 'gdp_wdi_cur_d', 'gdp_wdi_const_d',
       'gdp_wdi_cap_cur_d', 'gdp_wdi_cap_const_d', 'lat_d', 'lng_d',
       'polity_d', 'polity_abs_d', 'country_o', 'iso3_o', 'dynamic_code_o',
       'landlocked_o', 'island_o', 'region_o', 'gdp_pwt_const_o', 'pop_o',
       'gdp_pwt_cur_o', 'capital_cur_o', 'capital_const_o', 'gdp_wdi_cur_o',
       'gdp_wdi_const_o', 'gdp_wdi_cap_cur_o', 'gdp_wdi_cap_const_o', 'lat_o',
       'lng_o', 'polity_o', 'polity_abs_o', 'contiguity', 'agree_pta_goods',
       'agree_pta_services', 'agree_cu', 'agree_eia', 'agree_fta', 'agree_psa',
       'agree_pta', 'sanction_threat', 'sanction_threat_trade',
       'sanction_imposition', 'sanction_imposition_trade', 'member_eu_o',
       'member_wto_o', 'member_gatt_o', 'member_eu_d', 'member_wto_d',
       'member_gatt_d', 'member

In [9]:
# Rows where iso3_d is "USA" and year is 2016.
# NOTE(review): the `_d` suffix presumably marks the destination country — confirm.
d.loc[(d["iso3_d"] == "USA") & (d["year"] == 2016)]

Unnamed: 0,year,country_d,iso3_d,dynamic_code_d,landlocked_d,island_d,region_d,gdp_pwt_const_d,pop_d,gdp_pwt_cur_d,...,hostility_level_o,hostility_level_d,distance,common_language,colony_of_destination_after45,colony_of_destination_current,colony_of_destination_ever,colony_of_origin_after45,colony_of_origin_current,colony_of_origin_ever
2802,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,13902.0410,1,0,0,0,0,0,0
704077,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,10253.1780,0,0,0,0,0,0,0
704078,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,3823.1731,1,0,0,0,0,0,0
704079,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,11622.3140,0,0,0,0,0,0,0
704080,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,7330.3726,0,0,0,0,0,0,0
704081,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,9915.9180,1,1,0,1,0,0,0
704082,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,10257.6660,0,0,0,0,0,0,0
704083,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,8044.1089,0,0,0,0,0,0,0
704084,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,10382.8580,1,0,0,0,0,0,0
704085,2016,United States,USA,USA,0,0,north_america,,,,...,0,0,12601.6530,1,0,0,0,0,0,0
