[The Atlas of Economic Complexity - Harvard](https://intl-atlas-downloads.s3.amazonaws.com/data_17_0.h5)

[Gravity Portal: Dynamic Gravity Dataset](https://www.usitc.gov/data/gravity/dataset.htm)

In [1]:
import tables as tb
import pandas as pd

In [2]:
# Source HDF5 file and the destination file the tables are re-written into.
in_f = "../data/data_17_0.h5"
out_f = "../data/transformed_data.h5"

# The country/product tables share one column list per family and differ
# only in the product aggregation level embedded in the key name.
_product_levels = ("2digit", "4digit", "6digit", "section")
_country_product_cols = ["location_id", "product_id", "year"]
_country_partner_product_cols = [*_country_product_cols, "partner_id"]

# Maps every HDF5 key to transform onto the columns that are passed along
# as `data_columns` when the table is written to `out_f`.
keys_columns = {
    "/classifications/hs_product": [
        "code",
        "name",
        "level",
        "name_en",
        "name_short_en",
        "parent_id",
    ],
    "/classifications/location": [
        "code",
        "level",
        "name_en",
        "name_short_en",
        "iso2",
        "parent_id",
        "name",
    ],
    "/country": [
        "location_id",
        "export_value",
        "import_value",
        "gdp_year",
        "gdppc_year",
        "gdp_ppp_year",
        "gdppc_ppp_year",
        "population_year",
    ],
    # Each key gets its own list copy so the entries stay independent.
    **{
        f"/country_hsproduct{level}_year": list(_country_product_cols)
        for level in _product_levels
    },
    **{
        f"/country_partner_hsproduct{level}_year": list(
            _country_partner_product_cols
        )
        for level in _product_levels
    },
}

In [5]:
def get_columns(store, key):
    """Return the column labels of the table stored under ``key``.

    The selection is limited with ``stop=1``, so only the start of the
    table is requested in order to inspect its columns.

    Parameters
    ----------
    store
        Object exposing ``select(key, stop=...)``, e.g. an open
        ``pd.HDFStore``.
    key
        Key of the table inside the store.

    Returns
    -------
    The ``columns`` attribute of the selected frame.
    """
    preview = store.select(key, stop=1)
    return preview.columns


def transform(in_f, out_f, key, data_columns):
    """Copy the table under ``key`` from ``in_f`` into ``out_f`` in "table" format.

    The complete table is read into memory and then written to ``out_f``
    under the same key with ``format="table"``, passing ``data_columns``
    through to ``DataFrame.to_hdf``.

    Parameters
    ----------
    in_f
        Path of the source HDF5 file. It is opened read-only.
    out_f
        Path of the destination HDF5 file. It is opened in append mode
        (``mode="a"``), so tables written by earlier calls are kept.
    key
        Key of the table to copy; reused as the key in ``out_f``.
    data_columns
        Forwarded as the ``data_columns`` argument of ``to_hdf``.

    Returns
    -------
    None
    """
    # The source is only read, so open it read-only: this works on files
    # without write permission and cannot modify the input by accident.
    with pd.HDFStore(in_f, mode="r") as store:
        df = store.select(key)

    # The ``with`` block has already closed the source store; write only
    # after that so no input handle is held during the output write.
    df.to_hdf(
        out_f, key=key, format="table", data_columns=data_columns, mode="a"
    )

In [6]:
# Failures are recorded as (key, exception) pairs rather than raised, so a
# problem with one table does not prevent the remaining ones from running.
failed = []
for key in keys_columns:
    columns = keys_columns[key]
    try:
        print(f"Transforming {key}")
        transform(in_f, out_f, key, columns)
    except Exception as exc:
        failed.append((key, exc))
        print(f"error with {key}")

Transforming /classifications/hs_product
Transforming /classifications/location
Transforming /country
Transforming /country_hsproduct2digit_year
Transforming /country_hsproduct4digit_year
Transforming /country_hsproduct6digit_year
Transforming /country_hsproductsection_year
Transforming /country_partner_hsproduct2digit_year
Transforming /country_partner_hsproduct4digit_year


  expected_mb = (expectedrows * rowsize) // MB


Transforming /country_partner_hsproduct6digit_year
Transforming /country_partner_hsproductsection_year


In [None]:
# Show the (key, exception) pairs recorded for tables that raised during the loop.
failed