In [None]:
import pandas as pd
import numpy as np
import altair as alt
import json
import os
import eco_style
import comtradeapicall

# Switch Altair to the theme registered under the name "light".
# NOTE(review): presumably that theme is registered by the `eco_style` import above — confirm.
alt.themes.enable("light")

ThemeRegistry.enable('light')

In [281]:
# IMF IFS deflators: the file is wide (one column per year). Keep the two
# country identifier columns plus every column whose name is purely numeric,
# reshape to one row per country and year, and divide the deflator by 100.
deflators_raw = pd.read_csv("imf_ifs_deflators.csv")
deflator_columns = [
    col
    for col in deflators_raw.columns
    if col in ("COUNTRY.ID", "COUNTRY") or str(col).isnumeric()
]
deflators_df = (
    deflators_raw[deflator_columns]
    .rename(columns={"COUNTRY.ID": "country_id", "COUNTRY": "country_name"})
    .melt(id_vars=["country_id", "country_name"], var_name="date", value_name="deflator")
    .assign(
        date=lambda frame: pd.to_datetime(frame["date"], format="%Y"),
        deflator=lambda frame: frame["deflator"].astype(float) / 100,
    )
)

In [308]:
# US CPI series (FRED CPIAUCSL file): two columns, renamed to date / deflator.
us_cpi_df = pd.read_csv("FRED_US_CPI_Index_CPIAUCSL.csv").set_axis(["date", "deflator"], axis=1)
us_cpi_df["date"] = pd.to_datetime(us_cpi_df["date"], format="%Y-%m-%d")

# Rebase so that the 2017-01-01 observation equals 1.0, then drop incomplete rows.
is_base_date = us_cpi_df["date"] == "2017-01-01"
comparison_value = us_cpi_df.loc[is_base_date, "deflator"].values[0]
us_cpi_df = us_cpi_df.assign(deflator=us_cpi_df["deflator"] / comparison_value).dropna()

### US steel imports (HS chapter 72)

In [266]:

# The Comtrade subscription key is a credential, so it is read from the
# environment instead of being stored in the notebook. A key that was already
# committed should be rotated.
subscription_key = os.environ.get("COMTRADE_SUBSCRIPTION_KEY")
if not subscription_key:
    raise RuntimeError(
        "Set the COMTRADE_SUBSCRIPTION_KEY environment variable to your UN Comtrade API key."
    )

# One request per year: annual US imports of HS chapter 72 from every partner.
dfs = []
for year in range(2010, 2025):
    df = comtradeapicall.getFinalData(
        subscription_key=subscription_key,
        typeCode='C',
        freqCode='A',  # Annual
        clCode='HS',
        period=str(year),
        reporterCode='842',  # USA
        cmdCode='72',        # Iron and Steel
        flowCode='M',        # Imports
        partnerCode=None,    # All partners
        partner2Code=None,
        customsCode=None,
        motCode=None,
        maxRecords=50000,
        format_output='JSON',
        aggregateBy=None,
        breakdownMode='classic',
        countOnly=None,
        includeDesc=True
    )
    # Skip years that came back as None or empty so they are never handed to
    # pd.concat (the saved output of this cell shows a warning at that call).
    if df is not None and not df.empty:
        dfs.append(df)

if not dfs:
    raise RuntimeError("Comtrade returned no data for US imports of HS chapter 72.")

df = pd.concat(dfs, ignore_index=True)
df.to_csv("usa_iron_steel_imports.csv", index=False)

  df = pd.concat(dfs, ignore_index=True)


In [None]:
DEFLATE = True

# Load the cached Comtrade download and keep only the fields the chart needs;
# the partner of a US import is the exporter.
df = pd.read_csv("usa_iron_steel_imports.csv")
df = df[['period', 'reporterISO', 'reporterDesc', 'partnerISO', 'partnerDesc', 'primaryValue']]
df.columns = ['date', 'reporterISO', 'reporterDesc', 'exporterISO', 'exporterDesc', 'value']

# Ten largest partner entries by total value over the whole period. Kept for
# inspection only: the exporters drawn individually are the fixed list below.
top_exporters = (
    df.groupby(by=['exporterDesc', 'exporterISO'])['value']
    .sum()
    .reset_index()
    .sort_values(by='value', ascending=False)
    .head(10)
)
callouts = [
    "CAN", "BRA", "MEX", "KOR", "RUS", "CHN"
]

full_df = df.copy()

# Every exporter outside the call-out list is pooled into "Other".
df['exporter'] = np.where(df['exporterISO'].isin(callouts), df['exporterDesc'], 'Other')

# W00 is the World aggregate; drop it so the total is not counted inside "Other".
df = df.query("exporterISO != 'W00'")

df = df.groupby(['date', 'exporter'])['value'].sum().reset_index()

# Express values in billions.
df['value'] = df['value'].astype(float)/1e9

# Only the points of the final year carry a text label (drawn at the line ends).
df['label'] = np.where(df.date == df.date.max(), df['exporter'], '')
df['date'] = pd.to_datetime(df['date'], format='%Y')

# Deflate with the US deflator
if DEFLATE:
    df = df.merge(deflators_df.query("country_id == 'USA'")[['date', 'deflator']], on='date', how='left')
    df['value'] = df['value'] / df['deflator']

# Colour order follows the ranking in the latest year present in the data. It
# is derived from the data (not a hard-coded year) so it always agrees with
# the year used for the end labels.
latest_year = df['date'].max()
color_domain = (
    df[df['date'] == latest_year]
    .sort_values(by='value', ascending=False)
    .head(10)['exporter']
    .unique()
    .tolist()
)

base = alt.Chart(df).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title='', axis=alt.Axis(
        format='$,.0f',
        labelExpr="datum.label + (datum.value > 0 ? ' bn' : '')",
        )),
    color=alt.Color('exporter:N',
                    legend=None,
                    scale=alt.Scale(
                        domain=color_domain,
                        range=["#bcbcbc", "#36B7B4","#E6224B","#F4C245","#0063AF","#00A767","#179FDB","#EB5C2E"]),
                     title='Exporter'),
)

# China is the highlighted series: thicker, fully opaque line and larger label.
is_china = alt.datum.exporter == 'China'

lines = base.mark_line().encode(
    size=alt.condition(is_china, alt.value(3), alt.value(1.5)),
    opacity=alt.condition(is_china, alt.value(1), alt.value(0.8)),
)

end_labels = base.mark_text(
    align='left',
    dx=5,
).encode(
    text='label:N',
    size=alt.condition(is_china, alt.value(12), alt.value(10)),
    opacity=alt.condition(is_china, alt.value(1), alt.value(0.8)),
)


chart = lines + end_labels

chart = chart.properties(
    width=500,
    height=350
)

print("Chapter 72 — Iron and Steel" + (" (Deflated)" if DEFLATE else ""))
chart

Chapter 72 — Iron and Steel (Deflated)


## US steel article imports (HS chapter 73)

In [None]:

# The Comtrade subscription key is a credential, so it is read from the
# environment instead of being stored in the notebook. A key that was already
# committed should be rotated.
subscription_key = os.environ.get("COMTRADE_SUBSCRIPTION_KEY")
if not subscription_key:
    raise RuntimeError(
        "Set the COMTRADE_SUBSCRIPTION_KEY environment variable to your UN Comtrade API key."
    )

# One request per year: annual US imports of HS chapter 73 from every partner.
dfs = []
for year in range(2010, 2025):
    df = comtradeapicall.getFinalData(
        subscription_key=subscription_key,
        typeCode='C',
        freqCode='A',  # Annual
        clCode='HS',
        period=str(year),
        reporterCode='842',  # USA
        cmdCode='73',        # Iron and Steel Articles
        flowCode='M',        # Imports
        partnerCode=None,    # All partners
        partner2Code=None,
        customsCode=None,
        motCode=None,
        maxRecords=50000,
        format_output='JSON',
        aggregateBy=None,
        breakdownMode='classic',
        countOnly=None,
        includeDesc=True
    )
    # Skip years that came back as None or empty so they are never handed to
    # pd.concat (the saved output of this cell shows a warning at that call).
    if df is not None and not df.empty:
        dfs.append(df)

if not dfs:
    raise RuntimeError("Comtrade returned no data for US imports of HS chapter 73.")

df = pd.concat(dfs, ignore_index=True)
df.to_csv("usa_iron_steel_article_imports.csv", index=False)

  df = pd.concat(dfs, ignore_index=True)


In [288]:
DEFLATE = True

# Load the cached Comtrade download and keep only the fields the chart needs;
# the partner of a US import is the exporter.
df = pd.read_csv("usa_iron_steel_article_imports.csv")
df = df[['period', 'reporterISO', 'reporterDesc', 'partnerISO', 'partnerDesc', 'primaryValue']]
df.columns = ['date', 'reporterISO', 'reporterDesc', 'exporterISO', 'exporterDesc', 'value']

# Ten largest partner entries by total value over the whole period.
top_exporters = (
    df.groupby(by=['exporterDesc', 'exporterISO'])['value']
    .sum()
    .reset_index()
    .sort_values(by='value', ascending=False)
    .head(10)
)

# Call out the six largest of those, ignoring the W00 (World) and S19
# ("Other Asia, nes") entries.
callouts = top_exporters.query("exporterISO != 'W00' and exporterISO != 'S19'").head(6).exporterISO.unique().tolist()

full_df = df.copy()

# Every exporter outside the call-out list is pooled into "Other".
df['exporter'] = np.where(df['exporterISO'].isin(callouts), df['exporterDesc'], 'Other')

# Drop the World aggregate so the total is not counted inside "Other".
df = df.query("exporterISO != 'W00'")

df = df.groupby(['date', 'exporter'])['value'].sum().reset_index()

# Express values in billions.
df['value'] = df['value'].astype(float)/1e9

# Only the points of the final year carry a text label (drawn at the line ends).
df['label'] = np.where(df.date == df.date.max(), df['exporter'], '')
df['date'] = pd.to_datetime(df['date'], format='%Y')

# Deflate with the US deflator
if DEFLATE:
    df = df.merge(deflators_df.query("country_id == 'USA'")[['date', 'deflator']], on='date', how='left')
    df['value'] = df['value'] / df['deflator']

# Colour order follows the ranking in the latest year present in the data. It
# is derived from the data (not a hard-coded year) so it always agrees with
# the year used for the end labels.
latest_year = df['date'].max()
color_domain = (
    df[df['date'] == latest_year]
    .sort_values(by='value', ascending=False)
    .head(10)['exporter']
    .unique()
    .tolist()
)

base = alt.Chart(df).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title='', axis=alt.Axis(
        format='$,.0f',
        labelExpr="datum.label + (datum.value > 0 ? ' bn' : '')",
        )),
    color=alt.Color('exporter:N',
                    legend=None,
                    scale=alt.Scale(
                        domain=color_domain,
                        range=["#bcbcbc", "#36B7B4","#E6224B","#F4C245","#0063AF","#00A767","#179FDB","#EB5C2E"]),
                     title='Exporter'),
)

# China is the highlighted series: thicker, fully opaque line and larger label.
is_china = alt.datum.exporter == 'China'

lines = base.mark_line().encode(
    size=alt.condition(is_china, alt.value(3), alt.value(1.5)),
    opacity=alt.condition(is_china, alt.value(1), alt.value(0.8)),
)

end_labels = base.mark_text(
    align='left',
    dx=5,
).encode(
    text='label:N',
    size=alt.condition(is_china, alt.value(12), alt.value(10)),
    opacity=alt.condition(is_china, alt.value(1), alt.value(0.8)),
)


chart = lines + end_labels

chart = chart.properties(
    width=500,
    height=350
)

print("Chapter 73 — Iron and Steel articles" + (" (Deflated)" if DEFLATE else ""))

# Make sure the output folder exists before writing the chart files.
os.makedirs("charts", exist_ok=True)
chart.save(f"charts/usa_iron_steel_articles_exports_chapter_73{'_deflated' if DEFLATE else ''}.png", scale_factor=3)
chart.save(f"charts/usa_iron_steel_articles_exports_chapter_73{'_deflated' if DEFLATE else ''}.html")
chart

Chapter 73 — Iron and Steel articles (Deflated)


# China exports to all destinations

In [None]:
# Function to fetch and export data for a given HS chapter
def fetch_and_save(chapter_code, label):
    """Download China's annual exports for one HS chapter and write them to CSV.

    One Comtrade request is made per year from 2010 to 2024 (reporter 156,
    flow 'X', all partners). The yearly results are concatenated and saved as
    ``chn_iron_steel_{label}_exports_HS{chapter_code}.csv`` in the working
    directory.

    Relies on the notebook-level ``subscription_key`` being defined.

    Args:
        chapter_code: HS chapter passed as ``cmdCode``, e.g. '72' or '73'.
        label: Short tag that becomes part of the output file name.

    Returns:
        None. The result is the CSV file written to disk.

    Raises:
        RuntimeError: If no year returned any rows.
    """
    dfs = []
    for year in range(2010, 2025):
        print(f"Fetching data for year {year}, HS {chapter_code}")
        df = comtradeapicall.getFinalData(
            subscription_key=subscription_key,
            typeCode='C',
            freqCode='A',
            clCode='HS',
            period=str(year),
            reporterCode='156',    # China
            cmdCode=chapter_code,  # '72' or '73'
            flowCode='X',          # Exports
            partnerCode=None,      # All partners
            partner2Code=None,
            customsCode=None,
            motCode=None,
            maxRecords=50000,
            format_output='JSON',
            aggregateBy=None,
            breakdownMode='classic',
            countOnly=None,
            includeDesc=True
        )
        # Skip years that came back as None or empty so they are never handed
        # to pd.concat (the saved output shows a warning at that call).
        if df is not None and not df.empty:
            dfs.append(df)

    if not dfs:
        raise RuntimeError(f"Comtrade returned no data for China exports of HS {chapter_code}.")

    df_all = pd.concat(dfs, ignore_index=True)
    file_name = f"chn_iron_steel_{label}_exports_HS{chapter_code}.csv"
    df_all.to_csv(file_name, index=False)
    print(f"Saved to {file_name}")

# Download both chapters in turn: HS 72 (iron and steel, tagged 'raw') and
# HS 73 (articles of iron and steel, tagged 'articles').
for chapter_code, label in (('72', 'raw'), ('73', 'articles')):
    fetch_and_save(chapter_code, label)

Fetching data for year 2010, HS 72
Fetching data for year 2011, HS 72
Fetching data for year 2012, HS 72
Fetching data for year 2013, HS 72
Fetching data for year 2014, HS 72
Fetching data for year 2015, HS 72
Fetching data for year 2016, HS 72
Fetching data for year 2017, HS 72
Fetching data for year 2018, HS 72
Fetching data for year 2019, HS 72
Fetching data for year 2020, HS 72
Fetching data for year 2021, HS 72
Fetching data for year 2022, HS 72
Fetching data for year 2023, HS 72
Fetching data for year 2024, HS 72


  df_all = pd.concat(dfs, ignore_index=True)


Saved to chn_iron_steel_raw_imports_HS72.csv
Fetching data for year 2010, HS 73
Fetching data for year 2011, HS 73
Fetching data for year 2012, HS 73
Fetching data for year 2013, HS 73
Fetching data for year 2014, HS 73
Fetching data for year 2015, HS 73
Fetching data for year 2016, HS 73
Fetching data for year 2017, HS 73
Fetching data for year 2018, HS 73
Fetching data for year 2019, HS 73
Fetching data for year 2020, HS 73
Fetching data for year 2021, HS 73
Fetching data for year 2022, HS 73
Fetching data for year 2023, HS 73
Fetching data for year 2024, HS 73
Saved to chn_iron_steel_articles_imports_HS73.csv


  df_all = pd.concat(dfs, ignore_index=True)


In [108]:
# ISO 3166 country table with regional codes, read straight from GitHub.
iso_url = "https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv?raw=true"
iso_df = pd.read_csv(iso_url)

# Distinct alpha-3 codes, used by later cells to filter partner rows.
country_isos = iso_df.loc[:, 'alpha-3'].unique()

In [299]:
DEFLATE = True

df = pd.read_csv("chn_iron_steel_articles_exports_HS73.csv")
df = df[['period', 'partnerISO', 'partnerDesc', 'primaryValue']]
df.columns = ['date', 'importerISO', 'importerDesc', 'value']
# Keep only partners whose code appears in the ISO alpha-3 list, from 2016 on.
df = df[df.importerISO.isin(country_isos)]
df = df.query('date >= 2016')

chart_n = 8  # not used by this chart

# Group by region, with the USA split out of the Americas
df = df.merge(iso_df[['alpha-3', 'region']], left_on='importerISO', right_on='alpha-3', how='left')
df['group'] = np.where(df.importerISO == 'USA', 'USA', df['region'])
df['group'] = np.where(df['group'] == 'Americas', 'Americas (excluding USA)', df['group'])

# Express values in billions.
df['value'] = df['value'].astype(float)/1e9

df['date'] = pd.to_datetime(df['date'], format='%Y')

df = df.groupby(['date', 'group'])['value'].sum().reset_index()
# Only the points of the final year carry a text label.
df['label'] = np.where(df.date == df.date.max(), df['group'], '')

# Deflate with the Chinese deflator
if DEFLATE:
    df = df.merge(deflators_df.query("country_id == 'CHN'")[['date', 'deflator']], on='date', how='left')
    df['value'] = df['value'] / df['deflator']


# order the stack by the last value
final_year = df['date'].max()
ranks = (
    df[df['date'] == final_year]
    .sort_values('value', ascending=True)  # optional: ascending for bottom-to-top stack
    .assign(stack=lambda d: range(1, len(d)+1))  # or use .rank() if you want float ranks
    [['group', 'stack']]
)

# Merge ranks back to full df
df = df.merge(ranks, on='group', how='left')

# Colour order follows the ranking in the final year of the data (the same
# year used for the labels and the stack order) instead of a hard-coded year.
color_domain = (
    df[df['date'] == final_year]
    .sort_values(by='value', ascending=False)
    .head(10)['group']
    .unique()
    .tolist()
)

base = alt.Chart(df).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title='', axis=alt.Axis(
        format='$,.0f',
        labelExpr="datum.label + (datum.value > 0 ? ' bn' : '')",
        )),
        order=alt.Order(
            'stack:N',
            sort='descending'
        ),

    color=alt.Color('group:N',
                    legend=None,
                    scale=alt.Scale(
                        domain=color_domain,
                        range=["#36B7B4","#E6224B","#F4C245","#0063AF","#00A767","#179FDB","#EB5C2E"]),
                     title='Importer'),
)

# The highlighted series is the USA. The data has no `importer` field and the
# group is labelled 'USA', so the test must be on `group == 'USA'`; the earlier
# tests on 'United States of America' could never match.
is_usa = alt.datum.group == 'USA'

areas = base.mark_area(
    interpolate='linear',
    line=True,
    opacity=0.5
).encode(
    size=alt.condition(is_usa, alt.value(3), alt.value(1.5)),
    opacity=alt.condition(is_usa, alt.value(1), alt.value(0.8)),
    tooltip=[
        alt.Tooltip('group:N', title='Region'),
        alt.Tooltip('value:Q', title='Value', format=',.0f'),
        alt.Tooltip('date:T', title='Year')
    ]
)

# Hand-tuned vertical offsets for the end labels; one set per DEFLATE setting.
# Groups without an entry (e.g. Asia) get an offset of 0.
dys = {
    "Oceania": -250,
    "Africa": -225,
    "Americas (excluding USA)": -200,
    "USA": -160,
    "Europe": -120,
} if not DEFLATE else {
    "Oceania": -235,
    "Africa": -210,
    "Americas (excluding USA)": -185,
    "USA": -150,
    "Europe": -110,
}

end_labels = base.mark_text(
    align='left',
    dx=5,
    dy=alt.expr(
        f'{json.dumps(dys)}[datum.group] ? {json.dumps(dys)}[datum.group] : 0'
    )
).encode(
    text='label:N',
    size=alt.condition(is_usa, alt.value(12), alt.value(10)),
    opacity=alt.condition(is_usa, alt.value(1), alt.value(0.8)),
)

print("CHN: Exports of Iron and Steel Articles by Region")
chart = areas + end_labels

# Make sure the output folder exists before writing the chart files.
os.makedirs("charts", exist_ok=True)
chart.save(f"charts/chn_iron_steel_articles_exports_by_region{'_deflated' if DEFLATE else ''}.html")
chart.save(f"charts/chn_iron_steel_articles_exports_by_region{'_deflated' if DEFLATE else ''}.png", scale_factor=3)
chart

CHN: Exports of Iron and Steel Articles by Region


# Normalised version

In [470]:
DEFLATE = True

df = pd.read_csv("chn_iron_steel_articles_exports_HS73.csv")
df = df[['period', 'partnerISO', 'partnerDesc', 'primaryValue']]
df.columns = ['date', 'importerISO', 'importerDesc', 'value']
# Keep only partners whose code appears in the ISO alpha-3 list, from 2016 on.
df = df[df.importerISO.isin(country_isos)]
df = df.query('date >= 2016')

chart_n = 8  # not used by this cell

# Group by region, with the USA split out of the Americas
df = df.merge(iso_df[['alpha-3', 'region']], left_on='importerISO', right_on='alpha-3', how='left')
df['group'] = np.where(df.importerISO == 'USA', 'USA', df['region'])
df['group'] = np.where(df['group'] == 'Americas', 'Americas (excluding USA)', df['group'])

# EU members by ISO alpha-3 code. The list used to end with "UK", which is not
# an alpha-3 code and therefore never matched any row; it is dropped here, so
# results are unchanged and the United Kingdom (GBR) stays in
# 'Europe (excluding EU)'.
eu = ["AUT","BEL","BGR","HRV","CYP","CZE","DNK","EST","FIN","FRA","DEU","GRC","HUN","IRL","ITA","LVA","LTU","LUX","MLT","NLD","POL","PRT","ROU","SVK","SVN","ESP","SWE"]
df['group'] = np.where(df['importerISO'].isin(eu), 'EU', df['group'])
df['group'] = np.where(df['group'] == 'Europe', 'Europe (excluding EU)', df['group'])

# Express values in billions.
df['value'] = df['value'].astype(float)/1e9

df['date'] = pd.to_datetime(df['date'], format='%Y')

df = df.groupby(['date', 'group'])['value'].sum().reset_index()
# Only the points of the final year carry a text label.
df['label'] = np.where(df.date == df.date.max(), df['group'], '')

# Deflate with the Chinese deflator
if DEFLATE:
    df = df.merge(deflators_df.query("country_id == 'CHN'")[['date', 'deflator']], on='date', how='left')
    df['value'] = df['value'] / df['deflator']


# order the stack by the last value
final_year = df['date'].max()
ranks = (
    df[df['date'] == final_year]
    .sort_values('value', ascending=True)  # optional: ascending for bottom-to-top stack
    .assign(stack=lambda d: range(1, len(d)+1))  # or use .rank() if you want float ranks
    [['group', 'stack']]
)

# Merge ranks back to full df
df = df.merge(ranks, on='group', how='left')
df 

Unnamed: 0,date,group,value,label,deflator,stack
0,2016-01-01,Africa,4.380371,,1.01171,3
1,2016-01-01,Americas (excluding USA),4.163658,,1.01171,4
2,2016-01-01,Asia,22.529787,,1.01171,7
3,2016-01-01,EU,5.996911,,1.01171,5
4,2016-01-01,Europe (excluding EU),2.871633,,1.01171,2
5,2016-01-01,Oceania,1.990815,,1.01171,1
6,2016-01-01,USA,8.873246,,1.01171,6
7,2017-01-01,Africa,4.322893,,1.0515,3
8,2017-01-01,Americas (excluding USA),4.725255,,1.0515,4
9,2017-01-01,Asia,23.166481,,1.0515,7


In [484]:
# Turn each group's value into its percentage of the yearly total across all
# groups, then keep only the EU and USA rows for plotting.
norm_df = (
    df.assign(value=lambda d: d['value'].astype(float))
    .assign(total=lambda d: d.groupby('date')['value'].transform('sum'))
    .assign(share=lambda d: d['value'] / d['total'] * 100)
    .assign(label=lambda d: np.where(d['date'] == d['date'].max(), d['group'], ''))
    .loc[lambda d: d['group'].isin(['EU', 'USA'])]
)

# Colours are assigned by rank of share in the latest year.
latest_year = norm_df.date.max().year
color_domain = (
    norm_df.query(f"date == {latest_year}")
    .sort_values(by='share', ascending=False)
    .head(10)
    .group.unique()
)

# Shared encodings for the areas and the end labels.
base = alt.Chart(norm_df).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y(
        'share:Q',
        title='',
        axis=alt.Axis(
            format='.0f',
            labelExpr="datum.value == 35 ? ['35% of Chinese', 'steel article exports'] : [datum.label + '%']",
        ),
    ),
    order=alt.Order('stack:N', sort='descending'),
    color=alt.Color(
        'group:N',
        legend=None,
        scale=alt.Scale(
            domain=color_domain,
            range=["#36B7B4","#E6224B","#F4C245","#0063AF","#00A767","#179FDB","#EB5C2E"],
        ),
        title='Importer',
    ),
)

# The USA series is emphasised.
is_usa = alt.datum.group == 'USA'

areas = base.mark_area(interpolate='linear', line=True, opacity=0.5).encode(
    size=alt.condition(is_usa, alt.value(3), alt.value(1.5)),
    opacity=alt.condition(is_usa, alt.value(1), alt.value(0.8)),
    tooltip=[
        alt.Tooltip('group:N', title='Region'),
        alt.Tooltip('share:Q', title='Share (%)', format='.1f'),
        alt.Tooltip('date:T', title='Year'),
    ],
)

# Vertical offsets for the end labels, looked up by group inside the Vega
# expression; a group with no entry (or an entry of 0) is not shifted.
dys = {
    "Oceania": -285,
    "Africa": -250,
    "Americas (excluding USA)": -225,
    "USA": 0,
    "EU": -110,
    "Europe": -130,
    "Asia": 0
}
dys_json = json.dumps(dys)

end_labels = base.mark_text(
    align='left',
    dx=5,
    dy=alt.expr(f'{dys_json}[datum.group] ? {dys_json}[datum.group] : 0'),
).encode(text='label:N')


print("CHN: Exports of Iron and Steel Articles by Region (normalized)")
chart = areas + end_labels
chart.save("charts/chn_iron_steel_articles_exports_by_region_normalized.json")
chart.save("charts/chn_iron_steel_articles_exports_by_region_normalized.png", scale_factor=3)
chart

CHN: Exports of Iron and Steel Articles by Region (normalized)


In [263]:
# Scratch check: show the rows of `df` whose group is 'Europe'.
# NOTE(review): the "Normalised version" prep cell relabels 'Europe' as
# 'Europe (excluding EU)', and this cell's execution count (263) is lower than
# that cell's (470), so the saved output presumably comes from an earlier state
# of `df` — on a fresh top-to-bottom run this likely returns no rows; confirm.
df.query("group == 'Europe'")

Unnamed: 0,date,group,value,label,stack
3,2016-01-01,Europe,8.95775,,5
9,2017-01-01,Europe,9.719699,,5
15,2018-01-01,Europe,11.401549,,5
21,2019-01-01,Europe,12.673412,,5
27,2020-01-01,Europe,12.520243,,5
33,2021-01-01,Europe,17.865605,,5
39,2022-01-01,Europe,17.466603,,5
45,2023-01-01,Europe,16.155405,Europe,5


# Washing Machines

In [180]:

# Request settings shared by both downloads: annual US imports of HS 845011,
# 845020 and 845090 for 2016-2024. Uses the notebook-level `subscription_key`.
params = dict(
    subscription_key=subscription_key,
    typeCode='C',
    freqCode='A',
    clCode='HS',
    period=','.join(map(str, range(2016, 2025))),
    reporterCode='842',       # USA
    cmdCode='845011,845020,845090',
    flowCode='M',             # Imports
    partner2Code=None,
    customsCode=None,
    motCode=None,
    maxRecords=50000,
    format_output='JSON',
    aggregateBy=None,
    breakdownMode='classic',
    countOnly=None,
    includeDesc=True,
)

# 1. Total US imports: partner code '0' is the World aggregate.
df_total = comtradeapicall.getFinalData(partnerCode='0', **params)
df_total.to_csv("us_total_washing_machine_imports_2016_2024.csv", index=False)

# 2. US imports broken down by partner: partnerCode=None requests all partners.
df_by_country = comtradeapicall.getFinalData(partnerCode=None, **params)
df_by_country.to_csv("us_washing_machine_imports_by_country_2016_2024.csv", index=False)

### US Washing Machines Imports by country

In [350]:
# Load the per-partner washing-machine download, restricted to the fields used below.
wm_columns = ['period', 'reporterISO', 'reporterDesc', 'partnerISO', 'partnerDesc', 'primaryValue', 'cmdDesc', 'cmdCode']
df = pd.read_csv("us_washing_machine_imports_by_country_2016_2024.csv").loc[:, wm_columns]

# Two untouched copies of the product-level rows for later cells.
full_df = df.copy()
full_wm_df = df.copy()

# Sum across product codes: one row per year and partner.
df = df.groupby(['period', 'partnerISO', 'partnerDesc'], as_index=False)['primaryValue'].sum()

# Ten largest partner entries over the whole period ...
top_exporters = (
    df.groupby(['partnerDesc', 'partnerISO'], as_index=False)['primaryValue']
    .sum()
    .sort_values(by='primaryValue', ascending=False)
    .head(10)
)

# ... of which the first six that are not the W00 / S19 entries are called out.
is_called_out = ~top_exporters['partnerISO'].isin(['W00', 'S19'])
callouts = top_exporters.loc[is_called_out, 'partnerISO'].head(6).unique().tolist()
callouts

['CHN', 'MEX', 'THA', 'KOR', 'VNM', 'DEU']

In [343]:
# Short display names for the three Comtrade commodity descriptions.
wm_short_names = {
    'Washing machines; household or laundry-type, fully-automatic, (of a dry linen capacity not exceeding 10kg)': 'Fully-automatic (<= 10kg)',
    'Washing machines; household or laundry-type, of a dry linen capacity exceeding 10kg': 'Fully-automatic (> 10kg)',
    'Washing machines; parts for household or laundry-type': 'Parts'
}

# World-total rows only, with the long descriptions replaced by the short names.
temp_df = (
    full_df.copy()
    .query("partnerISO == 'W00'")
    .assign(cmdDesc=lambda d: d['cmdDesc'].map(wm_short_names))
)

# One line per product: import value by year.
alt.Chart(temp_df).mark_line().encode(
    x=alt.X('period:T', title=''),
    y=alt.Y('primaryValue:Q', title='', axis=alt.Axis()),
    color=alt.Color('cmdDesc:N'),
).properties(
    width=500,
    height=350
)

In [344]:
# Lookup table: one row per distinct (commodity code, short name) pair.
temp_df = temp_df[['cmdCode', 'cmdDesc']].drop_duplicates()
temp_df

Unnamed: 0,cmdCode,cmdDesc
0,845011,Fully-automatic (<= 10kg)
22,845020,Fully-automatic (> 10kg)
43,845090,Parts


In [363]:
# Washers-only yearly totals for the later comparison chart: World-total rows,
# with the parts code (845090) left out, summed per year.
is_world_total = full_wm_df['partnerISO'] == 'W00'
is_not_parts = full_wm_df['cmdCode'] != 845090
df = (
    full_wm_df.copy()
    .loc[is_world_total & is_not_parts, ['period', 'primaryValue']]
    .groupby('period', as_index=False)['primaryValue']
    .sum()
    .assign(product='Washing machines')
)
aggregated_wm_df = df.copy()

# In total

In [356]:
DEFLATE = True

# World-total washing-machine imports: sum all product rows per year and
# express the result in billions.
df = (
    pd.read_csv("us_total_washing_machine_imports_2016_2024.csv")
    .loc[:, ['period', 'partnerISO', 'partnerDesc', 'primaryValue', 'cmdDesc']]
    .set_axis(['date', 'exporterISO', 'exporter', 'value', 'good'], axis=1)
    .groupby(['date', 'exporterISO', 'exporter'], as_index=False)['value']
    .sum()
    .assign(
        date=lambda d: pd.to_datetime(d['date'], format='%Y'),
        value=lambda d: d['value'].astype(float) / 1e9,
    )
)

# Deflate with the rebased US CPI series (matched on the 1 January date).
if DEFLATE:
    df = df.merge(us_cpi_df[['date', 'deflator']], on='date', how='left')
    df['value'] = df['value'] / df['deflator']

value_axis = alt.Axis(
    format='$,.0f',
    labelExpr="datum.label + (datum.value > 0 ? ' bn' : '')",
)
chart = (
    alt.Chart(df)
    .mark_line()
    .encode(
        x=alt.X('date:T', title=''),
        y=alt.Y('value:Q', title='', axis=value_axis),
    )
    .properties(width=500, height=350)
)

# Write the spec and a PNG; the file name records whether values are deflated.
file_suffix = '_deflated' if DEFLATE else ''
chart.save(f"charts/us_total_washing_machine_imports_2016_2024{file_suffix}.json")
chart.save(f"charts/us_total_washing_machine_imports_2016_2024{file_suffix}.png", scale_factor=3)
chart

# Washers vs others

In [357]:
# HS codes to request for each appliance (comma-separated where several apply).
product_hs = {
    "washing_machines": "845011",
    "dryers": "845121",
    "fridges_freezers": "841810,841829",
    "dishwashers": "842211"
}


# Request settings shared by every product: annual US imports from the World
# aggregate. Note that the period covers 2014-2024 although the output file
# names below say 2016_2024.
base_params = dict(
    subscription_key=subscription_key,
    typeCode='C',
    freqCode='A',
    clCode='HS',
    period=','.join(map(str, range(2014, 2025))),
    reporterCode='842',  # USA
    flowCode='M',        # Imports
    partnerCode='0',     # World (total)
    partner2Code=None,
    customsCode=None,
    motCode=None,
    maxRecords=5000,
    format_output='JSON',
    aggregateBy=None,
    breakdownMode='classic',
    countOnly=None,
    includeDesc=True,
)

# Output folder for the per-product CSV files.
os.makedirs("appliance_imports", exist_ok=True)

# One request and one CSV per product.
for label, cmd_code in product_hs.items():
    print(f"Fetching data for: {label}")
    out_path = f"appliance_imports/{label}_us_imports_2016_2024.csv"
    df = comtradeapicall.getFinalData(cmdCode=cmd_code, **base_params)
    df.to_csv(out_path, index=False)
    print(f"Saved to {out_path}")

Fetching data for: washing_machines
Saved to appliance_imports/washing_machines_us_imports_2016_2024.csv
Fetching data for: dryers
Saved to appliance_imports/dryers_us_imports_2016_2024.csv
Fetching data for: fridges_freezers
Saved to appliance_imports/fridges_freezers_us_imports_2016_2024.csv
Fetching data for: dishwashers
Saved to appliance_imports/dishwashers_us_imports_2016_2024.csv


In [463]:
DEFLATE = True

# CSV per comparison product. Washing machines are not read from a file here:
# they come from `aggregated_wm_df`, built in an earlier cell.
product_files = {
    "Dryers": "appliance_imports/dryers_us_imports_2016_2024.csv",
    "Fridges & freezers": "appliance_imports/fridges_freezers_us_imports_2016_2024.csv",
    "Dishwashers": "appliance_imports/dishwashers_us_imports_2016_2024.csv"
}

# Yearly totals per product, starting with the washing-machine series.
dfs = [aggregated_wm_df]
for label, path in product_files.items():
    df = (
        pd.read_csv(path)[['period', 'primaryValue']]
        .groupby('period', as_index=False)['primaryValue']
        .sum()
        .assign(product=label)
    )
    dfs.append(df)

df_all = pd.concat(dfs, ignore_index=True)

# Index every product to its own 2017 value (2017 = 100).
base_2017 = df_all[df_all['period'] == 2017].set_index('product')['primaryValue']
df_all['index'] = [
    value / base_2017[product] * 100
    for value, product in zip(df_all['primaryValue'], df_all['product'])
]

# Tidy frame for plotting; only the final year carries an end label.
df = (
    df_all[['period', 'product', 'index']]
    .rename(columns={'period': 'year'})
    .assign(end_label=lambda d: np.where(d['year'] == d['year'].max(), d['product'], ''))
    .assign(year=lambda d: pd.to_datetime(d['year'], format='%Y'))
)

# Divide the index by the rebased US CPI series (matched on the 1 January date).
if DEFLATE:
    df = df.merge(us_cpi_df[['date', 'deflator']], left_on='year', right_on='date', how='left')
    df['index'] = df['index'] / df['deflator']

# Colour order: products ranked by index in 2024.
color_domain = df.query("year == 2024").sort_values(by='index', ascending=False).head(10)['product'].unique()

base = alt.Chart(df).encode(
    x=alt.X('year:T', title=''),
    y=alt.Y(
        'index:Q',
        title='',
        axis=alt.Axis(
            format=',.0f',
            labelExpr="datum.value == 100? '2017=100' : datum.label",
        ),
    ),
    color=alt.Color(
        'product:N',
        legend=None,
        scale=alt.Scale(domain=color_domain),
        title='Product',
    ),
)

# Washing machines are the emphasised series.
is_washer = alt.datum.product == 'Washing machines'

lines = base.mark_line().encode(
    size=alt.condition(is_washer, alt.value(3), alt.value(1.5)),
    opacity=alt.condition(is_washer, alt.value(1), alt.value(0.8)),
)

# In the deflated chart the 'Fridges & freezers' label is shifted down by 5.
label_dy = alt.expr("datum.product == 'Fridges & freezers' ? 5 : 0") if DEFLATE else 0
end_labels = base.mark_text(
    align='left',
    dx=5,
    dy=label_dy,
).encode(
    text='end_label:N',
    size=alt.condition(is_washer, alt.value(12), alt.value(10)),
    opacity=alt.condition(is_washer, alt.value(1), alt.value(0.8)),
)

chart = lines + end_labels
chart.save("charts/us_appliance_imports_indexed.json")
chart.save("charts/us_appliance_imports_indexed.png", scale_factor=3)
chart


# Solar

In [464]:
# [sheet name, value column] of the measure to chart.
unit = ["First Unit of Quantity", "FIRST_UNIT_QUANTITY"]
# unit = ["FAS Value", "FAS Value"]

# Read the chosen sheet and keep the two solar product codes.
df = pd.read_excel("US_Solar_DataWeb-Query-Export.xlsx", sheet_name=unit[0])
targets = [8541406020, 8541406030]
df = df[df['Schedule B'].isin(targets)]
df = df[['Year', 'Schedule B', 'Description', unit[1]]]
df.columns = ['year', 'cmdCode', 'cmdDesc', 'value']
df = df.query("value > 0 and year > 2014")

# Index to 2017 = 100, per product description (vectorised lookup of each
# row's own 2017 base value).
base = df[df['year'] == 2017].set_index('cmdDesc')['value']
df['index'] = df['value'] / df['cmdDesc'].map(base) * 100
df['end_label'] = np.where(df.year == df.year.max(), df['cmdDesc'], '')

# Short display names for the two descriptions.
df['product'] = df['cmdDesc'].map({
    'SOLAR CELLS ASSEMBLED INTO MODULES OR PANELS': 'Solar panels and modules',
    'SOLAR CELLS, NOT ASSEMBLED INTO MODULES OR MADE UP INTO PANELS': 'Solar cells (not assembled)'
})

# Grouped bars: one facet column per year, one bar per product.
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('cmdDesc:N', 
            axis=alt.Axis(labels=False, ticks=False),
            title=''),
    y=alt.Y('index:Q', 
            axis=alt.Axis(format=',.0f',
                labelExpr="datum.value == 100? '2017=100' : datum.label"
            ),
            title=''),
    color=alt.Color('product:N',
                    legend=alt.Legend(
                        orient='top',
                        title=''
                    ),
                    scale=alt.Scale(),
                    title='Product'),
    column=alt.Column('year:N', 
                      header=alt.Header(labelOrient='bottom',
                                        labelColor="#676A86",
                                         labelPadding=5, titleOrient='bottom', titlePadding=0),
                      title='', spacing=10),  # spacing adds space between columns
    tooltip=[
        alt.Tooltip('product:N', title='Product'),
        alt.Tooltip('index:Q', title='Value', format=',.0f'),
        # `year` holds plain integers and is a nominal facet field above, so it
        # is not declared temporal here; the tooltip shows the year as-is.
        alt.Tooltip('year:O', title='Year')
    ]
).properties(
    width=60,
    height=300
)

# Reference rule at 100. Defined but not layered onto the faceted chart.
dotted_100_line = alt.Chart(pd.DataFrame({'y': [100]})).mark_rule(strokeDash=[5, 5], color='#676A86').encode(
    y=alt.Y('y:Q', title=''),
)

# Make sure the output folder exists before writing the chart files.
os.makedirs("charts", exist_ok=True)
chart.save("charts/us_solar_imports_indexed.json")
chart.save("charts/us_solar_imports_indexed.png", scale_factor=3)
chart

  warn("Workbook contains no default style, apply openpyxl's default")


In [436]:
# Distinct product descriptions present in the solar frame.
df['cmdDesc'].unique()

array(['SOLAR CELLS ASSEMBLED INTO MODULES OR PANELS',
       'SOLAR CELLS, NOT ASSEMBLED INTO MODULES OR MADE UP INTO PANELS'],
      dtype=object)

In [117]:
# Scratch inspection of the 2023 rows of `df`.
# NOTE(review): the saved output has an `importer` column, which only the
# commented-out per-importer variant of the China chart would create, and the
# solar cell above leaves `df` without a `date` column — presumably a stale
# leftover (execution count 117) that fails on a fresh top-to-bottom run; confirm.
df.query("date == 2023")

Unnamed: 0,date,importer,value,label
49,2023-01-01,Australia,3.393829,Australia
50,2023-01-01,Japan,3.87822,Japan
51,2023-01-01,Other,65.438924,Other
52,2023-01-01,Philippines,2.853368,Philippines
53,2023-01-01,Rep. of Korea,4.315917,Rep. of Korea
54,2023-01-01,USA,13.201072,USA
55,2023-01-01,Viet Nam,3.059887,Viet Nam


In [47]:
# Scratch inspection: the ten largest rows of `full_df` for 2015, by value.
# NOTE(review): the saved output shows the steel-import columns (exporterISO,
# exporterDesc, value), but the last visible assignment to `full_df` is the
# washing-machine frame, which has `period`/`primaryValue` instead —
# presumably stale (execution count 47); confirm.
full_df.query("date == 2015").sort_values(by='value', ascending=False).head(10)

Unnamed: 0,date,reporterISO,reporterDesc,exporterISO,exporterDesc,value
476,2015,USA,USA,W00,World,27865820000.0
494,2015,USA,USA,CAN,Canada,4508541000.0
489,2015,USA,USA,BRA,Brazil,3081039000.0
525,2015,USA,USA,KOR,Rep. of Korea,2382669000.0
498,2015,USA,USA,CHN,China,2012114000.0
523,2015,USA,USA,JPN,Japan,1699412000.0
547,2015,USA,USA,RUS,Russian Federation,1662080000.0
528,2015,USA,USA,MEX,Mexico,1514700000.0
512,2015,USA,USA,DEU,Germany,1281981000.0
570,2015,USA,USA,TUR,Türkiye,1186734000.0


In [43]:
# Scratch display of `top_exporters`.
# NOTE(review): the saved output has exporterDesc/exporterISO columns (the
# steel cells), while the last visible assignment to `top_exporters` is the
# washing-machine ranking with partnerDesc/partnerISO — presumably stale
# (execution count 43); confirm.
top_exporters

Unnamed: 0,exporterDesc,exporterISO,value
148,World,W00,449014500000.0
27,Canada,CAN,92383160000.0
23,Brazil,BRA,51000570000.0
87,Mexico,MEX,38779730000.0
112,Rep. of Korea,KOR,26784640000.0
115,Russian Federation,RUS,25764730000.0
71,Japan,JPN,20657590000.0
55,Germany,DEU,18541220000.0
30,China,CHN,16545020000.0
103,"Other Asia, nes",S19,14058820000.0
