# US Census Bureau exports data

Source: [US Census Bureau](https://usatrade.census.gov/index.php)

## Plastics exports

---

### 1. Import, tidy and transform data

In [1]:
import pandas as pd

In [2]:
# Load the raw plastics export file and collapse it to one row per
# destination country code and month, with the quantity summed in kg.
df_plastics = (
    pd.read_csv(
        'data/raw/us-uscb/us-plastics-exports.csv',
        header=4,                 # column labels are on row 4 (0-based)
        usecols=[*range(0, 4)],   # keep only the first four columns
        dtype={'Quantity 1': 'int'},
        parse_dates=['Time'],
        thousands=','             # treat ',' as a thousands separator
    )
    .rename(columns={
        'Country': 'country_code',
        'Time': 'period',
        'Quantity 1': 'kg'
    })
    # Only 'kg' is aggregated; the remaining input column is discarded here.
    .groupby(['country_code', 'period'], as_index=False)['kg'].sum()
    .sort_values(['country_code', 'period'])
    .reset_index(drop=True)
)

print(len(df_plastics))

df_plastics.head()

3345


Unnamed: 0,country_code,period,kg
0,1220,2013-01-01,10331054
1,1220,2013-02-01,9314511
2,1220,2013-03-01,10729346
3,1220,2013-04-01,11235408
4,1220,2013-05-01,12423949


### 2. Check data types and nulls

In [3]:
# Inspect the dtype of each column after loading and aggregating.
df_plastics.dtypes

country_code             int64
period          datetime64[ns]
kg                       int64
dtype: object

In [4]:
# Per column: True if it contains at least one missing value.
df_plastics.isnull().any()

country_code    False
period          False
kg              False
dtype: bool

### 3. Join to country names

In [5]:
# Country lookup table (Code, Name, ISO Code).
# pandas' default missing-value markers are switched off so that Namibia's
# ISO code, the literal string 'NA', is kept as text rather than read as NaN.
df_countries = pd.read_csv('data/raw/us-uscb/country2.csv', keep_default_na=False)

print(len(df_countries))

df_countries.head()

241


Unnamed: 0,Code,Name,ISO Code
0,5310,Afghanistan,AF
1,4810,Albania,AL
2,7210,Algeria,DZ
3,9510,American Samoa,AS
4,4271,Andorra,AD


In [6]:
# Attach country names and ISO codes to the monthly totals.
# A left join keeps every export row even when its code has no match in the
# lookup table; such rows end up with a missing country_name and iso2.
# merge() already returns a new frame, so no defensive copy is needed.
df_plastics_joined = (
    df_plastics
    .merge(df_countries, how='left', left_on='country_code', right_on='Code')
    .drop(columns=['country_code', 'Code'])
    .rename(columns={
        'Name': 'country_name',
        'ISO Code': 'iso2'
    })
    .sort_values(['country_name', 'period'])
    .reset_index(drop=True)
)

# A duplicated Code in the lookup table would silently multiply export rows.
assert len(df_plastics_joined) == len(df_plastics), 'join changed the row count'

print(len(df_plastics_joined))

df_plastics_joined.head()

3345


Unnamed: 0,period,kg,country_name,iso2
0,2018-05-01,20000,Afghanistan,AF
1,2015-08-01,8000,Algeria,DZ
2,2018-05-01,16000,Algeria,DZ
3,2013-10-01,17754,Angola,AO
4,2013-11-01,18900,Angola,AO


### 4. Write monthly data to CSV

In [7]:
# Monthly table for export: every row is tagged with the exporter code 'US'.
# assign() returns a new frame, leaving df_plastics_joined untouched.
df_plastics_m = df_plastics_joined.assign(exporter='US')

df_plastics_m.head()

Unnamed: 0,period,kg,country_name,iso2,exporter
0,2018-05-01,20000,Afghanistan,AF,US
1,2015-08-01,8000,Algeria,DZ,US
2,2018-05-01,16000,Algeria,DZ,US
3,2013-10-01,17754,Angola,AO,US
4,2013-11-01,18900,Angola,AO,US


In [8]:
# Write the monthly plastics table, in a fixed column order and without the
# positional row index.
df_plastics_m.to_csv(
    'data/processed/us-uscb/us-plastics-exports-monthly.csv',
    columns=['period', 'iso2', 'country_name', 'exporter', 'kg'],
    index=False
)

### 5. Compare H1 2017 to H1 2018

In [9]:
# Compare January-June totals for 2017 and 2018 by destination country.
#
# Resulting columns (one row per country_name):
#   2017, 2018                summed kg for H1 of each year
#   pct_change                relative change in kg from H1 2017 to H1 2018
#   h1_2017_proportion        the country's share of the H1 2017 total
#   h1_2018_proportion        the country's share of the H1 2018 total
#   pct_change_proportional   h1_2018_proportion - h1_2017_proportion
#
# A country with no rows in one of the two half-years gets NaN for that year
# and for the columns derived from it.
#
# The year used for the pivot columns is derived from the already-filtered
# rows instead of passing a Series taken from the unfiltered frame and relying
# on pandas to align it by index.
df_plastics_h1 = (
    df_plastics_joined
    .loc[lambda x:
        x['period'].dt.year.isin([2017, 2018])
        & x['period'].dt.month.isin([1, 2, 3, 4, 5, 6])
    ]
    .assign(year=lambda x: x['period'].dt.year)
    .pivot_table(
        values='kg',
        index='country_name',
        columns='year',
        aggfunc='sum'
    )
    # Keep the columns axis labelled 'period', as it was before.
    .rename_axis(columns='period')
    .assign(pct_change=lambda x: (x[2018] - x[2017]) / x[2017])
    .assign(h1_2017_proportion=lambda x: x[2017] / x[2017].sum())
    .assign(h1_2018_proportion=lambda x: x[2018] / x[2018].sum())
    .assign(pct_change_proportional=lambda x:
        x['h1_2018_proportion'] - x['h1_2017_proportion']
    )
    # Ascending: the largest losses of share come first.
    .sort_values('pct_change_proportional')
)

df_plastics_h1.head()

period,2017,2018,pct_change,h1_2017_proportion,h1_2018_proportion,pct_change_proportional
country_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
China,379379636.0,30247574.0,-0.920271,0.399436,0.045365,-0.354071
Hong Kong,257663474.0,60453277.0,-0.765379,0.271285,0.090667,-0.180618
Indonesia,20189337.0,8267584.0,-0.590497,0.021257,0.0124,-0.008857
Singapore,3278443.0,619660.0,-0.81099,0.003452,0.000929,-0.002522
Colombia,2353296.0,1110145.0,-0.52826,0.002478,0.001665,-0.000813


### 6. Write H1 comparison data to CSV

In [10]:
# Export of the plastics H1 comparison is currently disabled.
# NOTE(review): df_plastics_h1 is indexed by country_name (set by the
# pivot_table call), so index=False would write the rows without their
# country names if this is re-enabled. Confirm before uncommenting.
# df_plastics_h1.to_csv(
#     'data/processed/us-uscb/us-plastics-exports-h1.csv',
#     index=False
# )

## Paper exports

---

In [11]:
# Load the raw paper export file and collapse it to one row per destination
# country code and month, with the quantity summed in kg.
df_paper = (
    pd.read_csv(
        'data/raw/us-uscb/us-paper-exports.csv',
        header=4,                 # column labels are on row 4 (0-based)
        usecols=[*range(0, 4)],   # keep only the first four columns
        dtype={'Quantity 1': 'int'},
        parse_dates=['Time'],
        thousands=','             # treat ',' as a thousands separator
    )
    # NOTE(review): presumably 'Quantity 1' is reported in metric tons for
    # this file, hence the factor of 1000 to reach kg. Confirm against source.
    .assign(kg=lambda x: x['Quantity 1'] * 1000)
    .drop(columns='Quantity 1')
    .rename(columns={
        'Country': 'country_code',
        'Time': 'period',
    })
    # Sum only 'kg'. Without the explicit selection, the remaining
    # non-numeric input column is no longer dropped automatically by
    # pandas >= 2.0 and would be string-concatenated (or raise) instead.
    .groupby(['country_code', 'period'], as_index=False)['kg'].sum()
    .sort_values(['country_code', 'period'])
    .reset_index(drop=True)
)

print(len(df_paper))

df_paper.head()

3248


Unnamed: 0,country_code,period,kg
0,1220,2013-01-01,56766000
1,1220,2013-02-01,47396000
2,1220,2013-03-01,49992000
3,1220,2013-04-01,51477000
4,1220,2013-05-01,52426000


In [12]:
# Inspect the dtype of each column after loading and aggregating.
df_paper.dtypes

country_code             int64
period          datetime64[ns]
kg                       int64
dtype: object

In [13]:
# Per column: True if it contains at least one missing value.
df_paper.isnull().any()

country_code    False
period          False
kg              False
dtype: bool

In [14]:
# Attach country names and ISO codes to the monthly totals.
# A left join keeps every export row even when its code has no match in the
# lookup table; such rows end up with a missing country_name and iso2.
# merge() already returns a new frame, so no defensive copy is needed.
df_paper_joined = (
    df_paper
    .merge(df_countries, how='left', left_on='country_code', right_on='Code')
    .drop(columns=['country_code', 'Code'])
    .rename(columns={
        'Name': 'country_name',
        'ISO Code': 'iso2'
    })
    .sort_values(['country_name', 'period'])
    .reset_index(drop=True)
)

# A duplicated Code in the lookup table would silently multiply export rows.
assert len(df_paper_joined) == len(df_paper), 'join changed the row count'

print(len(df_paper_joined))

df_paper_joined.head()

3248


Unnamed: 0,period,kg,country_name,iso2
0,2018-03-01,20000,Afghanistan,AF
1,2013-10-01,32000,Albania,AL
2,2018-05-01,177000,Albania,AL
3,2013-02-01,68000,Algeria,DZ
4,2013-03-01,38000,Algeria,DZ


In [15]:
# Monthly table for export: every row is tagged with the exporter code 'US'.
# assign() returns a new frame, leaving df_paper_joined untouched.
df_paper_m = df_paper_joined.assign(exporter='US')

df_paper_m.head()

Unnamed: 0,period,kg,country_name,iso2,exporter
0,2018-03-01,20000,Afghanistan,AF,US
1,2013-10-01,32000,Albania,AL,US
2,2018-05-01,177000,Albania,AL,US
3,2013-02-01,68000,Algeria,DZ,US
4,2013-03-01,38000,Algeria,DZ,US


In [16]:
# Write the monthly paper table, in a fixed column order and without the
# positional row index.
df_paper_m.to_csv(
    'data/processed/us-uscb/us-paper-exports-monthly.csv',
    columns=['period', 'iso2', 'country_name', 'exporter', 'kg'],
    index=False
)

In [17]:
# Compare January-June totals for 2017 and 2018 by destination country.
#
# Resulting columns (one row per country_name):
#   2017, 2018                summed kg for H1 of each year
#   pct_change                relative change in kg from H1 2017 to H1 2018
#   h1_2017_proportion        the country's share of the H1 2017 total
#   h1_2018_proportion        the country's share of the H1 2018 total
#   pct_change_proportional   h1_2018_proportion - h1_2017_proportion
#
# A country with no rows in one of the two half-years gets NaN for that year
# and for the columns derived from it.
#
# The year used for the pivot columns is derived from the already-filtered
# rows instead of passing a Series taken from the unfiltered frame and relying
# on pandas to align it by index.
df_paper_h1 = (
    df_paper_joined
    .loc[lambda x:
        x['period'].dt.year.isin([2017, 2018])
        & x['period'].dt.month.isin([1, 2, 3, 4, 5, 6])
    ]
    .assign(year=lambda x: x['period'].dt.year)
    .pivot_table(
        values='kg',
        index='country_name',
        columns='year',
        aggfunc='sum'
    )
    # Keep the columns axis labelled 'period', as it was before.
    .rename_axis(columns='period')
    .assign(pct_change=lambda x: (x[2018] - x[2017]) / x[2017])
    .assign(h1_2017_proportion=lambda x: x[2017] / x[2017].sum())
    .assign(h1_2018_proportion=lambda x: x[2018] / x[2018].sum())
    .assign(pct_change_proportional=lambda x:
        x['h1_2018_proportion'] - x['h1_2017_proportion']
    )
    # Ascending: the largest losses of share come first.
    .sort_values('pct_change_proportional')
)

df_paper_h1.head()

period,2017,2018,pct_change,h1_2017_proportion,h1_2018_proportion,pct_change_proportional
country_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
China,6172292000.0,3792834000.0,-0.385506,0.664276,0.413448,-0.250828
Chile,20976000.0,14738000.0,-0.297387,0.002257,0.001607,-0.000651
Bolivia,3409000.0,289000.0,-0.915224,0.000367,3.2e-05,-0.000335
Panama,1570000.0,84000.0,-0.946497,0.000169,9e-06,-0.00016
Argentina,1601000.0,181000.0,-0.886946,0.000172,2e-05,-0.000153


In [18]:
# Export of the paper H1 comparison is currently disabled. With the default
# index=True, country_name (the pivot index) is written as the first column.
# df_paper_h1.to_csv('data/processed/us-uscb/us-paper-exports-h1.csv')