# Eurostat exports data

Source: [Eurostat](https://ec.europa.eu/eurostat/data/database)

## 1. Import, tidy and transform data

In [1]:
import pandas as pd

# Notebook parameters. Both values are interpolated into the raw input file
# name and the processed output file names further down.
# Supported country codes: 'de', 'fr', 'gb', 'it'
country = 'it'
# Supported materials: 'plastics', 'paper'
material = 'paper'

In [2]:
# Load the raw Eurostat extract for the selected country/material and tidy it
# into one row per (partner country, month) with a numeric `kg` column.
df = (
    pd.read_csv(
        f'data/raw/de-fr-gb-it-eurostat/{country}-{material}-exports.csv',
        usecols=[0, 1, 2, 3, 7],
        # Necessary because the ISO code for Namibia is 'NA', which pandas
        # would otherwise read as a missing value.
        keep_default_na=False,
        parse_dates=['PERIOD'],
    )
    # ':' cells become empty strings so that they parse to NaN below
    # (presumably Eurostat's "not available" marker -- confirm).
    .replace({'Value': {':': ''}})
    # Strip thousands separators, convert to numbers and scale by 100
    # (presumably the raw quantity is reported in units of 100 kg -- confirm).
    .assign(
        kg=lambda frame: pd.to_numeric(frame['Value'].str.replace(',', '')) * 100
    )
    .drop(columns='Value')
    .rename(columns={
        'PERIOD': 'period',
        'PARTNER': 'iso2',
        'PARTNER_LABEL': 'country_name',
        'REPORTER': 'exporter',
    })
    .sort_values(['country_name', 'period'])
    .reset_index(drop=True)
)

print(len(df))

df.head()

17952


Unnamed: 0,period,iso2,country_name,exporter,kg
0,2013-01-01,AF,AFGHANISTAN,ITALY,
1,2013-02-01,AF,AFGHANISTAN,ITALY,
2,2013-03-01,AF,AFGHANISTAN,ITALY,
3,2013-04-01,AF,AFGHANISTAN,ITALY,
4,2013-05-01,AF,AFGHANISTAN,ITALY,


## 2. Check data types and nulls

In [3]:
# Show the dtype of every column to confirm that `period` was parsed as a
# datetime and `kg` as a number.
df.dtypes

period          datetime64[ns]
iso2                    object
country_name            object
exporter                object
kg                     float64
dtype: object

In [4]:
# Per column: does it contain at least one missing value?
df.isna().any()

period          False
iso2            False
country_name    False
exporter        False
kg               True
dtype: bool

## 3. Identify countries importing zero waste

In [5]:
# ISO codes of partner countries whose total `kg` over the whole period is
# zero. NaN months are skipped by `sum`, so a country with only missing
# values also totals 0 and is included.
#
# Only the `kg` column is aggregated: summing the whole frame would also try
# to sum the datetime `period` column (a TypeError in pandas >= 2.0) and
# needlessly concatenate the string columns.
null_countries = (
    df
    .groupby('iso2')['kg']
    .sum()
    .loc[lambda total_kg: total_kg == 0]
    .index
)

print(len(null_countries))

null_countries

192


Index(['AD', 'AF', 'AG', 'AI', 'AM', 'AN', 'AO', 'AQ', 'AR', 'AS',
       ...
       'XO', 'XP', 'XR', 'XZ', 'YD', 'YE', 'YT', 'YU', 'ZA', 'ZM'],
      dtype='object', name='iso2', length=192)

## 4. Check monthly export totals

In [6]:
# Monthly rows for partner countries that are not in `null_countries`, with
# the exporter label replaced by the upper-cased country code.
df_monthly = (
    df
    .loc[lambda frame: ~frame['iso2'].isin(null_countries)]
    .assign(exporter=country.upper())
    .reset_index(drop=True)
)

print(len(df_monthly))

df_monthly.head()

5280


Unnamed: 0,period,iso2,country_name,exporter,kg
0,2013-01-01,AL,ALBANIA,IT,
1,2013-02-01,AL,ALBANIA,IT,
2,2013-03-01,AL,ALBANIA,IT,
3,2013-04-01,AL,ALBANIA,IT,
4,2013-05-01,AL,ALBANIA,IT,


In [7]:
# Total exported kg per calendar month of 2017, across all partner countries.
#
# The month key is derived from the filtered rows themselves (rather than
# index-aligning against the unfiltered frame), and only `kg` is summed so the
# datetime and string columns are never aggregated (summing a datetime column
# raises a TypeError in pandas >= 2.0).
(
    df_monthly
    .loc[lambda frame: frame['period'].dt.year == 2017]
    .assign(period=lambda frame: frame['period'].dt.month)
    .groupby('period')[['kg']]
    .sum()
)

Unnamed: 0_level_0,kg
period,Unnamed: 1_level_1
1,221283800.0
2,192159300.0
3,211557700.0
4,103480800.0
5,136740900.0
6,158193600.0
7,165082200.0
8,117579700.0
9,102540400.0
10,168090000.0


## 5. Write monthly data to CSV

In [8]:
# Persist the monthly table; the positional index is not written because it
# carries no information after `reset_index(drop=True)`.
df_monthly.to_csv(
    path_or_buf=f'data/processed/de-fr-gb-it-eurostat/{country}-{material}-exports-monthly.csv',
    index=False,
)

## 6. Compare H1 2017 to H1 2018

In [9]:
# First-half (January-June) export totals per partner country for 2017 and
# 2018, one row per country and one column per year, plus:
#   pct_change               relative change of the 2018 total vs. 2017
#   h1_2017_proportion       country's share of all H1 2017 exports
#   h1_2018_proportion       country's share of all H1 2018 exports
#   pct_change_proportional  difference between the two shares
# Rows are ordered from the largest loss of share to the largest gain.
df_h1 = (
    df
    .loc[lambda frame:
        ~frame['iso2'].isin(null_countries)
        & frame['period'].dt.year.isin([2017, 2018])
        & frame['period'].dt.month.isin([1, 2, 3, 4, 5, 6])
    ]
    # Replace the timestamp with its year so the pivot gets one column per year.
    .assign(period=lambda frame: frame['period'].dt.year)
    .pivot_table(
        values='kg',
        index='country_name',
        columns='period',
        aggfunc='sum',
    )
    .assign(
        pct_change=lambda totals: (totals[2018] - totals[2017]) / totals[2017],
        h1_2017_proportion=lambda totals: totals[2017] / totals[2017].sum(),
        h1_2018_proportion=lambda totals: totals[2018] / totals[2018].sum(),
    )
    .assign(
        pct_change_proportional=lambda totals:
            totals['h1_2018_proportion'] - totals['h1_2017_proportion']
    )
    .sort_values('pct_change_proportional')
)

print(len(df_h1))

df_h1.head()

80


period,2017,2018,pct_change,h1_2017_proportion,h1_2018_proportion,pct_change_proportional
country_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CHINA (PEOPLE'S REPUBLIC OF),543917000.0,211809400.0,-0.610585,0.531472,0.252221,-0.279251
SWITZERLAND (incl. LI->1994),24047800.0,3365300.0,-0.860058,0.023498,0.004007,-0.01949
MALAYSIA,13749000.0,5147500.0,-0.625609,0.013434,0.00613,-0.007305
VIETNAM (excl. NORTH -> 1976),60395600.0,45854700.0,-0.240761,0.059014,0.054603,-0.00441
NETHERLANDS,8084400.0,2970400.0,-0.632576,0.007899,0.003537,-0.004362


## 7. Write H1 comparison data to CSV

In [10]:
# Export of the H1 comparison table, currently disabled.
# NOTE(review): `df_h1` is indexed by `country_name`, so re-enabling this cell
# with `index=False` would write the figures without the country names --
# drop that argument (or reset the index first) before uncommenting.
# df_h1.to_csv(
#     f'data/processed/de-fr-gb-it-eurostat/{country}-{material}-exports-h1.csv',
#     index=False
# )