In [1]:
import pandas as pd

from owid import catalog
from owid.catalog import charts

In [2]:
# Load the per-country plastic waste figures and give the columns short names.
# The rename is positional, so the CSV is expected to hold exactly these four
# columns in this order.
plastic_waste_csv = '../data/plastic-waste-generation-total.csv'
short_column_names = ['country', 'code', 'year', 'total_plastic']

country_keys = pd.read_csv(plastic_waste_csv)
country_keys.columns = short_column_names
country_keys

Unnamed: 0,country,code,year,total_plastic
0,Albania,ALB,2010,73364
1,Algeria,DZA,2010,1898343
2,Angola,AGO,2010,528843
3,Antigua and Barbuda,ATG,2010,22804
4,Argentina,ARG,2010,2753550
...,...,...,...,...
163,Uruguay,URY,2010,310379
164,Vanuatu,VUT,2010,25443
165,Venezuela,VEN,2010,2669998
166,Vietnam,VNM,2010,3268227


In [3]:
# Search the Our World in Data catalog (through the `owid` package) for every
# table related to "plastic".
# NOTE(review): the original note attributed these datasets to the World Bank -
# confirm the actual source.
plastic_catalogs = catalog.find('plastic')
# Lists the matching table names; not the last expression of the cell, so the
# notebook does not display the result.
plastic_catalogs.table.unique()

# Pick one table by name and resolve its catalog path (first matching row wins).
dataset_name = "plastic_use_polymer"
matching_rows = plastic_catalogs[plastic_catalogs['table'] == dataset_name]
dataset_path = matching_rows.path.iloc[0]

# Loading method taken from the docs: index the remote catalog by path.
rc = catalog.RemoteCatalog()
plastic_polymer_df = rc[dataset_path]

In [4]:
# Show the 'World' entry for 2010.
# NOTE(review): assumes the table is indexed by (country, year) - confirm.
plastic_polymer_df.loc[('World', 2010)]

In [6]:
# Quick sanity check of the country table: number of distinct countries,
# years covered, column names, and the summed waste figure.
print(
    country_keys['country'].nunique(),
    country_keys['year'].unique(),
    country_keys.columns,
    country_keys['total_plastic'].sum(),
    sep='\n',
)


168
[2010]
Index(['country', 'code', 'year', 'total_plastic'], dtype='object')
273271934


In [7]:
# Share of produced plastic that ended up as waste.
#
# Production total used as the denominator here and as the normalisation base
# for `total_to_2010` further down.
# NOTE(review): presumably read off `plastic_polymer_df.loc['World', 2010]` -
# confirm against that cell's output before deriving it programmatically.
total_plastic_polymere_production = 348880000.0

# Sum of every country's waste figure, computed from the loaded data instead of
# a pasted literal (273271934 for the current CSV) so it cannot go stale when
# the input file changes.
total_plastic_waste_generation = country_keys['total_plastic'].sum()

waste_to_production = total_plastic_waste_generation / total_plastic_polymere_production
print(f'Total % of plastic produced turned to waste: {waste_to_production*100}')

Total % of plastic produced turned to waste: 78.3283461362073


In [8]:
# Express each row's total as a fraction of the reference production total,
# then flatten the index into columns and discard the `country` column.
# Run once per fresh load: a second run finds no `country` column to drop.
share_of_reference_total = plastic_polymer_df['total'] / total_plastic_polymere_production
plastic_polymer_df['total_to_2010'] = share_of_reference_total
plastic_polymer_df = plastic_polymer_df.reset_index().drop(columns='country')

In [9]:
# Preview the first five rows of the flattened polymer table.
plastic_polymer_df.head(n=5)

Unnamed: 0,year,abs__asa__san,bioplastics,elastomers__tyres,fibres,hdpe,ldpe__lldpe,marine_coatings,other,pet,pp,ps,pur,pvc,road_marking_coatings,total,total_to_2010
0,1990,2683000.0,693000.0,2172000.0,16514000.0,14371000.0,15666000.0,163000.0,23554000.0,6647000.0,20804000.0,6353000.0,5371000.0,14701000.0,195000.0,129886992.0,0.372297
1,1991,2781000.0,718000.0,2259000.0,17186000.0,14967000.0,16250999.0,169000.0,24437000.0,6916000.0,21585000.0,6582000.0,5567000.0,15250000.0,203000.0,134870000.0,0.38658
2,1992,2956000.0,764000.0,2408000.0,18336000.0,15980000.0,17281000.0,180000.0,25990000.0,7376000.0,22960000.0,6991000.0,5915000.0,16216999.0,215000.0,143569008.0,0.411514
3,1993,3077000.0,795000.0,2515000.0,19159000.0,16709999.0,17999000.0,187000.0,27074000.0,7706000.0,23921000.0,7273000.0,6155000.0,16891000.0,224000.0,149684992.0,0.429044
4,1994,3375000.0,872000.0,2767000.0,21095000.0,18411000.0,19754000.0,205000.0,29718000.0,8482000.0,26262000.0,7972000.0,6750000.0,18537000.0,246000.0,164447008.0,0.471357


In [11]:
# Expand the country-level waste figures into one row per (country, year).
#
# For every country the yearly polymer table is left-joined (on `year`) with
# that country's rows, and `total` is replaced by the country's own figure
# scaled by each year's `total_to_2010` ratio.
#
# NOTE(review): the previous version computed
# `full_df['total'] * full_df['total_to_2010']`, which never used the country's
# `total_plastic` and therefore produced identical totals for every country.
# Confirm that scaling the country's figure by the yearly ratio is the intended
# estimate.
country_frames = []  # per-country frames, concatenated once after the loop
for country in country_keys['country'].unique():
    country_rows = country_keys[country_keys['country'] == country]
    full_df = pd.merge(plastic_polymer_df, country_rows, on='year', how='left')
    # The left merge leaves `country` empty for years absent from country_keys,
    # so the name is written onto every row explicitly.
    full_df['country'] = country
    # min_count=1 yields NaN (rather than 0) when no row matched the country.
    # Assumes one row per country; several rows would be summed.
    country_total = country_rows['total_plastic'].sum(min_count=1)
    full_df['total'] = country_total * full_df['total_to_2010']
    country_frames.append(full_df)

# A single concat avoids re-copying the growing frame on every iteration;
# pd.concat rejects an empty list, so fall back to an empty frame.
expanded_df = pd.concat(country_frames, ignore_index=True) if country_frames else pd.DataFrame()



In [19]:
# Keep only the columns needed downstream; everything else is dropped.
columns_to_keep = ('year', 'total', 'total_to_2010', 'country')
columns_to_drop = list(filter(lambda name: name not in columns_to_keep, expanded_df.columns))
expanded_df = expanded_df.drop(columns=columns_to_drop)