In [1]:
import pandas as pd

from owid import catalog
from owid.catalog import charts

In [2]:
# Country-level plastic waste generation table. The CSV's own header is
# replaced positionally with short snake_case column names.
country_keys = (
    pd.read_csv('../data/plastic-waste-generation-total.csv')
    .set_axis(['country', 'code', 'year', 'total_plastic_waste'], axis=1)
)
country_keys

Unnamed: 0,country,code,year,total_plastic_waste
0,Albania,ALB,2010,73364
1,Algeria,DZA,2010,1898343
2,Angola,AGO,2010,528843
3,Antigua and Barbuda,ATG,2010,22804
4,Argentina,ARG,2010,2753550
...,...,...,...,...
163,Uruguay,URY,2010,310379
164,Vanuatu,VUT,2010,25443
165,Venezuela,VEN,2010,2669998
166,Vietnam,VNM,2010,3268227


In [3]:
# Search the Our World in Data (`owid`) catalog for plastic-related tables and
# pick the path of the first entry whose table name matches.
dataset_name = "plastic_use_polymer"
plastic_catalogs = catalog.find('plastic')
matching_tables = plastic_catalogs[plastic_catalogs['table'] == dataset_name]
dataset_path = matching_tables.path.iloc[0]

# Loading method from the owid-catalog docs: index a RemoteCatalog by path.
rc = catalog.RemoteCatalog()
plastic_polymer_df = rc[dataset_path]

In [4]:
# Inspect the world-level row for 2010 (the table is indexed by country, year).
plastic_polymer_df.loc[('World', 2010)]

In [5]:
# Sanity checks on the waste table, one value per output line: number of
# distinct countries, years covered, column names, and total waste.
print(
    country_keys['country'].nunique(),
    country_keys['year'].unique(),
    country_keys.columns,
    country_keys['total_plastic_waste'].sum(),
    sep='\n',
)


168
[2010]
Index(['country', 'code', 'year', 'total_plastic_waste'], dtype='object')
273271934


In [6]:
# World polymer production in 2010, taken from the catalog table.
world_row_2010 = plastic_polymer_df.loc[('World', 2010)]
total_plastic_polymere_production = world_row_2010['total']

# Waste summed over every country in the 2010 waste table.
total_plastic_waste_generation = country_keys['total_plastic_waste'].sum()

# Share of the plastic produced in 2010 that ended up as waste (a fraction;
# multiplied by 100 only for display).
waste_to_production = total_plastic_waste_generation / total_plastic_polymere_production
print(f'Total % of plastic produced turned to waste: {waste_to_production*100}')

Total % of plastic produced turned to waste: 78.3283461362073


In [7]:
# Express every year's production as a multiple of the 2010 world production.
plastic_polymer_df['total_to_2010'] = (
    plastic_polymer_df['total'] / total_plastic_polymere_production
)

# Flatten the index into columns and discard the country label.
# NOTE: this rebinds plastic_polymer_df, so the cell cannot be re-run on its
# own (the 'country' column is already gone) and the earlier
# .loc['World', 2010] lookups need the original index.
plastic_polymer_df = plastic_polymer_df.reset_index().drop(columns='country')

In [8]:
# Keep only the year, the raw total and the 2010-relative ratio; every other
# column of the catalog table is dropped.
columns_to_keep = {'year', 'total', 'total_to_2010'}
columns_to_drop = [
    name for name in plastic_polymer_df.columns if name not in columns_to_keep
]
plastic_polymer_df = plastic_polymer_df.drop(columns=columns_to_drop)
plastic_polymer_df.head()

Unnamed: 0,year,total,total_to_2010
0,1990,129886992.0,0.372297
1,1991,134870000.0,0.38658
2,1992,143569008.0,0.411514
3,1993,149684992.0,0.429044
4,1994,164447008.0,0.471357


In [9]:
# expanded_df = pd.DataFrame()
# for country in country_keys['country'].unique():
#     full_df = pd.merge(plastic_polymer_df, country_keys[country_keys['country'] == country], on='year', how ='left')
#     full_df['country'] = country
#     full_df['total'] = full_df['total'] * full_df['total_to_2010']
#     expanded_df = pd.concat([expanded_df, full_df], ignore_index=True)



In [10]:
# columns_to_drop = [column for column in expanded_df.columns if column not in (['year', 'total', 'total_to_2010', 'country'])]
# expanded_df = expanded_df.drop(columns=columns_to_drop)

In [14]:
# ASSUMPTION 1: the countries in this table account for all plastic waste, so
# the world-level waste-to-production ratio applies to each of them.
# Dividing a country's 2010 waste by that ratio estimates how much plastic it
# produced in 2010.
#
# The guard keeps the cell idempotent: once the column has been converted and
# renamed, running the cell again leaves country_keys untouched instead of
# raising a KeyError. The rename is done by name rather than by position so it
# cannot silently relabel the wrong column.
if 'total_plastic_waste' in country_keys.columns:
    country_keys = (
        country_keys
        .assign(
            total_plastic_waste=lambda frame: frame['total_plastic_waste'] / waste_to_production
        )
        .rename(columns={'total_plastic_waste': 'total_plastic_production'})
    )

assert 'total_plastic_production' in country_keys.columns, (
    "country_keys has neither 'total_plastic_waste' nor 'total_plastic_production'"
)

In [63]:
# Build the per-country yearly production estimate: the world production trend
# (each year relative to 2010) is scaled by the country's estimated 2010
# production. The result has the columns year, country_produced_plastic and
# country, with one block of rows per country.
#
# Frames are collected in a list and concatenated once; growing a DataFrame
# with pd.concat inside the loop copies all previous rows on every iteration
# and starts from an empty frame, which recent pandas versions warn about.
first_row_per_country = country_keys.drop_duplicates(subset='country')

country_frames = []
for country, production_2010 in zip(
    first_row_per_country['country'],
    first_row_per_country['total_plastic_production'],
):
    country_trend = plastic_polymer_df.copy()
    country_trend['country_produced_plastic'] = (
        country_trend['total_to_2010'] * production_2010
    )
    country_trend['country'] = country
    country_frames.append(country_trend.drop(columns=['total', 'total_to_2010']))

# pd.concat raises on an empty list, so fall back to an empty frame as before.
extended_df = pd.concat(country_frames) if country_frames else pd.DataFrame()


In [65]:
# Count the distinct estimated production values across all country-year rows.
extended_df.loc[:, 'country_produced_plastic'].nunique()

5040

* What I did here: I loaded one dataset with total worldwide polymer production for 1990-2019, and another dataset with plastic waste generation in 2010 for 168 countries.
* Then I calculated the total amount of plastic waste generated (ASSUMPTION 1: the world consists of these 168 countries and no more) and divided it by the world polymer production in 2010 to get the proportion of produced plastic that became waste. Dividing each country's 2010 waste by this proportion gives the approximate amount of plastic that each country produced in 2010.
* To get each country's production by year, I looked at how the production in the first dataset changed over time (in proportion to 2010) and applied these proportions to every country.

TODO: 
* [ ] try to find the data for each country's GDP
* [ ] adjust the total production of polymers in proportion to each country's GDP (details are in the notes)
* [ ] correct the yearly production numbers in proportion to each country's GDP over the years.