In [1]:
# Imports
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib

In [2]:
# Raise the number of sequence items pandas prints before truncating.
pd.set_option('display.max_seq_items', 2000)

## Grabbing & Cleaning Data

In [3]:
# Total greenhouse-gas emissions: drop the country code, shorten the value
# column name, and keep rows with Year strictly between 1989 and 2015.
ghg = (
    pd.read_csv('../data/dirty_data/total-ghg-emissions.csv')
    .drop(columns=['Code'])
    .rename(columns={"Total GHG emissions including LUCF (CAIT)": "Total GHG"})
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
)
# Persist the cleaned table (the DataFrame index is written as well).
ghg.to_csv('../data/cleaned_data/ghg.csv')

In [4]:
# Share of deaths attributed to air pollution: drop the country code, shorten
# the value column name, and keep rows with Year strictly between 1989 and 2015.
airpol_death_share = (
    pd.read_csv('../data/dirty_data/share-deaths-air-pollution.csv')
    .drop(columns=['Code'])
    .rename(
        columns={
            "Deaths - Cause: All causes - Risk: Air pollution - Sex: Both - Age: Age-standardized (Percent)": "Deaths %"
        }
    )
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
)
# Persist the cleaned table (the DataFrame index is written as well).
airpol_death_share.to_csv('../data/cleaned_data/airpol_death_share.csv')

In [5]:
# PM2.5 air-pollution exposure: drop the country code, shorten the value
# column name, and keep rows with Year strictly between 1989 and 2015.
airpol = (
    pd.read_csv('../data/dirty_data/PM25-air-pollution.csv')
    .drop(columns=['Code'])
    .rename(
        columns={
            "PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)": "Air Pollution"
        }
    )
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
)
# Persist the cleaned table (the DataFrame index is written as well).
airpol.to_csv('../data/cleaned_data/airpol.csv')

In [6]:
# Net-zero target flag: drop the country code, rename the target column and
# the year column (to "Year-Zs"), and encode "Yes"/"No" as 1/0.
# No year filter is applied to this dataset.
net_zero_binary = (
    pd.read_csv('../data/dirty_data/net-zero-target-set.csv')
    .drop(columns=['Code'])
    .rename(
        columns={
            "Is there a net-zero target?": "Net-Zero Target",
            "Year": "Year-Zs",
        }
    )
    # Values other than "Yes"/"No" become NaN under .map().
    .assign(**{"Net-Zero Target": lambda frame: frame["Net-Zero Target"].map({"Yes": 1, "No": 0})})
)
# Persist the cleaned table (the DataFrame index is written as well).
net_zero_binary.to_csv('../data/cleaned_data/net_zero_binary.csv')

In [7]:
# Food-related emissions: drop the country code, shorten the value column
# name, and keep rows with Year strictly between 1989 and 2015.
food_co2e = (
    pd.read_csv('../data/dirty_data/emissions-from-food.csv')
    .drop(columns=['Code'])
    .rename(columns={"Food emissions by country": "Food Emissions"})
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
)
# Persist the cleaned table (the DataFrame index is written as well).
food_co2e.to_csv('../data/cleaned_data/food_co2e.csv')

In [8]:
# Consumption of ozone-depleting substances: drop the country code, shorten
# the value column name, and keep rows with Year strictly between 1989 and 2015.
ozone = (
    pd.read_csv('../data/dirty_data/consumption-of-ozone-depleting-substances.csv')
    .drop(columns=['Code'])
    .rename(columns={"Consumption of Ozone-Depleting Substances - All": "Consumption of Ozone"})
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
)
# Persist the cleaned table (the DataFrame index is written as well).
ozone.to_csv('../data/cleaned_data/ozone.csv')

In [9]:
# Transport CO2 emissions: drop the country code, shorten the value column
# name, keep rows with Year strictly between 1989 and 2015, and remove the
# 'World' and 'European Union (27)' rows.
transport = (
    pd.read_csv('../data/dirty_data/co2-emissions-transport.csv')
    .drop(columns=['Code'])
    .rename(columns={"Transport (CAIT, 2020)": "Transport"})
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
    .loc[lambda frame: ~frame['Entity'].isin(['World', 'European Union (27)'])]
)
# Persist the cleaned table (the DataFrame index is written as well).
transport.to_csv('../data/cleaned_data/transport.csv')

In [10]:
# CO2 emissions per capita: drop the country code, shorten the value column
# name, and keep rows with Year strictly between 1989 and 2015.
co2_percap = (
    pd.read_csv('../data/dirty_data/co-emissions-per-capita.csv')
    .drop(columns=['Code'])
    .rename(columns={"Annual CO2 emissions (per capita)": "CO2 Emissions Per Cap"})
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
)
# Persist the cleaned table (the DataFrame index is written as well).
co2_percap.to_csv('../data/cleaned_data/co2_percap.csv')

In [11]:
# Share of global annual CO2 emissions: drop the country code, rename the
# value column, and keep rows with Year strictly between 1989 and 2015.
share_co2 = (
    pd.read_csv('../data/dirty_data/annual-share-of-co2-emissions.csv')
    .drop(columns=['Code'])
    .rename(columns={"Share of global annual CO2 emissions": "Shared CO2 Emissions"})
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
)
# Persist the cleaned table (the DataFrame index is written as well).
share_co2.to_csv('../data/cleaned_data/share_co2.csv')

In [12]:
# Annual CO2 emissions by country: drop the country code, shorten the value
# column name, and keep rows with Year strictly between 1989 and 2015.
co2 = (
    pd.read_csv('../data/dirty_data/annual_country_co2.csv')
    .drop(columns=['Code'])
    .rename(columns={"Annual CO2 emissions": "CO2 Emissions"})
    .loc[lambda frame: (frame['Year'] > 1989) & (frame['Year'] < 2015)]
)
# Persist the cleaned table (the DataFrame index is written as well).
co2.to_csv('../data/cleaned_data/co2.csv')

In [13]:
# Load the kahuna_clusters_cancer table from the cleaned-data folder.
# NOTE(review): `df` is not referenced by any of the cells shown here —
# confirm it is used further down, otherwise this read can be dropped.
df = pd.read_csv('../data/cleaned_data/kahuna_clusters_cancer.csv')

In [14]:
# Cleaned frames gathered in one list; `airpol` and `net_zero_binary`
# are not part of it.
list_data = [
    airpol_death_share,
    co2,
    co2_percap,
    food_co2e,
    ghg,
    ozone,
    share_co2,
    transport,
]

------------------------------

## Combining All Data

In [15]:
# Join the cleaned datasets one after another on the (Year, Entity) key.
# pd.merge defaults to an inner join, so only Year/Entity pairs present in
# every dataset survive.
merge_keys = ['Year', 'Entity']
combos = airpol_death_share
for frame in (co2, co2_percap, food_co2e, ghg, ozone, share_co2, transport):
    combos = pd.merge(combos, frame, on=merge_keys)

# Keep only the first occurrence of any repeated column label.
is_repeat = combos.columns.duplicated()
combos = combos.loc[:, ~is_repeat]

In [16]:
# Persist the merged table. Because index=False is not passed, the DataFrame
# index is written as the first (unnamed) column of the CSV.
combos.to_csv('../data/cleaned_data/all_cleaned.csv')