### Setup

In [None]:
import os

import matplotlib.pyplot as plt
import missingno as msno
import pandas as pd
import seaborn as sns
from IPython.display import display

# %matplotlib inline
# https://github.com/microsoft/vscode-jupyter/issues/1948
%matplotlib widget

In [None]:
%%time
wdi_df = pd.read_excel(os.path.join("data", "WDIEXCEL.xlsx"))
series_df = pd.read_excel(os.path.join("data", "WDIEXCEL.xlsx"), sheet_name="Series")

### Simplify Dataset

In [None]:
# Attach the series metadata to every indicator row. A left join keeps every
# row of wdi_df even when no matching "Series Code" exists in series_df.
# Both frames carry an "Indicator Name" column, so pandas suffixes them with
# "_x" (from wdi_df) and "_y" (from series_df).
wdi_merge = pd.merge(
    wdi_df, series_df, how="left", left_on="Indicator Code", right_on="Series Code"
)

# Columns that are not needed for the greenhouse-gas analysis: the duplicated
# indicator name, the join key from series_df, and descriptive metadata.
series_columns_to_drop = [
    "Indicator Name_y",
    "Short definition",
    "Long definition",
    "Unit of measure",
    "Periodicity",
    "Base Period",
    "Other notes",
    "Aggregation method",
    "Limitations and exceptions",
    "Notes from original source",
    "General comments",
    "Source",
    "Statistical concept and methodology",
    "Development relevance",
    "Related source links",
    "Other web links",
    "Related indicators",
    "License Type",
    "Series Code",
]
wdi_ghg = wdi_merge.drop(columns=series_columns_to_drop)

# Keep only indicators expressed in CO2 equivalent, plus total population.
# regex=False: the pattern is a plain substring, not a regular expression.
# na=False: a missing indicator name counts as "no match", so the mask is
# always strictly boolean.
indicator_name = wdi_ghg["Indicator Name_x"]
is_co2_equivalent = indicator_name.str.contains(
    "CO2 equivalent", regex=False, na=False
)
is_population = indicator_name == "Population, total"
wdi_ghg = wdi_ghg[is_co2_equivalent | is_population]

# Persist the reduced dataset (the DataFrame index is written as the first column).
wdi_ghg.to_csv(os.path.join("data", "wdi_ghgs_co2e.csv"))
wdi_ghg

In [None]:
# Reshape the wide table (one column per year) into long form: one row per
# (country, indicator, year) with the measurement in the default "value" column.
# The year columns are addressed by their string labels "1960" .. "2020".
year_columns = [str(year) for year in range(1960, 2021)]

wdi_ghg_unpivot = pd.melt(
    wdi_ghg,
    id_vars=["Country Name", "Indicator Name_x"],
    value_vars=year_columns,
    # var_name must be a scalar when the columns are not a MultiIndex;
    # newer pandas versions reject a list here.
    var_name="Year",
)
# Persist the long-form table (the DataFrame index is written as the first column).
wdi_ghg_unpivot.to_csv(os.path.join("data", "wdi_ghgs_co2e_unpivot.csv"))

### Calculate global totals

In [None]:
# TODO (carried over from the original notes): also report
#   1) the number of countries that have the data
#   2) the number of years that have the data

# Sum each indicator over all rows of wdi_ghg, giving one row per indicator.
# NOTE(review): .sum() is applied to every non-key column, not only the year
# columns; depending on the pandas version, text columns may be concatenated
# or dropped. Consider selecting the year columns before summing.
# NOTE(review): if wdi_ghg contains aggregate rows (e.g. regions or "World")
# alongside individual countries, these totals double-count -- confirm.
wdi_ghg_total = wdi_ghg.groupby("Indicator Name_x", as_index=False).sum()

# The year columns are addressed by their string labels "1960" .. "2020".
year_columns = [str(year) for year in range(1960, 2021)]

# Long form of the totals: one row per (indicator, year).
# NOTE: this rebinds wdi_ghg_unpivot, replacing the per-country long table
# built in the previous section; the plot cell reads this version.
wdi_ghg_unpivot = pd.melt(
    wdi_ghg_total,
    id_vars=["Indicator Name_x"],
    value_vars=year_columns,
    # var_name must be a scalar when the columns are not a MultiIndex;
    # newer pandas versions reject a list here.
    var_name="Year",
)
# The melted "value" column holds the per-indicator sum for each year.
wdi_ghg_unpivot = wdi_ghg_unpivot.rename(columns={"value": "Total Emissions"})

In [None]:
# Write the per-indicator totals (wide form, one column per year) to disk.
# The DataFrame index is written as the first CSV column.
global_totals_csv = os.path.join("data", "wdi_global_ghg.csv")
wdi_ghg_total.to_csv(global_totals_csv)

### Plot

In [None]:
# Plot the summed series of every indicator over time.
# The matplotlib backend (%matplotlib widget) and the plt / sns imports are
# set up in the first cell of the notebook.

# pd.melt leaves the year labels as strings; cast them to int so the x-axis is
# a numeric time axis instead of 61 categorical tick labels.
# To focus on a subset, filter wdi_ghg_unpivot on "Indicator Name_x" first.
wdi_ghg_to_plot = wdi_ghg_unpivot.assign(
    Year=lambda frame: frame["Year"].astype(int)
)

fig, ax = plt.subplots()
sns.lineplot(
    data=wdi_ghg_to_plot,
    x="Year",
    y="Total Emissions",
    hue="Indicator Name_x",
    ax=ax,
)
# The "Total Emissions" column also carries the "Population, total" series,
# so the y-axis label is kept generic.
ax.set(
    title="WDI totals per indicator and year",
    xlabel="Year",
    ylabel="Total (units of each indicator)",
);