In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt

%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# Load the country lookup table (Economy, Code, Region, Income group, ...).
# The file is decoded as latin-1 rather than pandas' default encoding.
country_df = pd.read_csv("data/country_data.csv", encoding="latin-1")
country_df.head()

Unnamed: 0,Economy,Code,Region,Income group,Lending category,Other
0,Afghanistan,AFG,South Asia,Low income,IDA,HIPC
1,Albania,ALB,Europe & Central Asia,Upper middle income,IBRD,
2,Algeria,DZA,Middle East & North Africa,Upper middle income,IBRD,
3,American Samoa,ASM,East Asia & Pacific,Upper middle income,..,
4,Andorra,AND,Europe & Central Asia,High income,..,


<IPython.core.display.Javascript object>

In [3]:
def addIndicatorToMainTable(df, wb_df, indicator_df, short_code):
    """Fold one wide indicator table into the long main table.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table for a single indicator. Must contain "Country Name",
        "Country Code", "Indicator Name" and "Indicator Code"; every other
        column is melted into a (year, value) pair. The frame passed in is
        not modified.
    wb_df : pandas.DataFrame
        Main table built so far. If it has no "year" column yet (only the
        country keys), the melted data is joined on the country keys;
        otherwise it is joined on the country keys plus "year".
    indicator_df : pandas.DataFrame
        Indicator lookup table collected so far.
    short_code : str
        Name of the value column added to ``wb_df`` and the value stored in
        the "Indicator Short Code" column of ``indicator_df``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The updated ``wb_df`` and ``indicator_df`` (both new objects).
    """
    key_cols = ["Country Name", "Country Code"]
    indicator_cols = ["Indicator Name", "Indicator Code"]

    # Drop columns that contain no data at all. Working on a new frame keeps
    # the caller's ``df`` untouched.
    data = df.dropna(axis=1, how="all")

    # add to indicator table
    indic_df = (
        data[indicator_cols]
        .drop_duplicates()
        .assign(**{"Indicator Short Code": short_code})
    )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    indicator_df = pd.concat([indicator_df, indic_df], sort=False)

    # melt table: one row per (country, year) with the value under short_code
    df_m = data.drop(columns=indicator_cols).melt(
        id_vars=key_cols, var_name="year", value_name=short_code
    )

    # add to main table; "year" only becomes a join key once wb_df has it
    merge_keys = key_cols + ["year"] if "year" in wb_df.columns else key_cols
    wb_df = wb_df.merge(df_m, on=merge_keys, how="outer")
    return wb_df, indicator_df

<IPython.core.display.Javascript object>

In [4]:
# Short codes of the indicators to load. Each code is matched against the
# folder names under data/ and becomes the value-column name in wb_df.
indicator_list = ["GDP.MKTP", "GDP.PCAP", "CO2E.KT", "CO2E.PC", "GHGT.KT"]

<IPython.core.display.Javascript object>

In [5]:
indicator_df = pd.DataFrame(
    columns=["Indicator Name", "Indicator Code", "Indicator Short Code"]
)
# Start from an empty main table on every run of this cell. Testing
# `"wb_df" in locals()` would silently reuse a wb_df left over from an earlier
# run and merge the same indicators into it a second time.
wb_df = None

# loop through the indicator folders and build the wb_df and indicator_df tables
for indic in indicator_list:
    for folder in os.listdir("data"):
        # match the unzipped folder for this indicator (skip the zip archives)
        if indic in folder and "zip" not in folder:
            for file in os.listdir("data/" + folder):
                # the first four lines of the file are skipped before the header
                df = pd.read_csv("data/{}/{}".format(folder, file), skiprows=4)
                # seed wb_df with the country keys of the first table read
                if wb_df is None:
                    wb_df = df[["Country Name", "Country Code"]]
                wb_df, indicator_df = addIndicatorToMainTable(
                    df, wb_df, indicator_df, short_code=indic
                )
                # Only the first file listed in the folder is used.
                # NOTE(review): os.listdir order is arbitrary - confirm the
                # data CSV is the only (or first-listed) file in each folder.
                break

if wb_df is None:
    raise FileNotFoundError(
        "no indicator folder matching {} found under data/".format(indicator_list)
    )

# convert year to int
wb_df["year"] = wb_df["year"].astype("int")

<IPython.core.display.Javascript object>

In [6]:
# sort values by country and year
wb_df = wb_df.sort_values(["Country Name", "year"])

<IPython.core.display.Javascript object>

In [7]:
## save wb_df and indicator_df
# Write the main table and the indicator lookup table to CSV files under
# data/, without the row index.
wb_df.to_csv("data/co2_gdp_wb.csv", index=False)
indicator_df.to_csv("data/indicators.csv", index=False)

<IPython.core.display.Javascript object>

In [8]:
# Display the indicator lookup table (name, code and short code per indicator).
indicator_df

Unnamed: 0,Indicator Name,Indicator Code,Indicator Short Code
0,"GDP, PPP (current international $)",NY.GDP.MKTP.PP.CD,GDP.MKTP
0,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,GDP.PCAP
0,CO2 emissions (kt),EN.ATM.CO2E.KT,CO2E.KT
0,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,CO2E.PC
0,Total greenhouse gas emissions (kt of CO2 equi...,EN.ATM.GHGT.KT.CE,GHGT.KT


<IPython.core.display.Javascript object>

In [9]:
## get only countries (country codes) and data from 1990
# Keep only rows whose Country Code is listed in country_df and whose year is
# 1990 or later, ordered by year with a fresh 0..n-1 index.
wb_df = (
    wb_df[wb_df["Country Code"].isin(country_df["Code"]) & wb_df["year"].ge(1990)]
    .sort_values("year")
    .reset_index(drop=True)
)
wb_df.head()

Unnamed: 0,Country Name,Country Code,year,GDP.MKTP,GDP.PCAP,CO2E.KT,CO2E.PC,GHGT.KT
0,Afghanistan,AFG,1990,,,2614.571,0.210643,12528.02
1,"Venezuela, RB",VEN,1990,186253900000.0,9486.936761,122162.438,6.222407,210102.6
2,Argentina,ARG,1990,240729400000.0,7380.115031,112147.861,3.438151,266555.0
3,Nigeria,NGA,1990,207650200000.0,2180.914504,39196.563,0.411675,163274.2
4,Germany,DEU,1990,1542829000000.0,19423.019762,,,1256074.0


<IPython.core.display.Javascript object>

In [10]:
def CO2vsGDPGraphs(cntry_code):
    """
    Build a two-axis figure of CO2 emissions and GDP per capita for one country.

    Reads the notebook-level ``wb_df`` table, selects the rows whose
    "Country Code" equals ``cntry_code`` and plots "CO2E.PC" (left axis, red)
    and "GDP.PCAP" (right axis, blue) against "year".

    The figure is NOT written to disk: the ``savefig`` call below is disabled.

    Parameters
    ----------
    cntry_code : str
        Value to look up in ``wb_df["Country Code"]``.

    Returns
    -------
    matplotlib.figure.Figure
        The finished figure, already closed in pyplot.

    Raises
    ------
    ValueError
        If ``wb_df`` has no rows for ``cntry_code`` (or the code maps to more
        than one country name).
    """
    cntry_df = (
        wb_df.loc[wb_df["Country Code"] == cntry_code]
        # sort here so the lines are drawn in year order regardless of how
        # wb_df happens to be ordered
        .sort_values(by="year")
        .reset_index(drop=True)
    )
    if cntry_df.empty:
        raise ValueError("no rows in wb_df for country code {!r}".format(cntry_code))
    cntry_name = cntry_df["Country Name"].unique().item()

    fig, ax = plt.subplots()

    ax.plot(cntry_df["year"], cntry_df["CO2E.PC"], color="r")
    ax.set_ylabel("CO2 emissions per capita (ton)", color="r")

    # second y-axis sharing the same x-axis
    ax2 = ax.twinx()
    ax2.plot(cntry_df["year"], cntry_df["GDP.PCAP"], color="b")
    ax2.set_ylabel("GDP per capita (current US$ - PPP)", color="b")
    # tick every fifth row (the index was reset above, so this starts at row 0)
    ax.set_xticks(cntry_df.loc[::5, "year"])
    fig.text(0.9, 0.01, "source: World Bank", ha="right")
    fig.text(
        0.5,
        0.95,
        "{} CO2 Emissions and GDP per capita".format(cntry_name),
        weight="bold",
        size=14,
        ha="center",
    )
    # Saving is disabled; uncomment to write one JPG per country.
    #     fig.savefig(
    #         "graphs/{}_CO2_GDP_PCAP.jpg".format(cntry_code), dpi=200, bbox_inches="tight"
    #     )

    # Close only this figure: close("all") would also close any other figure
    # open in the session. Closing keeps figures from piling up when this
    # function is called in a loop.
    plt.close(fig)

    return fig

<IPython.core.display.Javascript object>

In [11]:
## get 50 countries with highest total GDP in latest year (2018)
# The 50 rows with the highest total GDP ("GDP.MKTP") in the most recent year
# present in wb_df (2018 when this notebook was run).
top50gdp = (
    wb_df[wb_df["year"] == wb_df["year"].max()]
    .sort_values("GDP.MKTP", ascending=False)
    .reset_index(drop=True)
    .head(50)
)

<IPython.core.display.Javascript object>

In [111]:
# Build the CO2 / GDP graph for each of the top50gdp countries.
# NOTE: the savefig call inside CO2vsGDPGraphs is commented out and the returned
# figure is discarded here, so this loop does not write any files as it stands.
for cntry_code in top50gdp["Country Code"]:
    CO2vsGDPGraphs(cntry_code)

<IPython.core.display.Javascript object>

# Holoviews

In [30]:
# Libraries for the interactive view: param declares the parameters and panel
# lays them out (both are used in the cells that follow).
import holoviews as hv, panel as pn, param

# Initialise HoloViews with the matplotlib and bokeh backends
# (presumably the first one listed is the default - confirm).
hv.extension("matplotlib", "bokeh")

<IPython.core.display.Javascript object>

In [71]:
class CO2GDPhv(param.Parameterized):
    """Parameterized view of the CO2 / GDP per-capita graph for one country.

    ``country_name`` is the user-facing selector. Changing it updates
    ``cntry_code``, and ``view_graph`` depends on ``cntry_code``.
    """

    # Options are taken from wb_df and passed as plain lists.
    country_name = param.Selector(
        default="United States", objects=list(wb_df["Country Name"].unique())
    )
    cntry_code = param.Selector(
        default="USA", objects=list(wb_df["Country Code"].unique())
    )

    @pn.depends("cntry_code")
    def pnCO2_GDP_PCAP(self, view_fn=CO2vsGDPGraphs):
        """Return ``view_fn(self.cntry_code)``; by default the CO2/GDP figure."""
        return view_fn(self.cntry_code)

    @param.depends("country_name", watch=True)
    def update_cntry_code(self):
        """Set ``cntry_code`` to the code that belongs to ``country_name``.

        The code is looked up in ``wb_df``, the same table the selector
        options come from, so every selectable name resolves to a code.

        Raises
        ------
        ValueError
            If the name does not map to exactly one country code.
        """
        codes = wb_df.loc[
            wb_df["Country Name"] == self.country_name, "Country Code"
        ].unique()
        if len(codes) != 1:
            raise ValueError(
                "expected exactly one country code for {!r}, found {}".format(
                    self.country_name, len(codes)
                )
            )
        self.cntry_code = codes[0]

    # No watch=True here: this method returns a view instead of performing a
    # side effect, so it only needs to declare its dependency.
    @param.depends("cntry_code")
    def view_graph(self):
        """Return a panel object wrapping the graph for the current ``cntry_code``."""
        graph = self.pnCO2_GDP_PCAP()

        return pn.panel(graph)


# Instance of the parameterized view, using the class defaults
# (country_name="United States", cntry_code="USA").
test = CO2GDPhv()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [79]:
# Stack the graph view above the country_name parameter in one panel column.
final = pn.Column(test.view_graph, test.param.country_name)

<IPython.core.display.Javascript object>

In [81]:
# Display the assembled panel layout.
final

<IPython.core.display.Javascript object>

Unnamed: 0,Country Name,Country Code,year,GDP.MKTP,GDP.PCAP,CO2E.KT,CO2E.PC,GHGT.KT
182,United States,USA,1990,5963144000000.0,23888.600009,4823403.118,19.322751,6136093.5


<IPython.core.display.Javascript object>