In [1]:
import pandas as pd
import numpy as np

In [2]:
def column_properties(df):
    """Build a per-column summary of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to profile.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` (in column order), indexed by the column
        label under the index name ``'column'``, with the fields
        ``count_non_null``, ``count_null``, ``perc_null`` (nulls as a
        percentage of the row count), ``count_unique`` (number of distinct
        non-null values) and ``dtype``.
    """
    columns_prop = pd.DataFrame({
        'column': df.columns.tolist(),
        'count_non_null': df.count().values,
        'count_null': df.isnull().sum().values,
    })
    columns_prop['perc_null'] = columns_prop['count_null'] * 100 / df.shape[0]

    # value_counts() per column is used instead of df.nunique(), which proved
    # memory intensive and slow, resulting in kernel death.
    # Columns are taken by position so that duplicated column labels are still
    # profiled one column at a time (df[label] would return a whole DataFrame).
    columns_prop['count_unique'] = [
        df.iloc[:, position].value_counts().shape[0]
        for position in range(df.shape[1])
    ]

    columns_prop['dtype'] = df.dtypes.values
    return columns_prop.set_index('column')

## Read Emission Data

In [3]:

# Load the CO2 emission estimates CSV and display its (rows, columns) shape.
file_name_emission = 'CO2_emission_estimates.csv'
emission_data = pd.read_csv(filepath_or_buffer=file_name_emission)
emission_data.shape

(2180, 5)

In [4]:
# Load the economic-development lookup and keep only the two columns used
# downstream: the development label and the country it applies to.
file_name_econ_dev = 'economic_devlopment.csv'
columns = ['Economy', 'Country']
econ_dev_data = pd.read_csv(file_name_econ_dev).loc[:, columns]
econ_dev_data.head()


Unnamed: 0,Economy,Country
0,Developing,Albania
1,Developing,Algeria
2,Developing,Angola
3,Developing,Argentina
4,Developing,Armenia


#### Merge the country development status with the main data

In [5]:
# Left join on 'Country': every emission row is kept, and rows whose country
# has no match in econ_dev_data get a missing value in 'Economy'.
data = emission_data.merge(econ_dev_data, how='left', on='Country')
data.head()

Unnamed: 0,Region,Country,Year,Series,Value,Economy
0,8,Albania,1975,Emissions (thousand metric tons of carbon diox...,4338.0,Developing
1,8,Albania,1985,Emissions (thousand metric tons of carbon diox...,6930.0,Developing
2,8,Albania,2005,Emissions (thousand metric tons of carbon diox...,3825.0,Developing
3,8,Albania,2010,Emissions (thousand metric tons of carbon diox...,3930.0,Developing
4,8,Albania,2015,Emissions (thousand metric tons of carbon diox...,3825.0,Developing


In [6]:
# Per-column profile of the merged frame: non-null/null counts, null
# percentage, number of distinct values and dtype.
column_properties(data)

Unnamed: 0_level_0,count_non_null,count_null,perc_null,count_unique,dtype
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Region,2180,0,0.0,144,int64
Country,2180,0,0.0,144,object
Year,2180,0,0.0,8,int64
Series,2180,0,0.0,2,object
Value,2180,0,0.0,1267,float64
Economy,2164,16,0.733945,2,object


In [7]:
# Count how many rows carry each distinct 'Series' label.
data['Series'].value_counts()

Emissions per capita (metric tons of carbon dioxide)    1090
Emissions (thousand metric tons of carbon dioxide)      1090
Name: Series, dtype: int64

### Plot Emission By Year

In [8]:
# Named aggregations over 'Value' for each (Year, Series) group: the sum and
# the mean of all rows in the group.
agg = {
    'Total_Emission': pd.NamedAgg(column='Value', aggfunc='sum'),
    # The string 'mean' selects pandas' built-in groupby mean; passing the
    # np.mean callable emits a FutureWarning on recent pandas versions.
    'Avg_Emission': pd.NamedAgg(column='Value', aggfunc='mean'),
}

emission_agg_by_year = data.groupby(['Year', 'Series']).agg(**agg).reset_index()
# From here on 'Year' is stored as text rather than as an integer.
emission_agg_by_year['Year'] = emission_agg_by_year['Year'].astype(str)
emission_agg_by_year

Unnamed: 0,Year,Series,Total_Emission,Avg_Emission
0,1975,Emissions (thousand metric tons of carbon diox...,12342992.0,109230.017699
1,1975,Emissions per capita (metric tons of carbon di...,527.9,4.671681
2,1985,Emissions (thousand metric tons of carbon diox...,14420731.0,125397.66087
3,1985,Emissions per capita (metric tons of carbon di...,559.7,4.866957
4,2005,Emissions (thousand metric tons of carbon diox...,25792540.0,180367.412587
5,2005,Emissions per capita (metric tons of carbon di...,776.7,5.431469
6,2010,Emissions (thousand metric tons of carbon diox...,29150161.0,203847.27972
7,2010,Emissions per capita (metric tons of carbon di...,761.1,5.322378
8,2015,Emissions (thousand metric tons of carbon diox...,30857224.0,214286.277778
9,2015,Emissions per capita (metric tons of carbon di...,738.1,5.125694


In [9]:

from plotly import graph_objects as go

# Keep only the rows of the yearly aggregate that belong to the
# absolute-emission series.
emissions = 'Emissions (thousand metric tons of carbon dioxide)'
plot_data = emission_agg_by_year.loc[emission_agg_by_year['Series'] == emissions].copy()

# One bar per year; each bar is labelled with the summed value it represents.
total_emission_bar = go.Bar(
    name="Total Emissions",
    x=plot_data['Year'].values,
    y=plot_data['Total_Emission'].values,
    text=plot_data['Total_Emission'].values,
)
fig = go.Figure(data=[total_emission_bar])

fig.update_layout(height=600, title='Total Emissions By Year')
fig.update_yaxes(title_text='Total Emission (thousand MT of CO2 )')
fig.update_xaxes(title_text='Year')
fig.show()


In [10]:
# Keep only the rows of the yearly aggregate that belong to the per-capita
# series.
emissions_per_capita = 'Emissions per capita (metric tons of carbon dioxide)'
plot_data = emission_agg_by_year[emission_agg_by_year.Series == emissions_per_capita].copy()

# NOTE(review): 'Total_Emission' is the sum of 'Value' over every row of a
# (Year, Series) group, so for this series it adds up per-capita figures.
# Confirm that this sum, rather than 'Avg_Emission', is the intended measure.
fig = go.Figure(
    data=[
        go.Bar(
            # The trace is named after the per-capita series it shows, so the
            # hover label agrees with the figure title.
            name="Total Emissions Per Capita",
            x=plot_data.Year.values,
            y=plot_data.Total_Emission.values,
            text=plot_data.Total_Emission.values,
        ),
    ]
)

fig.update_layout(height=600, title='Total Emissions Per Capita By Year')
fig.update_yaxes(title_text='Total Emission Per Capita (MT of CO2 )')
fig.update_xaxes(title_text='Year')
fig.show()

In [22]:
from plotly.subplots import make_subplots
from plotly import graph_objects as go


def add_yearly_bar(fig, yearly, trace_name, col, yaxis_title):
    """Add a bar trace of ``Total_Emission`` by ``Year`` to row 1 of ``fig``.

    Parameters
    ----------
    fig : plotly figure created with ``make_subplots``
        Figure that receives the trace; it is modified in place.
    yearly : pandas.DataFrame
        Rows to plot; must provide the ``Year`` and ``Total_Emission`` columns.
    trace_name : str
        Name given to the bar trace.
    col : int
        1-based subplot column that receives the trace.
    yaxis_title : str
        Title for the y axis of that subplot.
    """
    fig.add_trace(
        go.Bar(
            name=trace_name,
            x=yearly['Year'],
            y=yearly['Total_Emission'],
            legendgroup='1',
            showlegend=False,
            text=yearly['Total_Emission'],
            textposition='outside',
            texttemplate='%{text:.2s}',
        ),
        row=1,
        col=col,
    )
    # Axis titles are scoped with row/col so that each subplot gets its own.
    fig.update_xaxes(title_text='Year', row=1, col=col)
    fig.update_yaxes(title_text=yaxis_title, row=1, col=col)


fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=("Total Emissions By Year", "Total Emissions Per Capita By Year"),
)

# Subplot 1: total emissions by year.
emissions_total = 'Emissions (thousand metric tons of carbon dioxide)'
total_emissions = emission_agg_by_year[emission_agg_by_year.Series == emissions_total]
add_yearly_bar(
    fig,
    total_emissions,
    "Total Emissions By Year",
    col=1,
    yaxis_title='Total Emission (thousand MT of CO2 )',
)

# Subplot 2: total emissions per capita by year. The per-capita rows get their
# own names so that `emissions_total` / `total_emissions` keep referring to the
# absolute-emission series after this cell has run.
emissions_per_capita = 'Emissions per capita (metric tons of carbon dioxide)'
per_capita_emissions = emission_agg_by_year[emission_agg_by_year.Series == emissions_per_capita]
add_yearly_bar(
    fig,
    per_capita_emissions,
    "Total Emissions Per Capita By Year",
    col=2,
    yaxis_title='Total Emission Per Capita (MT of CO2 )',
)

fig.update_layout(height=500, legend_tracegroupgap=20)
fig.show()


In [13]:
# Show the frame currently bound to `total_emissions`.
# NOTE(review): this cell's execution count (13) is lower than that of the
# cell before it (22), and the saved output lists years up to 2018, so the
# stored output may be stale. Re-run the notebook top to bottom to refresh it.
total_emissions

Unnamed: 0,Year,Series,Total_Emission,Avg_Emission
0,1975,Emissions (thousand metric tons of carbon diox...,12342992.0,109230.017699
2,1985,Emissions (thousand metric tons of carbon diox...,14420731.0,125397.66087
4,2005,Emissions (thousand metric tons of carbon diox...,25792540.0,180367.412587
6,2010,Emissions (thousand metric tons of carbon diox...,29150161.0,203847.27972
8,2015,Emissions (thousand metric tons of carbon diox...,30857224.0,214286.277778
10,2016,Emissions (thousand metric tons of carbon diox...,30819714.0,214025.791667
12,2017,Emissions (thousand metric tons of carbon diox...,31217941.0,216791.256944
14,2018,Emissions (thousand metric tons of carbon diox...,31864384.0,221280.444444
