# Graphs using Plotly Express

In [32]:
# Adding the dependencies
# Add Matplotlib inline magic command
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px


In [33]:
# Load the cleaned, merged Emission-Population dataset from the S3 bucket.
# The first CSV column is used as the row index of the resulting data frame.
MERGED_CSV_URL = 'https://dataanalyticsproject.s3.us-east-2.amazonaws.com/Merged_L5000.csv'
new_normalized_df = pd.read_csv(MERGED_CSV_URL, index_col=0)
new_normalized_df.head()


Unnamed: 0,Area_Code,Area,Item_Code,Item,Element_Code,Element,Year,Emission,Population
0,2,Afghanistan,5058,Enteric Fermentation,7225,Emissions (CH4),1990,178.4682,12412.308
1,2,Afghanistan,5058,Enteric Fermentation,724413,Emissions (CO2eq) from CH4 (AR5),1990,4997.1108,12412.308
2,2,Afghanistan,5058,Enteric Fermentation,723113,Emissions (CO2eq) (AR5),1990,4997.1108,12412.308
3,2,Afghanistan,5059,Manure Management,7225,Emissions (CH4),1990,8.5165,12412.308
4,2,Afghanistan,5059,Manure Management,7230,Emissions (N2O),1990,0.3046,12412.308


In [34]:
# Sanity check: order the rows by Area_Code, highest first, and peek at the
# top of the result to see which area carries the largest code.
x = new_normalized_df.sort_values(by="Area_Code", ascending=False)
x.head()

Unnamed: 0,Area_Code,Area,Item_Code,Item,Element_Code,Element,Year,Emission,Population
437774,351,China,6516,Land Use change,724313,Emissions (CO2eq) from N2O (AR5),2008,0.0503,1383985.631
658925,351,China,6993,Fires in organic soils,724413,Emissions (CO2eq) from CH4 (AR5),2017,0.0,1452625.244
658918,351,China,6992,Forest fires,7225,Emissions (CH4),2017,5.8977,1452625.244
658919,351,China,6992,Forest fires,7230,Emissions (N2O),2017,0.5385,1452625.244
658920,351,China,6992,Forest fires,724413,Emissions (CO2eq) from CH4 (AR5),2017,165.1365,1452625.244


In [35]:
# Total emission per element and year.
# "Emission" is selected *before* aggregating so only that column is summed.
# Calling .sum() on the whole grouped frame aggregates every other column as
# well (on pandas >= 2.0 that includes the text columns) only to discard them.
element_df = (
    new_normalized_df
    .groupby(["Element", "Year"])["Emission"]
    .sum()
    .reset_index()
)
element_df

Unnamed: 0,Element,Year,Emission
0,Direct emissions (N2O),1990,25580.1355
1,Direct emissions (N2O),1991,25494.6518
2,Direct emissions (N2O),1992,25514.4306
3,Direct emissions (N2O),1993,25125.6344
4,Direct emissions (N2O),1994,25601.7356
...,...,...,...
235,Indirect emissions (N2O),2015,10075.2076
236,Indirect emissions (N2O),2016,10216.9735
237,Indirect emissions (N2O),2017,10309.0876
238,Indirect emissions (N2O),2018,10242.4403


## Due to the 50 MB size limit in Git, the charts created below are saved in the "Image" folder of our repository

In [55]:
# Element-wise emission trend over the years, one line per element.
# plotly.express (px) comes from the notebook's top-level imports.
fig = px.line(
    element_df,
    x="Year",
    y="Emission",
    color='Element',
    title="Trend of Generated Elements",
    width=900,
    height=720,
)
# Thicken the lines so the individual elements are easier to tell apart.
fig.update_traces(line=dict(width=5))
fig.show()
# The output image is saved as "Trend-generated-elements" in the Image folder.

In [37]:
# Total emission per area (country or region).
# "Emission" is selected *before* aggregating so only that column is summed.
# Calling .sum() on the whole grouped frame aggregates every other column as
# well (on pandas >= 2.0 that includes the text columns) only to discard them.
area_df = (
    new_normalized_df
    .groupby(["Area"])["Emission"]
    .sum()
    .reset_index()
)
area_df

Unnamed: 0,Area,Emission
0,Afghanistan,4.016315e+06
1,Albania,1.123135e+06
2,Algeria,3.175846e+06
3,American Samoa,-6.794315e+03
4,Andorra,4.465506e+03
...,...,...
235,Western Sahara,-6.420852e+04
236,Yemen,2.391822e+06
237,Yugoslav SFR,4.322671e+05
238,Zambia,1.937541e+07


In [38]:
# Rank the areas from the highest to the lowest total emission.
# reset_index() renumbers the rows and keeps the previous row labels in a new
# "index" column.
area_df = area_df.sort_values(by="Emission", ascending=False).reset_index()

area_df

Unnamed: 0,index,Area,Emission
0,28,Brazil,4.551232e+08
1,102,Indonesia,3.062385e+08
2,101,India,2.101129e+08
3,43,China,1.609322e+08
4,47,"China, mainland",1.591492e+08
...,...,...,...
235,31,Bulgaria,-1.477058e+06
236,174,Republic of Korea,-2.161456e+06
237,42,Chile,-2.870036e+06
238,176,Romania,-2.895557e+06


In [39]:
# Keep only the first 25 rows, i.e. the 25 areas with the highest emission
# after the descending sort, and confirm the row count per column.
area_df = area_df.head(25)
area_df.count()


index       25
Area        25
Emission    25
dtype: int64

In [54]:
# Bar chart of the 25 areas with the highest total emission.

fig = px.bar(
    area_df,
    x="Area",
    y="Emission",
    title="   Top 25 Countries with the most Emission",
    width=1050,
    height=750,
)
# Tilt the area names so the x-axis labels do not overlap.
fig.update_layout(barmode='group', xaxis_tickangle=-45)
fig.show()
# The output image is saved as "top25-most-emission" in the Image folder.

In [41]:
# Recompute the total emission per area; area_df was cut down to the top 25
# rows above, so it has to be rebuilt from the full data set.
# "Emission" is selected *before* aggregating so only that column is summed.
# Calling .sum() on the whole grouped frame aggregates every other column as
# well (on pandas >= 2.0 that includes the text columns) only to discard them.
area_df = (
    new_normalized_df
    .groupby(["Area"])["Emission"]
    .sum()
    .reset_index()
)
area_df

Unnamed: 0,Area,Emission
0,Afghanistan,4.016315e+06
1,Albania,1.123135e+06
2,Algeria,3.175846e+06
3,American Samoa,-6.794315e+03
4,Andorra,4.465506e+03
...,...,...
235,Western Sahara,-6.420852e+04
236,Yemen,2.391822e+06
237,Yugoslav SFR,4.322671e+05
238,Zambia,1.937541e+07


In [42]:
# Rank the areas from the lowest to the highest total emission.
# reset_index() renumbers the rows and keeps the previous row labels in a new
# "index" column.
area_df = area_df.sort_values(by="Emission", ascending=True).reset_index()

area_df

Unnamed: 0,index,Area,Emission
0,177,Russian Federation,-5.288726e+07
1,176,Romania,-2.895557e+06
2,42,Chile,-2.870036e+06
3,174,Republic of Korea,-2.161456e+06
4,31,Bulgaria,-1.477058e+06
...,...,...,...
235,47,"China, mainland",1.591492e+08
236,43,China,1.609322e+08
237,101,India,2.101129e+08
238,102,Indonesia,3.062385e+08


In [43]:
# Keep only the first 10 rows, i.e. the 10 areas with the lowest emission
# after the ascending sort, and confirm the row count per column.
area_df = area_df.head(10)
area_df.count()

index       10
Area        10
Emission    10
dtype: int64

In [44]:
# Bar chart of the 10 areas with the lowest total emission.

fig = px.bar(
    area_df,
    x="Area",
    y="Emission",
    title="Top 10 countries with the least Emission",
    width=1050,
    height=750,
)
# Tilt the area names so the x-axis labels do not overlap.
fig.update_layout(barmode='group', xaxis_tickangle=-45)
# show must be *called*: a bare `fig.show` only evaluates to the bound method
# and never renders the chart.
fig.show()
# The output image is saved as "top10-least-Emission" in the Image folder.


In [45]:
# Total emission per year.
# "Emission" is selected *before* aggregating so only that column is summed.
# Calling .sum() on the whole grouped frame aggregates every other column as
# well (on pandas >= 2.0 that includes the text columns) only to discard them.
year_df = (
    new_normalized_df
    .groupby(["Year"])["Emission"]
    .sum()
    .reset_index()
)


In [46]:
# Scatter plot of the yearly emission totals; each marker is coloured by its
# year and sized by its emission value.
fig = px.scatter(
    year_df,
    x="Year",
    y="Emission",
    color="Year",
    size='Emission',
    hover_data=['Emission'],
    title="Emission over Years",
)
fig.show()
# The output image is saved as "emission-over-years" in the Image folder.

In [47]:
# Total emission contributed by each item.
# "Emission" is selected *before* aggregating so only that column is summed.
# Calling .sum() on the whole grouped frame aggregates every other column as
# well (on pandas >= 2.0 that includes the text columns) only to discard them.
item_tot_df = new_normalized_df.groupby(["Item"])["Emission"].sum()
item_tot_df = item_tot_df.reset_index()


In [48]:
# Keep only the CH4 rows (Element_Code 7225).
is_ch4 = new_normalized_df['Element_Code'] == 7225
item_ch4_df = new_normalized_df.loc[is_ch4]
item_ch4_df.head()

Unnamed: 0,Area_Code,Area,Item_Code,Item,Element_Code,Element,Year,Emission,Population
0,2,Afghanistan,5058,Enteric Fermentation,7225,Emissions (CH4),1990,178.4682,12412.308
3,2,Afghanistan,5059,Manure Management,7225,Emissions (CH4),1990,8.5165,12412.308
8,2,Afghanistan,5060,Rice Cultivation,7225,Emissions (CH4),1990,24.5,12412.308
31,2,Afghanistan,5066,Burning - Crop residues,7225,Emissions (CH4),1990,2.6977,12412.308
40,2,Afghanistan,6795,Savanna fires,7225,Emissions (CH4),1990,0.3968,12412.308


In [49]:
# Pie chart of the CH4 rows: one slice per item, sized by emission.
fig = px.pie(
    item_ch4_df,
    values='Emission',
    names='Item',
    color='Item',
    title='The Items causing CH4 Emissions',
)
fig.show()
# The output image is saved as "CH4-emission" in the Image folder.

In [50]:
# Keep only the N2O rows (Element_Code 7230).
is_n2o = new_normalized_df['Element_Code'] == 7230
item_n2o_df = new_normalized_df.loc[is_n2o]
item_n2o_df.head()

Unnamed: 0,Area_Code,Area,Item_Code,Item,Element_Code,Element,Year,Emission,Population
4,2,Afghanistan,5059,Manure Management,7230,Emissions (N2O),1990,0.3046,12412.308
13,2,Afghanistan,5061,Synthetic Fertilizers,7230,Emissions (N2O),1990,0.9182,12412.308
18,2,Afghanistan,5062,Manure applied to Soils,7230,Emissions (N2O),1990,0.9817,12412.308
23,2,Afghanistan,5063,Manure left on Pasture,7230,Emissions (N2O),1990,6.002,12412.308
28,2,Afghanistan,5064,Crop Residues,7230,Emissions (N2O),1990,0.7759,12412.308


In [51]:
# Pie chart of the N2O rows: one slice per item, sized by emission.
fig = px.pie(
    item_n2o_df,
    values='Emission',
    names='Item',
    color='Item',
    title='The Items causing N2O Emissions',
)
fig.show()
# The output image is saved as "N2O-emission" in the Image folder.

In [52]:
# Filtering the data based on Element CO2 (Element_Code 7273).
is_co2 = new_normalized_df['Element_Code'] == 7273
item_co2_df = new_normalized_df.loc[is_co2]
item_co2_df.head()

Unnamed: 0,Area_Code,Area,Item_Code,Item,Element_Code,Element,Year,Emission,Population
36,2,Afghanistan,6750,Net Forest conversion,7273,Emissions (CO2),1990,0.0,12412.308
38,2,Afghanistan,6751,Forestland,7273,Emissions (CO2),1990,-2388.803,12412.308
56,2,Afghanistan,6993,Fires in organic soils,7273,Emissions (CO2),1990,0.0,12412.308
61,2,Afghanistan,6994,On-farm energy use,7273,Emissions (CO2),1990,274.7953,12412.308
79,2,Afghanistan,1707,LULUCF,7273,Emissions (CO2),1990,-2388.803,12412.308


In [53]:
# Pie chart of the CO2 rows: one slice per item, sized by emission.
# NOTE(review): this subset contains negative Emission values (e.g. the
# Forestland rows) -- confirm how the pie chart treats them.
fig = px.pie(
    item_co2_df,
    values='Emission',
    names='Item',
    color='Item',
    title='The Items causing CO2 Emissions',
)
fig.show()
# The output image is saved as "CO2-emission" in the Image folder.
