### This notebook covers EDA on the country emissions dataset, with visualizations using plotly, seaborn and geographical plots. Leave an upvote if you like my work.

In [None]:
!pip install chart_studio

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import chart_studio.plotly as py
import plotly.offline as po
import plotly.graph_objs as pg

#### Data Cleaning

In [None]:
df = pd.read_csv('/kaggle/input/global-fossil-co2-emissions-by-country-2002-2022/GCB2022v27_MtCO2_flat.csv')

In [None]:
df

In [None]:
df.isna().any()

In [None]:
#Some rows are missing their ISO 3166-1 alpha-3 code - let's look at which countries are affected

In [None]:
df[df['ISO 3166-1 alpha-3'].isna()==True]

In [None]:
countries = list(df[df['ISO 3166-1 alpha-3'].isna()==True]['Country'].unique())

In [None]:
len(countries)

In [None]:
#we drop the six countries simply
df = df[~df['Country'].isin(countries)]

In [None]:
#We assume that if a country has no recorded emissions then the value is zero (this applies to all the numerical columns)

In [None]:
df.fillna(0,inplace=True)

In [None]:
df.head()

In [None]:
#There are some rows for entire world data and international transport lets seperate that out
world_data = df[df['ISO 3166-1 alpha-3']=='WLD']

In [None]:
df = df[df['Country']!='Global']

In [None]:
transport_data = df[df['Country']=='International Transport']

In [None]:
df = df[df['Country']!='International Transport']

#### Visualizations

In [None]:
#Show the column names of df
df.columns

In [None]:
#lets see the top 10 coutries in total emissions over the time

countries = list(df.groupby('Country').sum().sort_values(by='Total',ascending=False).index)
values = list(df.groupby('Country').sum().sort_values(by='Total',ascending=False)['Total'])

plt.figure(figsize=(12,5))
sns.set_style('darkgrid')
sns.barplot(x=countries[:10],y=values[:10],palette='Set2',edgecolor='.2')

In [None]:
px.pie(names=countries[:10],values=values[:10],width=600,height=400)

In [None]:
#Lets consider India for a small study over the trend of emissions

india_data = df[df['Country']=='India']

In [None]:
#Lets see the trend of emissions of India over the past 10 years

india_past_10 = india_data[-10:]

In [None]:
#india_past_10 side by side: 'Total' per 'Year' as a line (left) and as bars (right)
fig_india, (ax_trend, ax_bars) = plt.subplots(nrows=1, ncols=2, figsize=(12,5))
sns.lineplot(data=india_past_10, x='Year', y='Total', ax=ax_trend)
sns.barplot(data=india_past_10, x='Year', y='Total', palette='Set3', edgecolor='.3', ax=ax_bars)

In [None]:
#The dip in 2020 might be due to COVID19 and lockdowns all around but the trend has been clearly increasing all over years

In [None]:
#Trend of all types of emissions over last 10 years
columns = ['Coal', 'Oil', 'Gas','Cement', 'Flaring', 'Other']
#No plt.figure() here: plotly draws its own figure, so a matplotlib figure opened in this
#cell would only show up as an empty plot.
#The frame and column names are passed to px.line so the `labels` key ('Total') is the
#name plotly express actually uses for the y axis.
fig = px.line(india_past_10, x='Year', y='Total', labels={'Total': 'Emissions'})
#px.line leaves a single trace unnamed and out of the legend; name it like the others
fig.update_traces(name='Total', showlegend=True)
for source in columns:
    fig.add_scatter(x=india_past_10['Year'], y=india_past_10[source], name=source)
fig.show()

In [None]:
#Correlation between the emissions
plt.figure(figsize=(12,6))
#numeric_only=True keeps the text columns (country name, ISO code) out of the computation;
#pandas >= 2.0 raises on them instead of silently dropping them.
sns.heatmap(df.corr(numeric_only=True),cmap='YlGnBu',annot=True)
#rotation has to be a number (or 'horizontal'/'vertical'), not the string '360';
#0 degrees is the same angle as 360.
plt.yticks(rotation=0)
plt.title('Correlation between the numeric columns')
plt.show()

In [None]:
#During the last 10 years which emissions gained higher amongst all
#Gain per sector = value in the last row of the window minus value in its first row.
#iloc[-1] is used instead of a hard-coded iloc[9] so the cell does not fail when a window
#holds fewer than 10 rows; the world window is sorted by Year so "last 10" does not depend
#on the row order of the file.
world_data_past_10 = world_data.sort_values('Year').tail(10)
values_world = (world_data_past_10[columns].iloc[-1] - world_data_past_10[columns].iloc[0]).tolist()
values_india = (india_past_10[columns].iloc[-1] - india_past_10[columns].iloc[0]).tolist()

fig_gain, (ax_world, ax_india) = plt.subplots(1, 2, figsize=(12,5))
sns.barplot(x=columns,y=values_world,palette='Set2',edgecolor='.3',ax=ax_world)
ax_world.set_title('World')
sns.barplot(x=columns,y=values_india,palette='Set2',edgecolor='.3',ax=ax_india)
ax_india.set_title('India')
plt.show()

In [None]:
#Over the past 10 years emissions due to Gas gained the most for the World, whereas emissions due to Coal gained the most for India

In [None]:
afg = df[df['ISO 3166-1 alpha-3']=='AFG']
#lets analyse the data of AFG over pas 30 years which has been continously affected by war
afg_past_30 = afg[-30:]

In [None]:
cols = ['Total']+columns
plt.figure(figsize=(12,5))
sns.lineplot(x=afg_past_30['Year'],y=afg_past_30['Total'])
for i in columns:
    sns.lineplot(x=afg_past_30['Year'],y=afg_past_30[str(i)])
plt.legend(cols)
plt.title('Afghanistan emission trend of past 30 years')

In [None]:
#The emissions have increased due to the formation of more organized trade and government in the late 2000s
#Most of the emissions are again from coal

In [None]:
#top 10 countries with most average emissions over last 10 years
plt.figure(figsize=(10,5))
#NOTE(review): Year>=2011 covers exactly 10 years only if the data ends in 2020 - confirm
#against the last year present in the dataset.
data_past_10 = df[df['Year']>=2011]
#numeric_only=True: the frame still carries a text column (the ISO code) that cannot be
#averaged; pandas >= 2.0 raises a TypeError on it instead of dropping it.
avg_emissions = (
    data_past_10.groupby('Country')
    .mean(numeric_only=True)
    .sort_values(by='Total',ascending=False)
    .head(10)
    .reset_index()
)

sns.scatterplot(x='Total',y='Per Capita',data=avg_emissions,hue='Country')
plt.title('Average total vs per capita emissions (top 10 countries by average total)')
plt.show()

In [None]:
#We can see here that the developing countries take up the lower portion of the plot and the developed countries the upper

In [None]:
#Which countries have the highest average emissions in each sectors over the past 10 years (top 5)
#The per-country averages are computed once, and only for the sector columns: a plain
#.mean() over the whole frame fails on the text ISO column in pandas >= 2.0.
sector_means = data_past_10.groupby('Country')[columns].mean()
for sector in columns:
    print('SECTOR>>>>',sector)
    #countries ordered by their average for this sector, highest first; keep the first 5
    for country in sector_means[sector].sort_values(ascending=False).index[:5]:
        print(country)

In [None]:
#Geographical Plots sector wise
#Per-country averages of the numeric columns. numeric_only=True skips the text ISO column,
#which pandas >= 2.0 refuses to average (TypeError).
avg_data = data_past_10.groupby('Country').mean(numeric_only=True).reset_index()

In [None]:
countries = avg_data['Country'].values
#ISO code per country, taken from the first row in which each country appears,
#then listed in the same order as `countries`
iso_by_country = (
    data_past_10.drop_duplicates(subset='Country')
    .set_index('Country')['ISO 3166-1 alpha-3']
)
ISO = iso_by_country.loc[countries].tolist()

In [None]:
#Guarded so that re-running this cell does not append 'Per Capita' a second time;
#a duplicate entry would make cols longer than the colour list indexed alongside it.
if 'Per Capita' not in cols:
    cols.append('Per Capita')

In [None]:
colors = ['magenta','gnbu','purp','turbo','ice','curl','oxy','haline']

In [None]:
for index,i in enumerate(cols):
    data = dict(type='choropleth', 
                colorscale = str(colors[index]),
                locations = ISO, 
                z = avg_data[str(i)])
    if i!='Per Capita':
        layout = dict(title = str(i)+ ' Emissions By Country',
                      geo = dict( projection = {'type':'robinson'},
                                 showlakes = False))
    else:
        layout = dict(title = 'Per Capita By Country',
                      geo = dict( projection = {'type':'robinson'},
                                 showlakes = False))
    x = pg.Figure(data = [data], 
                  layout = layout)
    po.iplot(x)