In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
!ls ../input/

In [4]:
# Load Country.csv from the input directory into a DataFrame.
# NOTE(review): the variable name is a misspelling of "countries"; it is kept
# because the following cells refer to it by this name.
contries = pd.read_csv("../input/Country.csv")

In [5]:
contries.shape # (rows, columns) of the country table

In [6]:
# Index labels of the country-table rows whose CountryCode is "IND" (India)
indiaIndex = contries.loc[contries["CountryCode"].eq("IND")].index

In [7]:
# Display the country-table row(s) selected by indiaIndex (CountryCode == "IND").
contries.loc[indiaIndex]

In [8]:
# Load Indicators.csv from the input directory; every later cell filters this frame.
indicators = pd.read_csv("../input/Indicators.csv")

In [14]:
indicators.shape # (rows, columns) of the indicators table

In [9]:
# Boolean row filters over the indicators table, one per country code
indiaMask = indicators['CountryCode'].eq("IND")
usaMask = indicators['CountryCode'].eq("USA")

In [10]:
# Preview the first indicator rows recorded for India (CountryCode "IND").
indicators[indiaMask].head()

In [11]:
# Preview the first indicator rows recorded for the USA (CountryCode "USA").
indicators[usaMask].head()

In [12]:
# Print, for India and then for the USA, how many distinct years have records.
# Per the original note, the data is collected from 1960 to 2015.
for countryMask in (indiaMask, usaMask):
    print(len(indicators.loc[countryMask, 'Year'].unique()))

### Exploring the relationship between CO2 and GDP, and the distribution of CO2 emissions

# INDIA

In [13]:
# Collect the distinct indicator names reported for India and print how many there are.
uniqIndicators = indicators.loc[indiaMask, 'IndicatorName'].unique()
uniqIndicatorsSize = len(uniqIndicators)
print(uniqIndicatorsSize)

In [15]:
# Print every India indicator whose name mentions GDP.
# This is only a browsing aid for picking the most appropriate indicator.
for indicatorName in uniqIndicators:
    if "GDP" in indicatorName:
        print(indicatorName)

In [16]:
# Print every India indicator whose name mentions CO2.
# This is only a browsing aid for picking the most appropriate indicator.
for indicatorName in uniqIndicators:
    if "CO2" in indicatorName:
        print(indicatorName)

In [17]:
# Criteria: indicator names picked from the GDP / CO2 listings printed by the previous cells.
# They are passed to str.contains as regular-expression patterns, so the literal
# "(", ")" and "$" characters are backslash-escaped. Raw strings are used so the
# backslashes are written explicitly instead of relying on Python keeping an
# unrecognised escape sequence (which triggers a warning on newer Python versions).
co2PerCap = r"CO2 emissions \(metric tons per capita\)"
gdpPerCap = r"GDP per capita \(constant 2005 US\$\)"
co2Gas = r'CO2 emissions from gaseous fuel consumption \(kt\)'
co2Liq = r'CO2 emissions from liquid fuel consumption \(kt\)'
co2Sol = r'CO2 emissions from solid fuel consumption \(kt\)'
# Masks: boolean filters over the main dataframe, one per selected indicator.
co2PerCapMask = indicators['IndicatorName'].str.contains(co2PerCap)
gdpPerCapMask = indicators['IndicatorName'].str.contains(gdpPerCap)
co2GasMask = indicators['IndicatorName'].str.contains(co2Gas)
co2LiqMask = indicators['IndicatorName'].str.contains(co2Liq)
co2SolMask = indicators['IndicatorName'].str.contains(co2Sol)

## CO2 

In [18]:
# CO2-per-capita records for India; co2Indicator is reused by later cells.
co2Indicator = indicators.loc[indiaMask & co2PerCapMask]
# Line chart of the value per year, drawn through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(co2Indicator['Year'].values, co2Indicator['Value'].values, color='blue', label='CO2 per capita')
ax.set_xlabel("Year")
ax.set_ylabel(co2Indicator['IndicatorName'].iloc[0])
ax.set_title('CO2 Emissions in India')
ax.legend()
ax.axis([1960,2015,0,2.0])
plt.show()

## GDP

In [20]:
# GDP-per-capita records for India, plus the subset limited to years before 2012
# (the subset is what the GDP-vs-CO2 comparison cells use).
gdpIndicator = indicators.loc[indiaMask & gdpPerCapMask]
gdpIndicatorTrunc = gdpIndicator.loc[gdpIndicator['Year'].lt(2012)]

In [21]:
# Line chart of India's GDP per capita per year (explicit figure/axes interface).
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(gdpIndicator['Year'].values, gdpIndicator['Value'].values, 'b-', label='GDP per capita')
ax.set_xlabel("Year")
ax.set_ylabel(gdpIndicator['IndicatorName'].iloc[0])
ax.set_title('GDP per capita (constant 2005 US$)')
ax.legend()
ax.axis([1960,2015,0,1300])
plt.show()

## Correlation between GDP and CO2

In [24]:
# Pair GDP and CO2 values by Year instead of by row position, so every point
# compares the same year even if one series has gaps or a different range.
# validate='one_to_one' fails loudly if either frame has more than one row per year.
indiaGdpCo2 = gdpIndicatorTrunc.merge(
    co2Indicator, on='Year', suffixes=('_gdp', '_co2'), validate='one_to_one'
)
plt.figure(figsize=(12,8))
plt.scatter(indiaGdpCo2['Value_gdp'], indiaGdpCo2['Value_co2'])
plt.xlabel(gdpIndicatorTrunc['IndicatorName'].iloc[0])
plt.ylabel(co2Indicator['IndicatorName'].iloc[0])
plt.title("Correlation between GDP and CO2 emissions")
plt.axis([0,1300,0,2.0])
plt.show()

In [26]:
# Correlate values that belong to the same year: join the two frames on Year
# rather than relying on them having identical length and row order.
# validate='one_to_one' fails loudly if either frame has more than one row per year.
indiaGdpCo2 = gdpIndicatorTrunc.merge(
    co2Indicator, on='Year', suffixes=('_gdp', '_co2'), validate='one_to_one'
)
# 2x2 correlation matrix; the off-diagonal entry is the GDP/CO2 correlation coefficient.
np.corrcoef(indiaGdpCo2['Value_gdp'], indiaGdpCo2['Value_co2'])

### GDP per capita and CO2 emissions per capita are very strongly correlated (correlation coefficient of about 0.97)

## Distribution of CO2 emission by type of fuel

In [27]:
# One line per fuel type for India: (indicator mask, line format, legend label).
plt.figure(figsize=(12,8))
for fuelMask, lineFmt, fuelLabel in (
    (co2GasMask, 'b-', 'gaseous'),
    (co2LiqMask, 'r-', 'liquid'),
    (co2SolMask, 'g-', 'solid'),
):
    fuelRows = indicators[fuelMask & indiaMask]
    plt.plot(fuelRows['Year'].values, fuelRows['Value'].values, lineFmt, label=fuelLabel)
plt.xlabel("Year")
plt.ylabel('CO2 emissions kt')
plt.title('CO2 emissions from solid, liquid and gaseous fuel in INDIA')
plt.axis([1960,2015,0,1500000])
plt.legend()
plt.show()

## Distribution of CO2 emissions by activities

In [28]:
# Indicators: names of the per-sector CO2 indicators, used as regular-expression
# patterns by str.contains. Raw strings spell out the backslash escapes for
# "(", ")" and "%" explicitly instead of relying on Python keeping an
# unrecognised escape sequence (which triggers a warning on newer Python versions).
co2Elec = r'CO2 emissions from electricity and heat production, total \(\% of total fuel combustion\)'
co2Manu = r'CO2 emissions from manufacturing industries and construction \(\% of total fuel combustion\)'
co2OtherEx = r'CO2 emissions from other sectors, excluding residential buildings and commercial and public services \(\% of total fuel combustion\)'
co2Public = r'CO2 emissions from residential buildings and commercial and public services \(\% of total fuel combustion\)'
co2Trans = r'CO2 emissions from transport \(\% of total fuel combustion\)'
# Masks: boolean filters over the main dataframe, one per sector indicator.
co2ElecMask = indicators['IndicatorName'].str.contains(co2Elec)
manuMask = indicators['IndicatorName'].str.contains(co2Manu)
otherMask = indicators['IndicatorName'].str.contains(co2OtherEx)
publicMask = indicators['IndicatorName'].str.contains(co2Public)
transMask = indicators['IndicatorName'].str.contains(co2Trans)
# elecMask is the same filter as co2ElecMask (both names are kept); reuse the
# result instead of scanning the IndicatorName column a second time.
elecMask = co2ElecMask

In [29]:
# One line per economic sector for India: (indicator mask, line format, legend label).
sectorSeries = [
    (elecMask, 'b-', 'Electricity and Heating'),
    (manuMask, 'r-', 'Manufacturing and Construction'),
    (otherMask, 'g-', 'Other sectors excluding resi, comm, pub'),
    (publicMask, 'y-', 'Residential, Commercial, Public'),
    (transMask, 'k-', 'Transport'),  # 'k-' is a solid black line
]
plt.figure(figsize=(12,8))
for sectorMask, lineFmt, sectorLabel in sectorSeries:
    sectorRows = indicators[sectorMask & indiaMask]  # filter once per sector
    plt.plot(sectorRows['Year'].values, sectorRows['Value'].values, lineFmt, label=sectorLabel)
plt.legend(loc='best')
plt.axis([1970, 2013, 0, 60])
plt.title('CO2 emissions from economic activities in INDIA')
plt.xlabel('Year')
# The plotted indicators are "% of total fuel combustion", so the axis is labelled to match.
plt.ylabel('Percentage of total fuel combustion')
plt.show()

## USA

### USA CO2

In [30]:
# CO2-per-capita records for the USA, filtered once and reused for both axes and the label.
usaCo2Rows = indicators[usaMask & co2PerCapMask]
plt.figure(figsize=(12,8))
plt.plot(usaCo2Rows['Year'].values, usaCo2Rows['Value'].values, color='black', label='CO2 per capita')
plt.xlabel("Year")
plt.ylabel(usaCo2Rows['IndicatorName'].iloc[0])
plt.title("CO2 emissions of USA")
plt.legend()
plt.axis([1960, 2014, 0, 25])
# Render the figure explicitly, like the other plot cells; without this the cell
# would also echo the return value of plt.axis as its output.
plt.show()

### USA GDP 

In [31]:
# GDP-per-capita records for the USA, filtered once and reused for both axes and the label.
usaGdpRows = indicators[usaMask & gdpPerCapMask]
plt.figure(figsize=(12,8))
plt.plot(usaGdpRows['Year'].values, usaGdpRows['Value'].values, color='black', label='GDP per capita')
plt.xlabel("Year")
plt.ylabel(usaGdpRows['IndicatorName'].iloc[0])
plt.title("GDP per capita of USA")
plt.legend()
plt.axis([1960, 2016, 0, 50000])
# Render the figure explicitly, like the other plot cells; without this the cell
# would also echo the return value of plt.axis as its output.
plt.show()

### GDP vs CO2 emissions correlation

In [32]:
# GDP-per-capita records for the USA and the subset before 2012 (both names are
# used by the correlation cell that follows).
# NOTE(review): the 2012 cut-off presumably matches the last year of the CO2 series — confirm.
gdpUsaIndicator = indicators[usaMask & gdpPerCapMask]
gdpUsaIndicatorTrunc = gdpUsaIndicator[gdpUsaIndicator['Year']<2012]
co2UsaIndicator = indicators[usaMask & co2PerCapMask]
# Pair GDP and CO2 values by Year instead of by row position, so every point
# compares the same year even if one series has gaps or a different range.
# validate='one_to_one' fails loudly if either frame has more than one row per year.
usaGdpCo2 = gdpUsaIndicatorTrunc.merge(
    co2UsaIndicator, on='Year', suffixes=('_gdp', '_co2'), validate='one_to_one'
)
plt.figure(figsize=(12,8))
plt.scatter(usaGdpCo2['Value_gdp'].values, usaGdpCo2['Value_co2'].values)
# Label the axes and title the figure so it can be read on its own, as the India scatter is.
plt.xlabel(gdpUsaIndicator['IndicatorName'].iloc[0])
plt.ylabel(co2UsaIndicator['IndicatorName'].iloc[0])
plt.title("Correlation between GDP and CO2 emissions (USA)")
plt.show()

In [33]:
# Correlate values that belong to the same year: join the truncated GDP frame with
# the USA CO2-per-capita rows on Year rather than relying on both having identical
# length and row order.
# validate='one_to_one' fails loudly if either frame has more than one row per year.
usaGdpCo2 = gdpUsaIndicatorTrunc.merge(
    indicators[usaMask & co2PerCapMask], on='Year', suffixes=('_gdp', '_co2'), validate='one_to_one'
)
# 2x2 correlation matrix; the off-diagonal entry is the GDP/CO2 correlation coefficient.
np.corrcoef(usaGdpCo2['Value_gdp'].values, usaGdpCo2['Value_co2'].values)

The correlation coefficient is about 0.076, which is negligible.

## CO2 by type of fuel

In [34]:
# One line per fuel type for the USA: (indicator mask, line format, legend label).
plt.figure(figsize=(12,8))
for fuelMask, lineFmt, fuelLabel in (
    (co2GasMask, 'b-', 'gaseous'),
    (co2LiqMask, 'r-', 'liquid'),
    (co2SolMask, 'g-', 'solid'),
):
    fuelRows = indicators[fuelMask & usaMask]
    plt.plot(fuelRows['Year'].values, fuelRows['Value'].values, lineFmt, label=fuelLabel)
plt.xlabel("Year")
plt.ylabel('CO2 emissions kt')
plt.title('CO2 emissions from solid, liquid and gaseous fuel in USA')
plt.legend()
plt.axis([1960, 2015, 0, 2600000])
plt.show()

## Percentage by economic activity

In [35]:
# One line per economic sector for the USA: (indicator mask, line format, legend label).
sectorSeries = [
    (elecMask, 'b-', 'Electricity and Heating'),
    (manuMask, 'r-', 'Manufacturing and Construction'),
    (otherMask, 'g-', 'Other sectors excluding resi, comm, pub'),
    (publicMask, 'y-', 'Residential, Commercial, Public'),
    (transMask, 'k-', 'Transport'),  # 'k-' is a solid black line
]
plt.figure(figsize=(12,8))
for sectorMask, lineFmt, sectorLabel in sectorSeries:
    sectorRows = indicators[sectorMask & usaMask]  # filter once per sector
    plt.plot(sectorRows['Year'].values, sectorRows['Value'].values, lineFmt, label=sectorLabel)
plt.legend(loc='best')
plt.axis([1960, 2013, 0, 50])
plt.title('CO2 emissions from economic activities in USA')
plt.xlabel('Year')
# The plotted indicators are "% of total fuel combustion", so the axis is labelled to match.
plt.ylabel('Percentage of total fuel combustion')
plt.show()

# Findings (1)
CO2 emissions per capita and GDP per capita seem to be highly correlated for
India, while they are essentially uncorrelated for the USA.
- Steady growth of CO2 during the time frame (India)
- Steady growth of GDP during the time frame (India)
- Stable CO2 levels with no growth (USA)
- Steady increase in GDP per capita (USA)
- A correlation of 0.967 is calculated (India)
- A correlation of 0.076 is calculated (USA)
- India seems to consume more solid fuels, followed by liquid fuels followed by
gaseous fuels (India)
- The rate of consumption of all forms of fuel has been increasing, with the rate
for solid fuels rising steeply (India)
- The USA seems to consume liquid fuels the most, followed by solid fuels, followed by
gaseous fuels. (USA)
- The consumption rates have been somewhat steady over the time-frame with
the rate for liquid fuels rising sharply during the 1970s and stabilizing
thereafter.