## CO2 Emissions Data

Data from: [OWID](https://ourworldindata.org/explorers/co2?facet=none&country=CHN~USA~IND~GBR~OWID_WRL&Gas+or+Warming=CO%E2%82%82&Accounting=Production-based&Fuel+or+Land+Use+Change=All+fossil+emissions&Count=Per+country)


In [1]:
# OWID CO2 dataset: per-year measurements that include total emissions (co2),
# per-capita emissions (co2_per_capita) and cumulative emissions (cumulative_co2).
# Rows cover every country plus political/geographical regions, including 'World'.

import pandas as pd
# df0 is the full, unfiltered table that the later cells filter and plot
df0 = pd.read_csv('owid-co2-data.csv')


In [2]:
# Load the codebook that accompanies owid-co2-data.csv; the displayed table has one row
# per data column, giving its description and source
codebook = pd.read_csv('owid-co2-codebook.csv')
# Leave the frame as the last expression so the notebook renders it
codebook

Unnamed: 0,column,description,source
0,country,Geographic location.,Our World in Data
1,year,Year of observation.,Our World in Data
2,iso_code,"ISO 3166-1 alpha-3, three-letter country codes.",International Organization for Standardization
3,population,Population by country and year.,Population based on various sources (2023) [ht...
4,gdp,Gross domestic product measured in internation...,Maddison Project Database 2020 (Bolt and van Z...
...,...,...,...
74,temperature_change_from_n2o,Change in global mean surface temperature (in ...,Jones et al. (2023) [https://zenodo.org/record...
75,total_ghg,Total greenhouse gas emissions including land-...,Climate Watch - Greenhouse gas emissions by se...
76,total_ghg_excluding_lucf,Total greenhouse gas emissions excluding land-...,Climate Watch - Greenhouse gas emissions by se...
77,trade_co2,Annual net carbon dioxide (CO₂) emissions embe...,Global Carbon Budget (2022) [https://globalcar...


### Countries only CO2 data
not regions

In [3]:
# This file has a subset of the data, for countries only (no regions).
# It is used in the next cell to find the top 20 emitting countries.

# pandas was already imported in the first cell; repeating the import is harmless
import pandas as pd
df2 = pd.read_csv('countries_df.csv')

In [4]:
# Rank the 2021 rows from the largest to the smallest annual CO2 emitter
df2021 = (
    df2.loc[df2['Year'] == 2021]
       .sort_values(by='Annual CO₂ emissions', ascending=False)
)
# Persist the ranked table (just in case it is needed later)
df2021.to_csv('CO2_2021_ordered.csv')
# Every entity name, in ranked order
all_countries = df2021['Entity']
# Take the 20 largest emitters and pull out their names
df2021_top20 = df2021.head(20)
countries = df2021_top20['Entity']
countries


6956              China
35676     United States
15518             India
27491            Russia
17066             Japan
15862              Iran
12766           Germany
29081      Saudi Arabia
15690         Indonesia
31032       South Korea
6096             Canada
4832             Brazil
34300            Turkey
30860      South Africa
21436            Mexico
2008          Australia
35504    United Kingdom
16722             Italy
26459            Poland
36708           Vietnam
Name: Entity, dtype: object

In [5]:
# Restrict the full OWID table to the 20 largest emitters selected in the previous cell
df0_Top20 = df0.loc[df0['country'].isin(countries)]
df0_Top20.info()

# Keep only the columns of interest; intersecting with df0_Top20.columns means the
# result follows the frame's own column order rather than the order of this list
lst = ['country', 'year', 'co2', 'co2_per_capita', 'population']
data = df0_Top20[df0_Top20.columns.intersection(lst)]
# Peek at a positional window of rows from the reduced frame
data.iloc[3600:3650]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 3867 entries, 3498 to 49466
Data columns (total 79 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   country                                    3867 non-null   object 
 1   year                                       3867 non-null   int64  
 2   iso_code                                   3867 non-null   object 
 3   population                                 3755 non-null   float64
 4   gdp                                        2527 non-null   float64
 5   cement_co2                                 2012 non-null   float64
 6   cement_co2_per_capita                      2012 non-null   float64
 7   co2                                        3336 non-null   float64
 8   co2_growth_abs                             3308 non-null   float64
 9   co2_growth_prct                            3223 non-null   float64
 10  co2_including_luc   

Unnamed: 0,country,year,population,co2,co2_per_capita
48064,United States,1927,115040131.0,1862.839,16.193
48065,United States,1928,116730690.0,1830.965,15.685
48066,United States,1929,118261782.0,1963.506,16.603
48067,United States,1930,119629770.0,1744.916,14.586
48068,United States,1931,120830953.0,1484.959,12.29
48069,United States,1932,121861566.0,1257.814,10.322
48070,United States,1933,122717780.0,1348.877,10.992
48071,United States,1934,123580010.0,1436.239,11.622
48072,United States,1935,124448298.0,1492.321,11.991
48073,United States,1936,125322686.0,1713.412,13.672


## Cumulative emissions over time

In [6]:
# Build both frames from df0 rather than from df0_Top20 itself: this cell rebinds
# df0_Top20 to the post-1950 subset, so copying the current df0_Top20 would store the
# already-truncated frame in df0_Top20_all_years whenever the cell is run a second time.
df0_Top20_all_years = df0[df0['country'].isin(countries)]
# Rows strictly after 1950 feed the charts in the following cells
df0_Top20 = df0_Top20_all_years[df0_Top20_all_years['year'] > 1950]

In [7]:
import plotly.express as px

# One line per top-20 emitter: cumulative CO2 by year (df0_Top20 holds years after 1950)
fig = px.line(
    df0_Top20,
    x='year',
    y='cumulative_co2',
    color='country',
    template='plotly_white',
    height=600,
    width=1200,
)
# Thicken every trace; the call is the last expression, so its result is the cell output
fig.update_traces(line=dict(width=3))



## Annual emissions to 2021

In [8]:
# One line per top-20 emitter: annual CO2 by year (df0_Top20 holds years after 1950)
fig = px.line(
    df0_Top20,
    x='year',
    y='co2',
    color='country',
    template='plotly_white',
    height=600,
    width=1200,
)
# Thicken every trace; the call is the last expression, so its result is the cell output
fig.update_traces(line=dict(width=3))

## Cumulative to... select year

In [9]:
# Cut-off year: only rows strictly before it are plotted
year = 2021
fig = px.line(
    df0_Top20_all_years[df0_Top20_all_years['year'] < year],
    x='year',
    y='cumulative_co2',
    color='country',
    # This chart uses the all-years frame (not the post-1950 subset), so the title
    # names the cut-off instead of claiming the series starts in 1950
    title=f"Cumulative emissions before {year}",
    template='plotly_white',
    height=600,
    width=1200,
)
# Thicken every trace; the call is the last expression, so its result is the cell output
fig.update_traces(line=dict(width=3))

## Annual to... select year

In [10]:
# Cut-off year: only rows strictly before it are plotted
year = 2021
fig = px.line(
    df0_Top20_all_years.loc[df0_Top20_all_years['year'] < year],
    x='year',
    y='co2',
    color='country',
    title="Annual emissions by country",
    template='plotly_white',
    height=600,
    width=1200,
)
# Thicken every trace; the call is the last expression, so its result is the cell output
fig.update_traces(line=dict(width=3))

## Cumulative Emissions 2021

In [11]:
# 2021 rows of the top-20 frame, ordered from smallest to largest cumulative total
df0_Top20_2021 = df0_Top20[df0_Top20['year'] == 2021].sort_values('cumulative_co2')
# Pass the frame plus column names (as the later reduced_list bar chart does) rather than
# bare Series, so the axes and hover labels show the column names instead of generic ones
fig = px.bar(
    df0_Top20_2021,
    x='country',
    y='cumulative_co2',
    color='country',
    title="Cumulative Emissions to 2021",
    template='plotly_white',
    height=600,
    width=1200,
)
# Each bar is already labelled on the x axis, so the legend is redundant
fig.update_layout(showlegend=False)
fig

## Emissions per capita 2021

In [12]:
# 2021 rows of the top-20 frame, ordered from smallest to largest per-capita emissions
df0_Top20_2021 = df0_Top20[df0_Top20['year'] == 2021].sort_values('co2_per_capita')
# Pass the frame plus column names (as the later reduced_list bar chart does) rather than
# bare Series, so the axes and hover labels show the column names instead of generic ones
fig = px.bar(
    df0_Top20_2021,
    x='country',
    y='co2_per_capita',
    color='country',
    title="Emissions per capita 2021",
    template='plotly_white',
    height=600,
    width=1200,
)
# Each bar is already labelled on the x axis, so the legend is redundant
fig.update_layout(showlegend=False)
fig

## Population 2021

In [13]:
# 2021 rows of the top-20 frame, ordered from smallest to largest population
df0_Top20_2021 = df0_Top20[df0_Top20['year'] == 2021].sort_values('population')
# Pass the frame plus column names (as the later reduced_list bar chart does) rather than
# bare Series, so the axes and hover labels show the column names instead of generic ones
fig = px.bar(
    df0_Top20_2021,
    x='country',
    y='population',
    color='country',
    title="Population 2021",
    template='plotly_white',
    height=600,
    width=1200,
)
# Each bar is already labelled on the x axis, so the legend is redundant
fig.update_layout(showlegend=False)
fig

In [14]:
import entities

# Re-read the full OWID table and keep only the rows whose 'country' value is listed in
# entities.economic_entities (presumably economic groupings, judging by the chart title;
# confirm against entities.py)
df_economic = pd.read_csv('owid-co2-data.csv')
df_economic = df_economic.loc[df_economic['country'].isin(entities.economic_entities)]

# Annual CO2 over time, one line per selected entity
fig = px.line(
    df_economic,
    x='year',
    y='co2',
    color='country',
    title="Emissions by economic ranking",
    template='plotly_white',
    height=600,
    width=1200,
)
# Thicken every trace; the call is the last expression, so its result is the cell output
fig.update_traces(line=dict(width=3))

## World total emissions

In [15]:
# Re-read the full OWID table; df_countries is narrowed to entities.countries in a later cell
df_countries = pd.read_csv('owid-co2-data.csv')
# The rows labelled 'World' are used as the global totals
df_world = df_countries[df_countries['country']=='World']
# Pull out the single 2021 value with .item() instead of calling float() on a one-element
# Series (deprecated in recent pandas); .item() still raises unless exactly one row matches
tot_emissions_2021 = df_world.loc[df_world['year'] == 2021, 'co2'].item()
tot_emissions_2021

37123.852

## List of countries with CO2/CO2 per capita emissions

In [16]:
# Drop every row whose 'country' value is not listed in entities.countries
df_countries = df_countries.loc[df_countries['country'].isin(entities.countries)]

# Narrow the frame to the identifying columns and the two emission measures
lst = ['country', 'year', 'co2', 'co2_per_capita']
country_data = df_countries[df_countries.columns.intersection(lst)]
country_data

Unnamed: 0,country,year,co2,co2_per_capita
0,Afghanistan,1850,,
1,Afghanistan,1851,,
2,Afghanistan,1852,,
3,Afghanistan,1853,,
4,Afghanistan,1854,,
...,...,...,...,...
50593,Zimbabwe,2017,9.596,0.651
50594,Zimbabwe,2018,11.795,0.784
50595,Zimbabwe,2019,11.115,0.724
50596,Zimbabwe,2020,10.608,0.677


## All countries with < 2% of global emissions add up to 36% of total


In [17]:
# 2021 snapshot of the per-country table, printed from largest to smallest emitter
country_data_2021 = country_data.loc[country_data['year'] == 2021]
print(country_data_2021.sort_values(by='co2', ascending=False))

# Combined emissions of every country at or below 2% of the world total
tot_2pc_countries = country_data_2021.loc[
    country_data_2021['co2'] <= tot_emissions_2021*0.02, 'co2'
].sum()

# Share of the world total contributed by those countries
tot_2pc_countries/tot_emissions_2021

                            country  year        co2  co2_per_capita
9425                          China  2021  11472.368           8.046
48158                 United States  2021   5007.336          14.859
21465                         India  2021   2709.684           1.925
38579                        Russia  2021   1755.547          12.099
23184                         Japan  2021   1067.398           8.566
...                             ...   ...        ...             ...
37787                   Puerto Rico  2021        NaN             NaN
39630    Saint Martin (French part)  2021        NaN             NaN
44444        Svalbard and Jan Mayen  2021        NaN             NaN
48330  United States Virgin Islands  2021        NaN             NaN
49809                Western Sahara  2021        NaN             NaN

[233 rows x 4 columns]


0.3593617117103043

## All countries with > 2% of total global emissions - there are 6

In [18]:
# Countries whose 2021 emissions exceed 2% of the world total
tot_98pc_countries = country_data_2021.loc[
    country_data_2021['co2'] > tot_emissions_2021*0.02
]
# Show them from the largest emitter down
tot_98pc_countries.sort_values(by='co2', ascending=False)

Unnamed: 0,country,year,co2,co2_per_capita
9425,China,2021,11472.368,8.046
48158,United States,2021,5007.336,14.859
21465,India,2021,2709.684,1.925
38579,Russia,2021,1755.547,12.099
23184,Japan,2021,1067.398,8.566
21980,Iran,2021,748.879,8.517


In [19]:
# Fraction of the 2021 world total emitted by the countries above the 2% threshold
tot_98pc_countries['co2'].sum()/tot_emissions_2021

0.6131155786312261

In [20]:
# Add one aggregate row ('2percenters') holding the combined emissions of all countries
# at or below the 2% threshold. pd.concat replaces DataFrame.append, which is deprecated
# and no longer available in pandas 2.x. The aggregate row has no co2_per_capita value,
# so that column is NaN for it; ignore_index renumbers the rows from 0.
reduced_list = pd.concat(
    [
        tot_98pc_countries,
        pd.DataFrame([{'country': '2percenters', 'year': 2021, 'co2': tot_2pc_countries}]),
    ],
    ignore_index=True,
)
reduced_list


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,country,year,co2,co2_per_capita
0,China,2021,11472.368,8.046
1,India,2021,2709.684,1.925
2,Iran,2021,748.879,8.517
3,Japan,2021,1067.398,8.566
4,Russia,2021,1755.547,12.099
5,United States,2021,5007.336,14.859
6,2percenters,2021,13340.891,


In [21]:
# Bar per entry of reduced_list (the large emitters plus the '2percenters' aggregate),
# ordered from the largest CO2 value down
fig = px.bar(
    reduced_list.sort_values(by='co2', ascending=False),
    x='country',
    y='co2',
    color='country',
    template='plotly_white',
    height=600,
    width=1200,
)
# Each bar is already labelled on the x axis, so the legend is redundant
fig.update_layout(showlegend=False)
fig


In [22]:
# Same entries as a pie chart: slice size is the CO2 value, slice name is the country
fig = px.pie(
    reduced_list.sort_values(by='co2', ascending=False),
    values='co2',
    names='country',
    template='plotly_white',
    height=600,
    width=600,
)
# Write the label and percentage inside each slice
fig.update_traces(textposition='inside', textinfo='label+percent')
fig

In [23]:
# Inspect every row of the full table for the year 2000
df0.loc[df0['year'] == 2000]

Unnamed: 0,country,year,iso_code,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
150,Afghanistan,2000,AFG,1.954299e+07,1.128379e+10,0.010,0.001,1.047,-0.045,-4.078,...,,0.080,0.000,0.000,0.001,0.000,11.82,14.21,,
422,Africa,2000,,8.189524e+08,,31.310,0.038,886.403,55.992,6.743,...,,8.746,0.040,0.052,0.099,0.007,3119.22,2063.54,-167.010,-18.841
594,Africa (GCP),2000,,,,,,886.392,,,...,,,,,,,,,,
766,Aland Islands,2000,ALA,,,,,,,,...,,,,,,,,,,
938,Albania,2000,ALB,3.182027e+06,1.521426e+10,0.094,0.029,3.025,0.039,1.297,...,,0.031,0.000,0.000,0.000,0.000,8.42,8.16,1.066,35.240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49788,Western Sahara,2000,ESH,2.703890e+05,,,,,,,...,,,,,,,,,,
50060,World,2000,,6.148899e+09,5.986666e+13,721.927,0.117,25453.623,721.234,2.916,...,100.0,100.000,0.321,0.757,1.135,0.058,35994.91,34208.32,-0.002,-0.000
50232,Yemen,2000,YEM,1.862870e+07,7.775042e+10,0.541,0.029,14.494,0.770,5.609,...,,0.043,0.000,0.000,0.000,0.000,34.70,34.70,,
50404,Zambia,2000,ZMB,9.891140e+06,1.496412e+10,0.154,0.016,1.784,0.007,0.366,...,,0.228,0.001,0.002,0.003,0.000,45.42,22.88,2.113,118.450


In [24]:
# Derive GDP per capita as a new column on the full table (this modifies df0 in place)
df0['gdp_per_capita'] = df0['gdp'] / df0['population']
# Per-capita CO2 against per-capita GDP for years after 2013, with an OLS trendline.
# NOTE(review): df0 also holds regional/'World' rows, so they appear to be plotted too -
# confirm whether they should be filtered out first.
px.scatter(
    df0.loc[df0['year'] > 2013],
    x='co2_per_capita',
    y='gdp_per_capita',
    trendline='ols',
    template='plotly_white',
    height=600,
    width=1200,
)

In [25]:

# Total CO2 against total GDP for years after 2013, with an OLS trendline.
# NOTE(review): df0 also holds regional/'World' rows, so they appear to be plotted too -
# confirm whether they should be filtered out first.
px.scatter(
    df0.loc[df0['year'] > 2013],
    x='co2',
    y='gdp',
    trendline='ols',
    template='plotly_white',
    height=600,
    width=1200,
)