# 1. CO2 emissions per capita in the world in 2020
by Alex Canas Rengifo (UNI: afc2176)

This project uses the World Bank's ESG Data Draft dataset, which provides information on 17 key sustainability themes spanning environmental, social, and governance categories. My project focuses primarily on per capita CO2 emissions across the world's countries in the year 2020.

**Reference**

https://datacatalog.worldbank.org/search/dataset/0037651/Environment--Social-and-Governance-Data



**Project 1: using the pandas library**

In [1]:
import plotly.io as pio

# Set plotly's default renderer to a "+"-joined list of renderer names
# (VS Code, JupyterLab, and the connected classic-notebook renderer).
# NOTE(review): no plotly figure is created in the cells visible here — confirm
# this configuration is still needed.
pio.renderers.default = "vscode+jupyterlab+notebook_connected"


In [2]:
import pandas as pd
# Load the full ESG dataset; as the preview shows, each row is one
# (country or region, indicator) pair and each year from 1960 on is a column.
esg=pd.read_csv('ESGCSV.csv')
esg.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Arab World,ARB,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.ZS,,,,,,,...,86.705717,86.942778,87.228705,87.390856,87.617862,87.79874,87.948264,88.092536,,
1,Arab World,ARB,Access to electricity (% of population),EG.ELC.ACCS.ZS,,,,,,,...,88.832276,89.053852,89.539016,90.662754,89.176939,90.352802,90.63505,90.845661,,
2,Arab World,ARB,Adjusted savings: natural resources depletion ...,NY.ADJ.DRES.GN.ZS,,,,,,,...,10.050554,6.130655,5.265859,6.245422,8.187714,7.234436,4.598506,,,
3,Arab World,ARB,Adjusted savings: net forest depletion (% of GNI),NY.ADJ.DFOR.GN.ZS,,,,,,,...,0.084361,0.096672,0.092911,0.102684,0.057123,0.064516,0.075686,,,
4,Arab World,ARB,Agricultural land (% of land area),AG.LND.AGRI.ZS,,30.981414,30.982663,31.007054,31.018001,31.042466,...,39.834421,39.872575,39.937814,39.984452,39.969738,39.907031,39.97329,39.970742,,


In [3]:
# Build a subset of the ESG DataFrame restricted to the indicator
# "CO2 emissions (metric tons per capita)" (series code "EN.ATM.CO2E.PC"),
# keeping only the identifying columns and the values for the year 2020.
is_co2_per_capita = esg['Indicator Code'].eq('EN.ATM.CO2E.PC')
co2_columns = ['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code', '2020']

CO2_em = esg.loc[is_co2_per_capita, co2_columns]


The original dataset contains information for both countries and regions, but I am only interested in information at the country level. Therefore, CO2_em is merged with another dataset that contains only country codes, using an inner join so that only rows whose country code appears in both datasets are kept.


In [4]:
#The following code opens the countries dataset (a single 'Country Code' column,
#as the preview shows), which is used to keep only country-level rows.
#NOTE(review): the standard-library part of this notebook reads 'countries_codes.txt'
#instead — presumably the same list of codes; confirm both files stay in sync.

countries=pd.read_csv('countries_codes.csv')
countries.head()


Unnamed: 0,Country Code
0,KHM
1,CHE
2,NGA
3,CZE
4,UZB


In [5]:
# Inner-join the CO2 subset with the country list so that only rows whose
# 'Country Code' appears in both frames remain, then rename the '2020' column
# to state what it holds: CO2 emissions in 2020.
CO2_em_countries = (
    CO2_em
    .merge(countries, on='Country Code', how='inner')
    .rename(columns={'2020': 'CO2_emissions_2020'})
)
CO2_em_countries.head()


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,CO2_emissions_2020
0,Afghanistan,AFG,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,0.223479
1,Albania,ALB,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,1.54455
2,Algeria,DZA,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,3.718223
3,Andorra,AND,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,5.777148
4,Angola,AGO,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,0.592743


In [6]:
# 4.b Calculate the a) mean, b) median, and c) mode of per capita emissions.
emissions_2020 = CO2_em_countries['CO2_emissions_2020']

CO2_emissions_mean = emissions_2020.mean()
CO2_emissions_median = emissions_2020.median()
# NOTE(review): Series.mode() returns every most-frequent value, sorted; the
# print below reports only the first (smallest) of them. If no value repeats,
# that is simply the minimum of the column — confirm this is the intended figure.
CO2_emissions_mode = emissions_2020.mode()

print(f'Mean:{CO2_emissions_mean:.2f} tons of CO2 per capita')
print(f'Median:{CO2_emissions_median:.2f} tons of CO2 per capita')
if CO2_emissions_mode.empty:
    print('No mode')
else:
    print(f'Mode:{CO2_emissions_mode[0]:.2f} tons of CO2 per capita')


Mean:3.81 tons of CO2 per capita
Median:2.41 tons of CO2 per capita
Mode:0.03 tons of CO2 per capita


**Project 1: using Python standard library**

In [7]:

# I am interested in obtaining the mean, median and mode of the per capita emissions using country-level information.
# First, I create a list of the country codes from a .txt file:

def country_codes(path='countries_codes.txt'):
    """Return the country codes listed in a text file, one code per line.

    Args:
        path: Text file to read. Defaults to 'countries_codes.txt', the file
            name that was previously hard-coded, so existing calls with no
            argument behave as before.

    Returns:
        list[str]: One entry per line of the file, in file order, with the
        trailing newline removed. Blank lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file even if reading raises.
    with open(path, 'r') as countrycodes_file:
        return [line.rstrip('\n') for line in countrycodes_file]


In [8]:
# Load the ESG database with the csv module, keeping only the variable of interest.

import csv

# The csv documentation asks for newline='' so that quoted fields containing
# line breaks are parsed correctly.
with open('ESGCSV.csv', 'r', newline='') as file:
    # Materialize every row as a dict so the data can be filtered with list
    # comprehensions after the file has been closed.
    df = list(csv.DictReader(file))

# Filter once for the CO2-per-capita indicator instead of re-scanning all rows
# for each output column.
co2_rows = [row for row in df if row['Indicator Code'] == 'EN.ATM.CO2E.PC']

# Column-oriented dictionary; the three lists are index-aligned.
# The emission values are still strings at this point.
CO2 = {
    'Country Name': [row['Country Name'] for row in co2_rows],
    'Country Code': [row['Country Code'] for row in co2_rows],
    'CO2_emissions_2020': [row['2020'] for row in co2_rows],
}

In [9]:
#I printed a sample to make sure that my new dictionary contains the information I need
#(the values are still strings here; they are converted to float when the statistics are computed)

print("CO2_emissions_2020 Sample:", CO2['CO2_emissions_2020'][:5])

CO2_emissions_2020 Sample: ['3.9290777444704865', '4.402687901087046', '5.857205849544673', '2.0168567830325186', '6.221888395314452']


**Mean calculation**


In [10]:

# Mean of the 2020 per capita emissions over rows that belong to a listed
# country and have a value.

# Read the country-code file once (it was previously re-read for every row)
# and use a set for constant-time membership tests.
valid_codes = set(country_codes())

count_emissions = 0
count = 0

for code, value in zip(CO2['Country Code'], CO2['CO2_emissions_2020']):
    # Skip rows whose code is not in the country list or whose 2020 value is missing.
    if code in valid_codes and value != '':
        # The csv module yields strings, so convert before summing.
        count_emissions += float(value)
        count += 1

# Guard against division by zero when no row qualifies.
mean = count_emissions / count if count > 0 else 0

print(f'the mean is: {mean:.2f} tons of CO2 per capita')


the mean is: 3.81 tons of CO2 per capita


**Median calculation**

In [11]:
# Collect the 2020 emissions (as floats) of rows that belong to a listed
# country and have a value. The country-code file is read once here rather
# than once per row.
valid_codes = set(country_codes())

l = [
    float(value)
    for code, value in zip(CO2['Country Code'], CO2['CO2_emissions_2020'])
    if code in valid_codes and value != ''
]

l2 = sorted(l)

print(len(l2))

if not l2:
    # Without this guard the even-length branch would index an empty list.
    print('No data available to compute the median')
else:
    middle = len(l2) // 2
    if len(l2) % 2 == 1:
        # Odd count: the single middle element (indices start at zero).
        median = l2[middle]
    else:
        # Even count: average the two central elements.
        median = (l2[middle - 1] + l2[middle]) / 2
    print(f'The median is: {median:.2f} tons of CO2 per capita')


190
The median is: 2.41 tons of CO2 per capita


In [12]:
from statistics import multimode

# The values of CO2 emissions per capita are rounded to two decimals (forcing
# ties to some extent) in order to find out whether the number of modes is
# lower than the number of data points (190).
Rounded_l2 = list(map(lambda number: round(number, 2), l2))
modes = multimode(Rounded_l2)
num_modes = len(modes)

print(f"The number of modes is: {num_modes}")
print(f"The modes are: {modes}")

# Result: 24 modes were found in the sample.

The number of modes is: 24
The modes are: [0.04, 0.09, 0.13, 0.16, 0.2, 0.22, 0.23, 0.34, 0.37, 0.39, 0.41, 0.51, 0.6, 0.63, 0.65, 1.0, 1.12, 1.14, 1.54, 1.96, 2.07, 2.26, 2.62, 3.95]


In [13]:
# Goal: compare CO2 emissions per capita across income levels graphically using a sparkline.

# Aggregate codes of the high-, middle- and low-income groups in the dataset.
income_level_codes = ['HIC', 'MIC', 'LIC']

is_co2_indicator = esg['Indicator Code'] == 'EN.ATM.CO2E.PC'
is_income_group = esg['Country Code'].isin(income_level_codes)

# .copy() makes the subset an independent DataFrame, so the column assignments
# made in the following cells do not trigger pandas' SettingWithCopyWarning.
CO2_income_levels = esg.loc[
    is_co2_indicator & is_income_group,
    ['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code', '2020'],
].copy()
CO2_income_levels

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,2020
1004,High income,HIC,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,8.749965
1856,Low income,LIC,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,0.269341
2211,Middle income,MIC,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,3.604249


In [14]:
# Make sure the 2020 column is float before it is rounded in the next cell.
CO2_income_levels['2020']= CO2_income_levels['2020'].astype(float)

In [15]:
# One decimal place is enough for display.
CO2_income_levels['2020'] = CO2_income_levels['2020'].round(1)

# A lambda is an anonymous function (i.e., defined without a name) that can take
# any number of arguments but, unlike a normal function, evaluates and returns
# only one expression. Here it receives each value of the '2020' column, rounds
# it to an integer and repeats the string '*' that many times, so a value that
# rounds to 0 yields an empty sparkline.
CO2_income_levels['sparkline'] = CO2_income_levels['2020'].apply(
    lambda tons: '*' * round(tons)
)

CO2_income_levels

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,2020,sparkline
1004,High income,HIC,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,8.7,*********
1856,Low income,LIC,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,0.3,
2211,Middle income,MIC,CO2 emissions (metric tons per capita),EN.ATM.CO2E.PC,3.6,****


In [16]:
# Print column headers.
# The f-strings use double quotes outside and single quotes inside: reusing the
# same quote type inside a replacement field is only valid from Python 3.12
# (PEP 701) and is a SyntaxError on earlier versions.
print(f"{'Country Name':<30} {'Emissions per Capita 2020':<30}")

# Print a separator line
print(f"{'-'*30} {'-'*30}")

# One text row per income group: name padded to 30 columns, sparkline to 20.
for name, sparkline in zip(CO2_income_levels['Country Name'], CO2_income_levels['sparkline']):
    print(f"{name:<30} {sparkline:<20}")

Country Name                   Emissions per Capita 2020     
------------------------------ ------------------------------
High income                    *********           
Low income                                         
Middle income                  ****                
