In [1]:
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

# Import PostgreSQL password
from config import db_password

In [2]:
# Build the SQLAlchemy engine for the local carbon_db database.
# NOTE: PostgreSQL's default port is 5432; this notebook connects on 5433.
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/carbon_db"
engine = create_engine(db_string)

# Read the whole carbon_emissions table into a DataFrame.
# The context manager closes the connection even if the query raises,
# so a failed read cannot leave a connection open.
with engine.connect() as dbConnection:
    df = pd.read_sql('select * from "carbon_emissions"', dbConnection)

In [3]:
# Reshape from wide (one column per year) to long (one row per country/year).
# NOTE: this cell rebinds `df`; re-run the load cell first before re-running it.
df = (
    df.set_index('country')
      .stack()
      # Drop missing values explicitly: the legacy stack() removes NaN on its
      # own, but the reimplemented stack (future_stack=True) keeps them.
      .dropna()
      # Name both index levels so the result does not depend on the
      # auto-generated 'level_1' label.
      .rename_axis(index=['country', 'year'])
      .reset_index(name='co2')
      .astype({'year': 'int64'})
)

# Eliminate rows with country groups rather than individual country
country_groups = [
    'World', 'Asia', 'Upper-middle-income countries', 'High-income countries', 'Asia (excl. China & India)',
    'Lower-middle-income countries', 'North America', 'Europe', 'European Union (28)', 'European Union (27)',
    'Europe (excl. EU-27)', 'Europe (excl. EU-28)', 'Africa', 'North America (excl. USA)', 'International transport',
    'South America', 'Oceania', 'Low-income countries'
]
# The index is deliberately not reset: rows keep their pre-filter labels.
df = df[~df['country'].isin(country_groups)]

In [4]:
# Sanity check: (rows, columns) of the long-format frame after the group filter
df.shape

(57723, 3)

In [5]:
# Ten largest co2 values among the rows for year 2020
df.loc[df['year'] == 2020].sort_values(by='co2', ascending=False).head(10)

Unnamed: 0,country,year,co2
11923,China,2020,10667.887
59890,United States,2020,4712.771
26828,India,2020,2441.792
47153,Russia,2020,1577.136
29267,Japan,2020,1030.775
27641,Iran,2020,745.035
22492,Germany,2020,644.31
49592,Saudi Arabia,2020,625.508
53115,South Korea,2020,597.605
27099,Indonesia,2020,589.5


In [6]:
# Countries of the ten rows with the highest co2 in 2020, highest first
top_emitters_2020 = df.loc[df['year'] == 2020].sort_values(by='co2', ascending=False).head(10)
country_list = top_emitters_2020['country'].tolist()
country_list

['China',
 'United States',
 'India',
 'Russia',
 'Japan',
 'Iran',
 'Germany',
 'Saudi Arabia',
 'South Korea',
 'Indonesia']