<a href="https://colab.research.google.com/github/german-kcj/c02_gdp_population/blob/main/gdp_pcap_co2_emissions_continent_population.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Source Data - Gapminder

https://www.gapminder.org/data/

## CO2 Emissions per person
- https://data.ess-dive.lbl.gov/portals/CDIAC
- CO2 emissions (tonnes per person).  Carbon dioxide emissions from the burning of fossil fuels (metric tonnes of CO2 per person).

## ddf entities geo country - Countries
- https://github.com/open-numbers/ddf--gapminder--systema_globalis/blob/master/ddf--entities--geo--country.csv
- Country names and metadata.  Only the world region (continent) of each country is used here.

## GDP per capita (Price and inflation adjusted, in PPP$2017)
- https://www.gapminder.org/data/documentation/gd001/
- Gross domestic product per person adjusted for differences in purchasing power (in international dollars, fixed 2017 prices, PPP based on 2017 ICP)

## Population
- http://gapm.io/drop
- Total population counts the number of inhabitants in the territory

## Combined Data
digital_moment2030.csv



---



# Gapminder Attempt to Combine Income (GDP adjusted purchasing power) and CO2 Emissions (tonnes per person)

In [None]:
import pandas as pd

# Load the wide CO2 table from disk
df = pd.read_csv('co2_emissions_tonnes_per_person.csv')

# Missing readings are replaced with 0 before reshaping
df_no_nan = df.fillna(value=0)

# Reshape wide -> long: every non-'country' column becomes a 'year' row
co2 = df_no_nan.melt(id_vars='country', var_name='year', value_name='co2')
co2.head()



Unnamed: 0,country,year,co2
0,Afghanistan,1800,0.0
1,Angola,1800,0.0
2,Albania,1800,0.0
3,Andorra,1800,0.0
4,UAE,1800,0.0


In [None]:
import pandas as pd

inc = pd.read_csv('gdp_pcap.csv')

# Keep only the first 220 columns; the later ones are not included in the co2 dataframe
gdp_selected = inc.drop(columns=inc.columns[220:]).copy()

def multiply_by_1000(value):
    """Expand a 'k'-suffixed number string (e.g. ``'1.5k'``) into a float.

    Only strings that start with a numeric character AND end with 'k' are
    converted, so ordinary words that happen to end in 'k' (such as
    'Denmark') pass through untouched.

    Args:
        value: Any cell value.

    Returns:
        ``float(value[:-1]) * 1000`` for a matching string; otherwise
        ``value`` unchanged.
    """
    # value[:1] (rather than value[0]) keeps an empty string from raising IndexError.
    if isinstance(value, str) and value[:1].isnumeric() and value.endswith('k'):
        return float(value[:-1]) * 1000
    return value

# Run the 'k'-suffix expansion over every cell of the table
gdp_no_k = gdp_selected.applymap(multiply_by_1000)

# Try a numeric conversion column by column; columns that cannot be
# converted are returned as they are (errors='ignore')
gdp_numeric = gdp_no_k.apply(pd.to_numeric, errors='ignore')

# Reshape wide -> long: every non-'country' column becomes a 'year' row
gdp = gdp_numeric.melt(id_vars='country', var_name='year', value_name='gdp_pcap')
gdp


Unnamed: 0,country,year,gdp_pcap
0,Afghanistan,1800,599.0
1,Angola,1800,465.0
2,Albania,1800,585.0
3,Andorra,1800,1710.0
4,UAE,1800,1420.0
...,...,...,...
42700,Samoa,2018,6140.0
42701,Yemen,2018,2010.0
42702,South Africa,2018,14000.0
42703,Zambia,2018,3430.0


# With the two dataframes in the same long format (country, year, value), we join them together in one dataframe

In [None]:
# Inner-join GDP and CO2 on the (year, country) pair, keeping only rows present in both
# To spot-check a single country:
#   co2_income[(co2_income.country == 'Brunei')].iloc[0]
co2_income = gdp.merge(co2, how='inner', on=['year', 'country'])
co2_income

Unnamed: 0,country,year,gdp_pcap,co2
0,Afghanistan,1800,599.0,0.0
1,Angola,1800,465.0,0.0
2,Albania,1800,585.0,0.0
3,Andorra,1800,1710.0,0.0
4,UAE,1800,1420.0,0.0
...,...,...,...,...
42262,Samoa,2018,6140.0,1.32
42263,Yemen,2018,2010.0,0.356
42264,South Africa,2018,14000.0,8.1
42265,Zambia,2018,3430.0,0.302


# World Regions

In [None]:
# Only the country name and its world region are needed.  Select the two
# columns by name rather than by position, so this cell keeps working if the
# upstream CSV adds, removes or reorders columns.
regions = (
    pd.read_csv('ddf--entities--geo--country.csv', usecols=['name', 'world_4region'])
    .loc[:, ['name', 'world_4region']]  # usecols keeps file order; pin it explicitly
    .rename(columns={'name': 'country', 'world_4region': 'region'})
)
regions

Unnamed: 0,country,region
0,Abkhazia,europe
1,Aruba,americas
2,Afghanistan,asia
3,Angola,africa
4,Anguilla,americas
...,...,...
268,South Yemen (former),asia
269,Yugoslavia,europe
270,South Africa,africa
271,Zambia,africa


# Merge the world regions into the combined data

In [None]:
# Attach each country's region; the inner join keeps only countries found in both frames
data = co2_income.merge(regions, how='inner', on=['country'])
data

Unnamed: 0,country,year,gdp_pcap,co2,region
0,Afghanistan,1800,599.0,0.0,asia
1,Afghanistan,1801,599.0,0.0,asia
2,Afghanistan,1802,599.0,0.0,asia
3,Afghanistan,1803,599.0,0.0,asia
4,Afghanistan,1804,599.0,0.0,asia
...,...,...,...,...,...
42262,Zimbabwe,2014,2320.0,0.881,africa
42263,Zimbabwe,2015,2310.0,0.881,africa
42264,Zimbabwe,2016,2290.0,0.771,africa
42265,Zimbabwe,2017,2330.0,0.845,africa


# Add Country Population

In [None]:
import pandas as pd
import math

pop_data = pd.read_csv('pop.csv')

# Keep only the first 220 columns, matching the column range used for the other dataframes.
# The slice is taken from pop_data's own columns, so this cell does not
# depend on the GDP frame and cannot drop (or miss) labels that differ between files.
pop_selected = pop_data.drop(pop_data.columns[220:], axis=1).copy()

def multiply_by_1000(value):
    """Expand a 'k'/'M'/'B'-suffixed number string into a whole number.

    Only strings that start with a numeric character AND end with one of the
    suffixes 'k' (thousand), 'M' (million) or 'B' (billion) are converted, so
    ordinary words that happen to end in such a letter (such as 'Denmark')
    pass through untouched.

    Args:
        value: Any cell value.

    Returns:
        An ``int`` (the ceiling of number * multiplier) for a matching
        string; otherwise ``value`` unchanged.  A matching string whose
        numeric part cannot be parsed is also returned unchanged.
    """
    # Imported locally so the function is self-contained.
    from decimal import Decimal, InvalidOperation

    multipliers = {'k': 1000, 'M': 1000000, 'B': 1000000000}

    # value[:1] (rather than value[0]) keeps an empty string from raising IndexError.
    if not (isinstance(value, str) and value[:1].isnumeric()):
        return value

    multiplier = multipliers.get(value[-1])
    if multiplier is None:
        return value

    try:
        # Decimal arithmetic is exact for these inputs.  With binary floats the
        # product can land a hair above the intended whole number, and
        # math.ceil would then bump the result up by one.
        return math.ceil(Decimal(value[:-1]) * multiplier)
    except InvalidOperation:
        return value

# Run the suffix expansion over every cell of the table
pop_no_k_no_m = pop_selected.applymap(multiply_by_1000)

# Try a numeric conversion column by column; columns that cannot be
# converted are returned as they are (errors='ignore')
pop_numeric = pop_no_k_no_m.apply(pd.to_numeric, errors='ignore')

# Reshape wide -> long: every non-'country' column becomes a 'year' row
pop = pop_numeric.melt(id_vars='country', var_name='year', value_name='population')
pop

Unnamed: 0,country,year,population
0,Afghanistan,1800,3280000.0
1,Angola,1800,1570000.0
2,Albania,1800,400000.0
3,Andorra,1800,2650.0
4,UAE,1800,40200.0
...,...,...,...
43138,Samoa,2018,210000.0
43139,Yemen,2018,30800000.0
43140,South Africa,2018,57300000.0
43141,Zambia,2018,17800000.0


# Merge Population

In [None]:
# Join population onto the combined table, matching on country and year
all_data = data.merge(pop, how='inner', on=['country', 'year'])

# Save the combined dataset; index=True writes the row index as the first column
all_data.to_csv('./digital_moment2030.csv', index=True)
all_data

Unnamed: 0,country,year,gdp_pcap,co2,region,population
0,Afghanistan,1800,599.0,0.0,asia,3280000.0
1,Afghanistan,1801,599.0,0.0,asia,3280000.0
2,Afghanistan,1802,599.0,0.0,asia,3280000.0
3,Afghanistan,1803,599.0,0.0,asia,3280000.0
4,Afghanistan,1804,599.0,0.0,asia,3280000.0
...,...,...,...,...,...,...
42262,Zimbabwe,2014,2320.0,0.881,africa,13900000.0
42263,Zimbabwe,2015,2310.0,0.881,africa,14200000.0
42264,Zimbabwe,2016,2290.0,0.771,africa,14500000.0
42265,Zimbabwe,2017,2330.0,0.845,africa,14800000.0


# Plot It

## Plot co2 bar, color continent


In [None]:
import plotly.express as px

# CO2 per year as bars, coloured by region; the country is shown on hover
px.bar(
    all_data,
    x='year',
    y='co2',
    color='region',
    hover_data='country',
)

# Plot co2 bar, colour continent (grouped continents)

In [None]:
import plotly.express as px

# Same bar chart, with barmode='group' so each region gets its own bar per year
px.bar(
    all_data,
    x='year',
    y='co2',
    color='region',
    hover_data='country',
    barmode='group',
)

Plot co2 bar,

## Plot co2, gdp per capita and population

In [None]:
import plotly.express as px

# Largest marker size passed to plotly is size_max_default * scaling_factor
size_max_default = 30
scaling_factor = 3

# Animated bubble chart: one frame per year, one bubble per country,
# bubble area driven by population and colour by region.
fig = px.scatter(
    all_data,
    x='gdp_pcap',
    y='co2',
    hover_data='country',
    color='region',
    log_x=True,
    size='population',
    size_max=size_max_default*scaling_factor,
    animation_frame='year',
    animation_group='country',
    # Fixed axis ranges keep the view steady across animation frames
    range_y=[-2,40],
    range_x=[500,128000],
    labels={
        "gdp_pcap": "GDP per capita (Price and inflation adjusted, in PPP$2017)",
        # Letter O, not digit zero, to agree with the chart title
        "co2": "CO2 emissions (tonnes per person)"
    },
    title="CO2 Emissions - GDP Per Capita",
    color_discrete_sequence=px.colors.qualitative.Bold
)

# Styling: centred, enlarged title and custom text / background colours
fig.update_layout(
    title_font_size=30,
    title_font_color='#fff',
    title_x=0.5,
    font_color="#ccc",
    paper_bgcolor='#3b536b',
    plot_bgcolor='#eeefff'
)

fig.show()