In [51]:
import pandas as pd
import numpy as np

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [52]:
# load data emissions data up to 2021 - from Jones 2023 https://zenodo.org/records/7636699#.ZA9U6uzP30o

#Updated emissions data from Jones 2023 that contains both fossil CO2
df=emissions_jones = pd.read_csv('input-clean/EMISSIONS_ANNUAL_1830-2021.csv') 
#rename columns to match previous
df=df.rename(columns = {'CNTR_NAME':'country'})
df=df.rename(columns = {'ISO3':'iso_code'})
df=df.rename(columns = {'Year':'year'})

# removes non-country groupings for which iso_code not three characters - https://zenodo.org/records/7636699/files/COUNTRY_GROUPINGS.xlsx?download=1
df = df[df['iso_code'].str.len() == 3]
#Remove other non-country groups that have 3 characters
df = df[~((df['iso_code']=='LDC') | (df['iso_code']=='EIT'))]

# remove data from before 1849 - want data from 1850
df = df[df['year']>1849]

#isolate fossil CO2 and landuse emissions - units are in Pg of CO2 per year - which is equivalent to GTCO2e
#Filter out fossil CO2
fCO2_data=df[(df['Gas']=="CO[2]") & (df['Component']=="Fossil")]

#Combine Kuwait with Kuwaiti Oil Fires (as per OWID approach/methodology)
kuwait_index=fCO2_data.loc[fCO2_data['country']=='Kuwait'].index
kuwait_oil_fires=fCO2_data.loc[fCO2_data['country']=='Kuwaiti Oil Fires','Data']
kuwait_oil_fires.index=kuwait_index
fCO2_data.loc[fCO2_data['country']=='Kuwait','Data']=fCO2_data.loc[fCO2_data['country']=='Kuwait','Data']+kuwait_oil_fires
fCO2_data=fCO2_data[fCO2_data['country']!='Kuwaiti Oil Fires']
# display(fCO2_data[(fCO2_data['country']=='Kuwait')])

#Filter out land use CO2
LULUCF_CO2_data=df[(df['Gas']=="CO[2]") & (df['Component']=="LULUCF")]

#Check shape of fossil and land use data
[fCO2_data.shape, LULUCF_CO2_data.shape]


[(38700, 7), (34400, 7)]

In [53]:
#Import IEA data for international shipping and aviation and format data so it is usable

#Shipping data https://www.iea.org/data-and-statistics/charts/co2-emissions-from-international-shipping-in-the-net-zero-scenario-2000-2030
iship = pd.read_csv('input-clean/co2-emissions-from-international-shipping-in-the-net-zero-scenario-2000-2030.csv',header=None,usecols=list(range(2, 49 + 1))) 
drop_idx = list(range(2,iship.shape[1]+1,2))
iship_df = iship.drop(drop_idx, axis=1)
column_names=list(range(2000,2023+1))
iship_df.columns=column_names

#Aviation data https://www.iea.org/data-and-statistics/charts/direct-co2-emissions-from-aviation-in-the-net-zero-scenario-2000-2030
iavia = pd.read_csv('input-clean/direct-co2-emissions-from-aviation-in-the-net-zero-scenario-2000-2030.csv',header=None,delimiter=';',usecols=list(range(4, 97)))
drop_idx = list(range(4+2,iavia.shape[1],4))
iavia_df = iavia.drop(iavia.columns.difference(drop_idx), axis=1)
column_names=list(range(2000,2021+1))
iavia_df.columns=column_names

#Calculate share of emissions from aviations compared to shipping in 2021 - use this later
avia_share_2021=iavia_df[2021]/(iavia_df[2021]+iship_df[2021])
display(avia_share_2021[0])

#Calculate percentage change in shipping in emissions from 2021 to 2022
shipping_pct_change_2021_2022=(iship_df[2022]-iship_df[2021])/iship_df[2021]
display(shipping_pct_change_2021_2022[0])

0.36460368340193083

0.05373134328358209

In [54]:
# Latest data from Carbon Monitor to get estimated % changes during 2022/2023
# https://carbonmonitor.org/

#cm = pd.read_csv('input/carbonmonitor-global_datas_2022-10-10.csv') #https://carbonmonitor.org/variation
cm = pd.read_csv('input-clean/carbonmonitor-global_datas_2023-10-13.csv') 

# convert dates in Carbon Monitor data
cm['date'] = pd.to_datetime(cm.date,dayfirst=True) # ensure dates processed correctly (day first)
cm['year'] = cm.date.dt.year
cm['month'] = cm.date.dt.month
cm['day'] = cm.date.dt.day

In [55]:
#Import OWID database to get consumption emissions data
owid = df = pd.read_csv('input-clean/owid-co2-data.csv') 
owid_fossil_CO2=owid[['country','iso_code','year','co2','consumption_co2','trade_co2','trade_co2_share']]
owid_fossil_cons_CO2=owid_fossil_CO2[['country','iso_code','year','consumption_co2','trade_co2_share']]


In [56]:
# load population data with projections to 2022 and 2023
# source is OWID population series - https://ourworldindata.org/grapher/population-with-un-projections
pop = pd.read_csv('input-clean/population-long-run-with-projections.csv')
pop.columns = ['country','iso_code','year','population','population estimate']
#Combine historical data with projections
pop['population']=pop['population'].combine_first(pop['population estimate'])
pop = pop.drop(pop.columns[-1], axis=1)
#Take out entries with nan iso code - regions
pop = pop[pop.iso_code.notna()]
#Take out world
pop_world=pop[(pop['country']=='World')]
pop=pop[~(pop['country']=='World')]
#Replace OWID_KOS with KSV - to incorporate Kosovo
pop['iso_code'].replace(to_replace='OWID_KOS',value='KSV',inplace=True)
#Filter out data for 1849-2023
pop_1850_2023=pop[(pop['year'] >= 1849) & (pop['year'] <= 2023)].sort_values(by=['iso_code','year'])


In [57]:
# extract pct change during 2022 for key countries, world and ROW
proj22 = cm[['country','year','value']].groupby(by=['country','year']).sum().reset_index().pivot_table(index='country',columns='year',values='value').reset_index()
#Extract data for international aviation and add to table
proj22_IA = cm[(cm['country']=="WORLD") & (cm['sector']=="International Aviation")][['country','year','value']].groupby(by=['country','year']).sum().reset_index().pivot_table(index='country',columns='year',values='value').reset_index()
proj22_IA.loc[0,'country']='International Aviation'
proj22=pd.concat([proj22,proj22_IA], axis=0)
#Add data for international shipping from IEA - available up to 2022 so can get % pct change
proj22_IS=proj22_IA.copy()
proj22_IS=proj22_IS.merge(iship_df.loc[:,2019:],how='right')
proj22_IS.loc[0,'country']='International Shipping'
proj22=pd.concat([proj22,proj22_IS], axis=0)
proj22['pct_22'] = proj22[2022]/proj22[2021]-1


  proj22_IS=proj22_IS.merge(iship_df.loc[:,2019:],how='right')


In [58]:
# extract pct change during 2023 ytd for key countries, world and ROW - only taking totals up to August 2023 (month=8) so that can make fair comparison
proj23 = cm[['country','year','value']][cm.month<8].groupby(by=['country','year']).sum().reset_index().pivot_table(index='country',columns='year',values='value').reset_index()
proj23_IA = cm[(cm['country']=="WORLD") & (cm['sector']=="International Aviation")][['country','year','value']][cm.month<8].groupby(by=['country','year']).sum().reset_index().pivot_table(index='country',columns='year',values='value').reset_index()

#Add International aviation data to country data
proj23_IA.loc[0,'country']='International Aviation'
proj23=pd.concat([proj23,proj23_IA], axis=0)

#Add International shipping data to country data
proj23_IS=proj23_IA.copy()
proj23_IS.loc[0,2019:]=np.nan
proj23_IS.loc[0,'country']='International Shipping'
proj23=pd.concat([proj23,proj23_IS], axis=0).reset_index(names='year',drop=True)
proj23['pct_23'] = proj23[2023]/proj23[2022]-1

#Assume that international shipping emissions do not change from 2022 to 2023 (set % change to 0) - data not available for this but reasonable assumption
proj23.loc[proj23['country']=='International Shipping','pct_23'] = 0
# visual check


  proj23_IA = cm[(cm['country']=="WORLD") & (cm['sector']=="International Aviation")][['country','year','value']][cm.month<8].groupby(by=['country','year']).sum().reset_index().pivot_table(index='country',columns='year',values='value').reset_index()


In [59]:
# combine pct changes for 2022 and 2023ytd

proj = pd.merge(proj22[['country','pct_22']],proj23[['country','pct_23']])
#Combine aviation and shipping into bunker entry, based on share of aviation and shipping from 2021, and add to main dataframe
bunker_22=proj.loc[proj['country']=='International Aviation','pct_22'].values[0]*(avia_share_2021[0])+proj.loc[proj['country']=='International Shipping','pct_22'].values[0]*(1-avia_share_2021[0])
bunker_23=proj.loc[proj['country']=='International Aviation','pct_23'].values[0]*(avia_share_2021[0])+proj.loc[proj['country']=='International Shipping','pct_23'].values[0]*(1-avia_share_2021[0])
bunker_data=pd.DataFrame([['Bunkers',bunker_22,bunker_23]],columns=['country','pct_22','pct_23'])
proj=pd.concat([proj,bunker_data],axis=0).reset_index(drop=True)

# rename for consistency with OWID formatting
proj.country = np.where(proj.country=='UK','United Kingdom',proj.country)
proj.country = np.where(proj.country=='US','United States',proj.country)
proj.country = np.where(proj.country=='WORLD','World',proj.country)

#Import list of EU countries - Link
# https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&ved=2ahUKEwiurp2Kq_2BAxUVW0EAHRhwDlEQFnoECA8QAQ&url=https%3A%2F%2Fec.europa.eu%2Feurostat%2Fstatistics-explained%2Fimages%2F9%2F9f%2FCountry_Codes_and_Names.xlsx&usg=AOvVaw3E1WY8Fd33SHLq3c0FM01U&opi=89978449
EU_countries= pd.read_csv('input-clean/EU_UK_countries.csv') 
EU_countries=EU_countries.rename(columns = {'COUNTRY NAME':'country'})
EU=EU_countries[['country']]
#Add other EU countries to the list by merging and then setting percentage change to EU values
merged_df = pd.concat([proj, EU]).drop_duplicates(subset='country').reset_index(drop=True)
EU_row = merged_df[merged_df['country'] == 'EU27 & UK']
merged_df['pct_22'].fillna(EU_row['pct_22'].item(),inplace=True)
merged_df['pct_23'].fillna(EU_row['pct_23'].item(),inplace=True)

proj=merged_df.reset_index(drop=True)
#

In [60]:
# code to add projected fossil emissions for 2022 and 2023

#Merge dataset based on country names, including all EU countries
df1 = pd.merge(fCO2_data, proj, how='outer')
df1.set_index('country',inplace=True)

#Assign percentage change to countries in the dataset that are not included (use Rest of World - ROW - values)
df1['pct_22'] = np.where(df1['pct_22'].isna(),df1['pct_22'].loc['ROW'],df1['pct_22'])
df1['pct_23'] = np.where(df1['pct_23'].isna(),df1['pct_23'].loc['ROW'],df1['pct_23'])

#take only data for the last year in dataset - 2021
df2 = df1[df1.year==2021]
#Calculate emissions in 2022 and 2023
df2.loc[:,['Data_2022']] = df2.Data * (1+df2.pct_22)
df2.loc[:,['Data_2023']] = df2.Data_2022 * (1+df2.pct_23)
df2.loc[:,['year_22']] = 2022
df2.loc[:,['year_23']] = 2023
df2=df2.reset_index()
columns_list=['country','iso_code','Gas','Component','year','Data','Unit']
df2022 = df2[['country','iso_code','Gas','Component','year_22','Data_2022','Unit']]
df2022.columns = columns_list
df2023 = df2[['country','iso_code','Gas','Component','year_23','Data_2023','Unit']]
df2023.columns = columns_list

#merge 2022 and 2023 data with existing dataset
df3 = pd.merge(fCO2_data,df2022,how='outer')
# print(df3)
df4 = pd.merge(df3,df2023,how='outer')
fCO2_data = df4


In [61]:
#Replace fossil territorial emissions with consumption based emissions where the data is available
# cons_emissions_rel_terri.head()

owid_fossil_cons_CO2['consumption_co2']=owid_fossil_cons_CO2['consumption_co2']/1000
owid_fossil_cons_CO2['trade_co2_share']=owid_fossil_cons_CO2['trade_co2_share']/100
fCO2_data=fCO2_data.merge(owid_fossil_cons_CO2[['iso_code','year','consumption_co2','trade_co2_share']],on=['iso_code','year'],how='left')

#Apply trade CO2 share value from 2019 to the missing years (2021-2023) - assuming that share of trade stays constant and ignoring COVID year
trade_CO2_share_2019=fCO2_data.loc[fCO2_data['year']==2019,'trade_co2_share']
for year in [2021,2022,2023]:
    year_index=fCO2_data.loc[fCO2_data['year']==year].index
    trade_CO2_share_2019.index=year_index
    fCO2_data.loc[fCO2_data['year']==year,'trade_co2_share']=trade_CO2_share_2019
    fCO2_data.loc[fCO2_data['year']==year,'consumption_co2']=(1+fCO2_data.loc[fCO2_data['year']==year,'trade_co2_share'])*fCO2_data.loc[fCO2_data['year']==year,'Data']

#Replace consumption co2 with territory co2 where consumption CO2 not available
fCO2_data['consumption_co2']=np.where(fCO2_data['consumption_co2'].isna(),fCO2_data['Data'],fCO2_data['consumption_co2'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  owid_fossil_cons_CO2['consumption_co2']=owid_fossil_cons_CO2['consumption_co2']/1000
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  owid_fossil_cons_CO2['trade_co2_share']=owid_fossil_cons_CO2['trade_co2_share']/100


In [62]:
# add 2022 and 2023 data to LULUCF data - assume that emissions remain the same as in 2021
df5=LULUCF_CO2_data.copy() 
df5.set_index('country',inplace=True)

#take only data for the last year in dataset - 2021
df6 = df5[df5.year==2021]
#Calculate emissions in 2022 and 2023 - assume same as in 2021
df6.loc[:,['Data_2022']] = df6.Data
df6.loc[:,['Data_2023']] = df6.Data
df6.loc[:,['year_22']] = 2022
df6.loc[:,['year_23']] = 2023
df6=df6.reset_index()

columns_list=['country','iso_code','Gas','Component','year','Data','Unit']
df2022_LU = df6[['country','iso_code','Gas','Component','year_22','Data_2022','Unit']]
df2022_LU.columns = columns_list
df2023_LU = df6[['country','iso_code','Gas','Component','year_23','Data_2023','Unit']]
df2023_LU.columns = columns_list

#merge 2022 and 2023 data with existing dataset
df7 = pd.merge(LULUCF_CO2_data,df2022_LU,how='outer')
df8 = pd.merge(df7,df2023_LU,how='outer')

LULUCF_CO2_data=df8


In [63]:

#Merge fossil CO2 and LULUCF, including unique entries too
total_CO2_data=fCO2_data.merge(LULUCF_CO2_data[['country','year','Component','Data']], on=['country','year'], how='left')
#print(total_CO2_data.loc[total_CO2_data['iso_code']=='ABW'])
#Add Fossil and LULUCF together to get Total CO2 - ignore Nan values for LULUCF and treat them as 0 
total_CO2_data.loc[:,'Data_x+y']=total_CO2_data['Data_x'].add(total_CO2_data['Data_y'],fill_value=0)

#print(total_CO2_data)
#Restructure and rename dataframe into original format
total_CO2_data_final=total_CO2_data[['iso_code','country','year','Data_x','Data_y','Data_x+y','consumption_co2']]
#Add units to the labels for clarity - P gCO2 = 10^15 gCO2 = 10^9 tCO2 = GtCO2
total_CO2_data_final=total_CO2_data_final.rename(columns={'Data_x': 'Fossil CO2/GtCO2', 'Data_y': 'Land CO2/GtCO2','Data_x+y': 'Total/GtCO2','consumption_co2': 'Fossil Cons CO2/GtCO2'})

#Set LULUCF Nan values to 0
total_CO2_data_final['Land CO2/GtCO2']=total_CO2_data_final['Land CO2/GtCO2'].fillna(0)

#Add total with consumption emissions included
total_CO2_data_final['Cons Total/GtCO2']=total_CO2_data_final['Fossil Cons CO2/GtCO2']+total_CO2_data_final['Land CO2/GtCO2']


In [64]:
# merge CO2 data with population data, continent data and flags

#Merge population
df1 = total_CO2_data_final.merge(pop_1850_2023[['iso_code','year','population']],on=['iso_code','year'],how='left')

# Source for region definitions https://ourworldindata.org/grapher/continents-according-to-our-world-in-data?overlay=data
#Merge regions
regions = pd.read_csv('input-clean/continents-according-to-our-world-in-data.csv')
regions.columns = ['country','iso_code','year','region']
regions_country=regions[['country','region']]
regions_country.set_index('country',inplace=True)
regions = regions[['iso_code','region']]
df2 = pd.merge(df1,regions,on=['iso_code'],how='left')

#Merge flags
flags = pd.read_csv('input-clean/flags.csv')
flags=flags[['iso_code','image']]
#print(flags)
df3 = pd.merge(df2,flags,how='left')

#Add supplementary data for bunkers
df3.loc[df3['iso_code']=='ZZZ','population']=np.nan
df3.loc[df3['iso_code']=='ZZZ','region']='World'
df3.loc[df3['iso_code']=='ZZZ','image']='https://upload.wikimedia.org/wikipedia/commons/2/2f/Flag_of_the_United_Nations.svg'

#Add population, region and image data for Kosovo
df3.loc[df3['iso_code']=='KSV','region']='Europe'
df3.loc[df3['iso_code']=='KSV','image']='https://upload.wikimedia.org/wikipedia/commons/1/1f/Flag_of_Kosovo.svg'

#Add other missing countries regions
missing_regions = pd.read_csv('input-clean/missing_regions.csv')
df3 = pd.merge(df3,missing_regions[['country','region_fill']],on=['country'],how='left')
#Replace region with region fill where region=Nan
df3['region']=np.where(df3['region'].isna(),df3['region_fill'],df3['region'])
df3=df3.drop('region_fill',axis=1)


In [65]:
#Interpolate population data for all countries with NaN values

#Find all countries with incomplete population data
countries_with_nan = df3.groupby('country')['population'].apply(lambda x: x.isna().any()).reset_index()
countries_with_nan = countries_with_nan[countries_with_nan['population']]
countries_with_nan_df=df3[df3['country'].isin(countries_with_nan['country'])]

# Define a function to interpolate missing values
def interpolate_population(group):
    group['population'] = group['population'].interpolate(method='linear',limit_direction='forward')
    return group
# # Apply the interpolation function to each group (grouped by 'country')
interpolated_df = countries_with_nan_df.groupby('country').apply(interpolate_population)
# # Reset the index to make the DataFrame flat
interpolated_df.reset_index(drop=True, inplace=True)

result_df = df3.merge(interpolated_df[['country','year','population']], on=['country', 'year'], suffixes=('', '_interp'), how='left')

# Update the 'population' column with interpolated values where available
result_df['population'].fillna(result_df['population_interp'], inplace=True)

# Drop the '_interp' columns
result_df.drop(columns=[col for col in result_df.columns if col.endswith('_interp')], inplace=True)

countries_with_nan_df=result_df[result_df['country'].isin(countries_with_nan['country'])]

df3=result_df


In [66]:
#Fill in where region is not available
df3['region'].fillna(value='World',inplace=True)
df3['image'].fillna(value='https://upload.wikimedia.org/wikipedia/commons/2/2f/Flag_of_the_United_Nations.svg',inplace=True)

#Fill in where population not available - set Nan to 0
df3['population'].fillna(value=0,inplace=True)


In [67]:
#Sort emissions data
emissions_data_final= df3.sort_values(by=['iso_code','year']).reset_index(drop=True)


In [68]:
# load wimmer data on colonial rule - source: https://www.awimmer.com/_files/archives/24595a_3dc5d58ce03942a8b5b5ed444642d32d.zip?dn=WimmerFeinsteinReplication.dta.zip
wimmer = pd.read_stata('input-clean/WimmerFeinsteinReplication.dta')

#Filter only empire data
wimmer=wimmer[['cowcode','country','year','Yugoslavia','Ottoman',
                           'Netherlands', 'Portugal', 
                          'Spain','KKAustria', 'SU',
                           'Romanov', 'indepstate',
                           'French', 'British','otherempires']][wimmer['year']>=1850]

#Set Nan values to 0 and convert most columns to float, cowcode to int - required for later
wimmer.fillna(0, inplace=True)
numerical_columns = wimmer.select_dtypes(include=['int8','int64', 'float64','float32']).columns
wimmer[numerical_columns] = wimmer[numerical_columns].astype(float)
wimmer['cowcode'] = wimmer['cowcode'].astype(int)

#Rename Wimmer countries based on missing jones countries
missing_jones=pd.read_csv('input-clean/wimmer_country_name_edit.csv')
rename_dict = missing_jones.set_index('country')['rename'].to_dict()
#Apply rename dictionary to wimmer
wimmer['country'] = wimmer['country'].map(rename_dict).fillna(wimmer['country'])


One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  wimmer = pd.read_stata('input-clean/WimmerFeinsteinReplication.dta')


In [69]:
#Check sum of empires in wimmer dataframe before modification
wimmer_check=wimmer.copy()
#Remove other empires from wimmer for checking
wimmer_check=wimmer_check.drop('otherempires',axis=1)
from functions import get_empire_sums, get_country_year_ranges, get_country_empire_year_ranges_2

#Check sum of empires in wimmer dataframe - determine those that equal 1, 2, 0, and none of those. Sum of empires should equal 1 across every row
sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_not_0_1_2_country_years=get_empire_sums(wimmer_check)
display(sum_tuple)


(0.8341744772891132,
 0.11256308579668349,
 0.043258832011535686,
 0.010003604902667627)

In [70]:

#Expand other empires from Wimmer
wimmer=wimmer.copy()

from functions import update_colonization_data

if 'China' not in wimmer.columns:
    other_empires=wimmer.loc[wimmer['otherempires']==1]
    other_countries=other_empires.loc[:,'country'].unique()

    min_year_list=[]
    max_year_list=[]

    for country in other_countries:
        min_year=other_empires.loc[other_empires['country']==country]['year'].min()
        min_year_list.append(min_year)
        max_year=other_empires.loc[other_empires['country']==country]['year'].max()
        max_year_list.append(max_year)

    #Add empires via CSV file
    df_other_empires=pd.read_csv('input-clean/other_empires_wimmer_edit.csv')
    df_other_empires=df_other_empires.drop('Unnamed: 0',axis=1)

    #Adjust wimmer based on other empires dataset
    #This function adds any empires that are missing and then puts 1 to the countries/years specified (only one empire can be specified), it does not remove or overwrite existing data
    wimmer=update_colonization_data(df_other_empires,wimmer)
    wimmer=wimmer.drop('otherempires',axis=1)

In [71]:

#Adjust wimmer based on sum_0_range_dataset: add 1 where they are missing so all adds up to 1

#import modification file
sum_0_range_table=pd.read_csv('input-clean/sum_0_country_years_clean_edit.csv')
sum_0_range_table=sum_0_range_table.drop('Unnamed: 0',axis=1)

if 'Italy' not in wimmer.columns:
    #This function adds any empires that are missing and then puts 1 to the countries/years specified (only one empire can be specified), it does not remove or overwrite existing data
    wimmer=update_colonization_data(sum_0_range_table,wimmer)

sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_0_2_country_years=get_empire_sums(wimmer)
display(sum_tuple)


(0.9467375630857967, 0.0, 0.043258832011535686, 0.010003604902667627)

In [72]:
#Adjust wimmer based on sum_2_range_dataset: remove data in other columns except the one specified

#Import modification file
sum_2_range_table=pd.read_csv('input-clean/sum_2_country_years_clean_edit.csv')
sum_2_range_table=sum_2_range_table.drop('Unnamed: 0',axis=1)

from functions import update_colonization_data_2
togo_test=wimmer.loc[(wimmer['country']=='Togo')&(wimmer['year']==1960),'French'].reset_index(drop=True)[0]
netherlands_test=wimmer.loc[(wimmer['country']=='Netherlands')&(wimmer['year']==1850),'Netherlands'].reset_index(drop=True)[0]
if netherlands_test!=0:
    #This function modifies wimmer empire values based on the specified country/years given in the edit file, if two empires specified then each empire is set to 0.5, if only one empire given, then empire is set to 1
    #This function overwrites values in other columns to ensure that the sum across each row equals to 1
    wimmer=update_colonization_data_2(sum_2_range_table,wimmer)

sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_0_2_country_years=get_empire_sums(wimmer)
display(sum_tuple)


(0.9899963950973324, 0.0, 0.0, 0.010003604902667627)

In [73]:
#Adjust wimmer based on sum_0_2_range_dataset: use ottoman share to determine share for other empires
sum_not_0_1_2_range_table=pd.read_csv('input-clean/sum_not_0_1_2_country_years_clean_edit.csv')
sum_not_0_1_2_range_table=sum_not_0_1_2_range_table.drop('Unnamed: 0',axis=1)

yugoslavia_test=wimmer.loc[(wimmer['country']=='Yugoslavia')&(wimmer['year']==1917),'indepstate'].reset_index(drop=True)[0]

if yugoslavia_test==1:
    wimmer=update_colonization_data_2(sum_not_0_1_2_range_table,wimmer)

sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_0_2_country_years=get_empire_sums(wimmer)

display(sum_tuple)

(1.0, 0.0, 0.0, 0.0)

In [74]:
#import cowcodes list to add to new countries into Wimmer
cow_codes_list=pd.read_csv('input-clean/COW-country-codes.csv')
cow_codes_list=cow_codes_list.rename(columns={'CCode':'cowcode','StateNme':'country','StateAbb':'StateAbb'})
cow_codes_list=cow_codes_list.drop(['StateAbb'],axis=1)
#create new entries for Serbia,Curaco and Hongkong which are missing
serbia = pd.DataFrame({'cowcode': 342, 'country': ['Serbia']})
curacao = pd.DataFrame({'cowcode': 999, 'country': ['Curaçao']})
hongkong = pd.DataFrame({'cowcode': 998, 'country': ['Hong Kong']})
cow_codes_list=pd.concat([cow_codes_list,serbia,curacao,hongkong],ignore_index=True)
#Drop duplicates to avoid problems later
cow_codes_list=cow_codes_list.drop_duplicates()

In [75]:

#Add missing yugoslavia countries into Wimmer
yugoslavia_list=['Yugoslavia','Croatia','Slovenia','Serbia','Bosnia and Herzegovina','North Macedonia','Kosovo','Montenegro']
# #Check if any Yugoslavia countries already in wimmer
yugoslavia_wimmer=wimmer[wimmer['country'].isin(yugoslavia_list)]
in_yugo=yugoslavia_wimmer['country'].unique().tolist()
#Determine countries missing from wimmer
not_in_yugo=set(yugoslavia_list)-set(in_yugo)

# Drop the zero columns
zero_columns = yugoslavia_wimmer.columns[(yugoslavia_wimmer == 0).all()]
yugoslavia_wimmer = yugoslavia_wimmer.drop(zero_columns, axis=1)

from functions import add_missing_countries_to_wimmer

#Create new entries for Serbia, Montenegro and Serbia
if 'Serbia' not in yugoslavia_wimmer['country'].values:

    missing_yugo = pd.DataFrame({'country': ['Serbia','Kosovo','Montenegro']})
    missing_yugo=missing_yugo.merge(cow_codes_list,on='country',how='left')
    wimmer=add_missing_countries_to_wimmer(wimmer,missing_yugo,'Bosnia and Herzegovina')
    yugoslavia_edit=pd.read_csv('input-clean/missing_yugoslavia_wimmer_edit.csv',index_col='Unnamed: 0')
    wimmer=update_colonization_data_2(yugoslavia_edit,wimmer)

#Set all yugoslavia empire to indepstate: based on fact that yugoslavian nations not colonised but a larger alliance of countries
if 'Yugoslavia' in wimmer.columns:
    wimmer.loc[wimmer['Yugoslavia'] == 1, 'indepstate'] = 1
    wimmer.loc[wimmer['Yugoslavia'] == 0.5, 'indepstate'] = wimmer.loc[wimmer['Yugoslavia'] == 0.5, 'Yugoslavia']+0.5
    wimmer=wimmer.drop('Yugoslavia',axis=1)

sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_0_2_country_years=get_empire_sums(wimmer)
display(sum_tuple)



(1.0, 0.0, 0.0, 0.0)

In [76]:
#Add missing empire countries to Wimmer - UK and Netherlands
if 'United Kingdom' not in wimmer['country'].values:

    missing_empire_countries = pd.DataFrame({'country': ['United Kingdom','France']})
    missing_empire_countries=missing_empire_countries.merge(cow_codes_list,on='country',how='left')
    wimmer=add_missing_countries_to_wimmer(wimmer, missing_empire_countries,'Netherlands')

#Create new entries for significant missing countries for which there is emissions data but are not in Wimmer
if 'Hong Kong' not in wimmer['country'].values:

    missing_wimmer_countries_to_add=pd.read_csv('input-clean/significant_missing_countries_to_add_wimmer_edit.csv')
    missing_wimmer_countries_to_add=missing_wimmer_countries_to_add.drop('Unnamed: 0',axis=1)
    missing_sig_wimmer_countries = pd.DataFrame({'country': missing_wimmer_countries_to_add['country'].unique()})
    missing_sig_wimmer_countries=missing_sig_wimmer_countries.merge(cow_codes_list,on='country',how='left')
    # display(missing_sig_wimmer_countries)
    wimmer=add_missing_countries_to_wimmer(wimmer,missing_sig_wimmer_countries,'Netherlands')
    wimmer=update_colonization_data_2(missing_wimmer_countries_to_add,wimmer)


In [77]:
#Extend Wimmer database to include years 2002-2023, based on replicating 2001 status forwards
if 2023 not in wimmer['year'].values:
    wimmer_2001=wimmer[wimmer['year']==2001]
    years_range = list(range(2002, 2024))
    n = len(years_range)
    wimmer_2001_2023 = pd.DataFrame({'year': years_range * len(wimmer_2001)})
    # Duplicate the 'data_2001' DataFrame
    wimmer_2001_2023 = pd.concat([wimmer_2001] * n, ignore_index=True)
    # Update the 'year' column with the corresponding years
    wimmer_2001_2023['year'] = sorted(years_range * len(wimmer_2001))
    # Reset the index
    wimmer_2001_2023.reset_index(drop=True, inplace=True)
    #Add onto main wimmer dataframe
    wimmer = pd.concat([wimmer,wimmer_2001_2023], ignore_index=True).reset_index(drop=True).sort_values(by=['country','year'])


sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_0_2_country_years=get_empire_sums(wimmer)
display(sum_tuple)


(1.0, 0.0, 0.0, 0.0)

In [78]:
#Split Austria-Hungary empire responsibility between Austria and Hungary equally

if 'Hungary' not in wimmer.columns:
    wimmer['Austria']=wimmer['KKAustria']/2
    wimmer['Hungary']=wimmer['KKAustria']/2
    #Combine SU and Romanov into Russia
    wimmer['Russia']=wimmer['SU']+wimmer['Romanov']
    #Rename columns
    wimmer=wimmer.rename({'French':'France','British':'United Kingdom','Ottoman':'Turkey','United States':'USA'},axis=1)
    #Drop unnecessary empires
    wimmer=wimmer.drop(['SU','Romanov','KKAustria'],axis=1)

sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_0_2_country_years=get_empire_sums(wimmer)
display(sum_tuple)


(1.0, 0.0, 0.0, 0.0)

In [79]:
wimmer=wimmer.copy()

#Apply iso codes to wimmer
if 'iso_code' not in wimmer.columns:
    #Rename certain countries in cowcode list such that they match those in the iso_code list - to allow successful merging
    rename_COW_countries={'United States of America': 'United States','Macedonia':'Macedonia, the former Yugoslav Republic of','Yugoslavia':'NaN','Moldova':'Moldova, Republic of','Democratic Republic of the Congo':'Congo, the Democratic Republic of the', 
      'Tanzania':'Tanzania, United Republic of','Iran':'Iran, Islamic Republic of','Syria':'Syrian Arab Republic','North Korea':"Korea, Democratic People's Republic of",'Laos':"Lao People's Democratic Republic"}
    cow_codes_list = cow_codes_list.copy()  # Create a copy of the original DataFrame
    cow_codes_list[['country']]=cow_codes_list[['country']].apply(lambda col: col.map(rename_COW_countries)).fillna(cow_codes_list[['country']])
    #Create new entry for Serbia
    serbia = pd.DataFrame({'cowcode': 342, 'country': ['Serbia']})
    cow_codes_list=pd.concat([cow_codes_list,serbia],ignore_index=True)
    cow_codes_list=cow_codes_list.drop_duplicates()

    iso_codes_list=pd.read_csv('input-clean/countries_codes_and_coordinates.csv') #https://gist.github.com/tadast/8827699
    iso_codes_list=iso_codes_list.rename(columns={'Country':'country','Alpha-3 code':'iso_code'})
    iso_codes_list=iso_codes_list[['country','iso_code']]
    #Create missing entries for kosovo and Curacao, not in iso code database
    kosovo = pd.DataFrame({'country': ['Kosovo'], 'iso_code': ['KSV']})
    curacao = pd.DataFrame({'country': ['Curaçao'], 'iso_code': ['CUW']})
    iso_codes_list=pd.concat([iso_codes_list,kosovo,curacao],ignore_index=True)
    iso_codes_list=iso_codes_list[['country','iso_code']]
    iso_codes_list['iso_code'] = iso_codes_list['iso_code'].apply(lambda x: x.replace('"','').strip())
    cow_iso=cow_codes_list.merge(iso_codes_list,on='country',how='left')
    cow_iso=cow_iso.drop(['country'],axis=1)
    wimmer=wimmer.merge(cow_iso,on='cowcode',how='left')
    wimmer=wimmer.drop(['cowcode'],axis=1)



In [80]:


#Output modified wimmer dataset
modified_wimmer=wimmer.copy()
modified_wimmer.set_index('iso_code',inplace=True)
modified_wimmer=modified_wimmer.rename(columns = {'indepstate':'Independent'})
modified_wimmer.to_csv('output-clean/territorial_rule_database_1850_2023.csv')

sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_0_2_country_years=get_empire_sums(modified_wimmer,drop_columns=['country','year'])
display(sum_tuple)

(1.0, 0.0, 0.0, 0.0)

In [81]:
#Merge colonial dataset onto emissions data set

if 'country' in wimmer.columns:
    #Drop country from wimmer to avoid repetition after merging
    wimmer.drop(columns=['country'], inplace=True)
    col_emissions = pd.merge(emissions_data_final,wimmer,on=['iso_code','year'],how='left')

#Check how many entries are NaN
if 'indepstate' in wimmer.columns:
    #For all missing wimmer countries, assume indepstate
    #Set indepsstate for all NaN countries - ie those that are missing from Wimmer
    col_emissions['indepstate'].fillna(1, inplace=True)
    # Columns to exclude from filling with 0
    exclude_columns = ['Land CO2/GtCO2','image','region', 'indepstate']
    # Create a dictionary of columns and their fill values
    fill_values = {col: 0 for col in col_emissions.columns if col not in exclude_columns}
    #Fill nan with 0 except for excluded columns
    col_emissions.fillna(fill_values, inplace=True)

#Check that empires still add up to 1
sum_tuple,sum_1_country_years, sum_0_country_years, sum_2_country_years, sum_0_2_country_years=get_empire_sums(col_emissions,drop_columns=['iso_code','country','year','Fossil CO2/GtCO2','Land CO2/GtCO2','Total/GtCO2','Cons Total/GtCO2','Fossil Cons CO2/GtCO2','population','region','image'])
display(sum_tuple)

(1.0, 0.0, 0.0, 0.0)

In [82]:
#Calculate empire-related and net colonial emissions (including territorial), splitting empire emissions into fossil and LULUCF, and considering consumption emissions

#Multiply empire columns by Total/GtCO2 to get emissions allocations for each empire
if 'independent' not in col_emissions.columns:
    empire_countries=['Turkey','Netherlands','Portugal','Spain','France',
                'United Kingdom','Germany','Belgium',	
                'China','Japan','Italy','USA','Australia',
                'Austria','Hungary','Russia']
    multiply_columns=empire_countries.copy()
    multiply_columns.append('indepstate')

    #Add columns for independent and empire that will show state for each in full database
    col_emissions['independent']=col_emissions['indepstate']
    col_emissions['empire']=col_emissions.loc[:,empire_countries].sum(axis=1)

    col_emissions_reference=col_emissions.copy()

    #Iterate over both territorial fossil and consumption based fossil CO2
    fossil_columns=['Fossil CO2/GtCO2','Fossil Cons CO2/GtCO2']
    string_list=['Fossil','Fossil Cons']
    for i in range(len(fossil_columns)):
        emission_columns=[fossil_columns[i],'Land CO2/GtCO2']
        string_list_2=[string_list[i],'Land']
        for ii in range(len(emission_columns)):
            col_emissions=col_emissions_reference.copy()
            emissions_column=emission_columns[ii]
            string=string_list_2[ii]
            emp_string='Emp'+' '+string+' Total/GtCO2'
            col_string='Col'+' '+string+' Total/GtCO2'

            #Multiply empire columns by Total/GtCO2 to get emissions allocations for each empire
            col_emissions.loc[:,multiply_columns]=col_emissions.loc[:,multiply_columns].mul(col_emissions[emissions_column],axis=0)
        
            #For all colonial countries, empire emissions are the sum of all the emissions in the named empire column for that country
            #Emp total = total emissions from empire of that country for that year
            col_emissions[emp_string]=0.0
            #Col total = net total emissions after including impact of colonialism
            col_emissions[col_string]=0.0

            emp_emissions_by_country=pd.DataFrame(columns=empire_countries,index=col_emissions['country'].unique())
            for empire in empire_countries:
                #Mask for empire country
                mask =col_emissions['country']==empire
                #Determine indices for empire country
                empire_indices = col_emissions['country'].loc[mask].index
                #Determine sum of all emissions for that empire for a given year
                empire_emissions_sum = col_emissions.loc[~mask,['year',empire]].groupby('year').sum().reset_index()
                #Assign empire country's indices to the sum of empire emissions - to allow correct assignment in dataframe
                empire_emissions_sum.index=empire_indices
                #Put empire emissions in new column
                col_emissions.loc[mask,emp_string]=empire_emissions_sum[empire]
                #Add empire emissions to territorial emissions to get net total emissions, after including impact of colonialism
                col_emissions.loc[mask,col_string]=col_emissions.loc[mask,emissions_column]+col_emissions.loc[mask,emp_string]

                #Calculate empire emissions contributions by country
                empire_emissions_country_sum = col_emissions.loc[~mask,['country',empire]].groupby('country').sum().reset_index()
                #Set index to country so can combine to new dataframe
                empire_emissions_country_sum.set_index('country',inplace=True)
                #Add empire emissions,split by country, to new dataframe
                emp_emissions_by_country[empire]=empire_emissions_country_sum
                #Add territorial emissions for each empire country
                emp_emissions_by_country.loc[empire,empire]=col_emissions.loc[mask,emissions_column].sum()

                #For all non-colonial countries, empire emissions are the negative sum across all empire columns
                non_emp_mask=~(col_emissions['country'].isin(empire_countries))
                # display(len(col_emissions['country'].loc[non_emp_mask].unique()))
                #Negative sum of all the empire columns for each country
                col_emissions.loc[non_emp_mask,emp_string]=-col_emissions.loc[non_emp_mask,empire_countries].sum(axis=1)
                #Add empire emissions to territorial emissions to get net total emissions, after including impact of colonialism
                col_emissions.loc[non_emp_mask,col_string]=col_emissions.loc[non_emp_mask,emissions_column]+col_emissions.loc[non_emp_mask,emp_string]

            #Correct for where empires and non-empires overlap - Australia and Italy were both colonised but also later a coloniser
            #Subract any emissions from empire emissions for when empire countries were previously colonised by other empires eg Australia and Italy
            for country in empire_countries:
                country_mask=col_emissions['country']==country
                col_emissions.loc[country_mask,emp_string]=col_emissions.loc[country_mask,emp_string]-col_emissions.loc[country_mask,empire_countries].sum(axis=1)
                col_emissions.loc[country_mask,col_string]=col_emissions.loc[country_mask,emissions_column]+col_emissions.loc[country_mask,emp_string]

            #Calculate split of empire emissions for particular colonised countries and add them to a dataframe for plotting
            non_empire_countries_of_interest=['Indonesia','India']
            non_emp_emissions_by_country=pd.DataFrame(columns=non_empire_countries_of_interest,index=empire_countries)

            #Determine empire contributions for each colony of interest
            for country in non_empire_countries_of_interest:
                #Mask for empire country
                mask =col_emissions['country']==country
                #Calculate empire emissions contributions by country
                columns_required=empire_countries.copy()
                non_empire_emissions_country_sum = col_emissions.loc[mask,columns_required].sum(axis=0).mul(-1).reset_index()
                # non_empire_emissions_country_sum=non_empire_emissions_country_sum.T
                non_empire_emissions_country_sum.set_index('index',inplace=True)
                #Add empire emissions,split by country, to new dataframe
                non_emp_emissions_by_country[country]=non_empire_emissions_country_sum
                #Add territorial emissions for each empire country
                non_emp_emissions_by_country.loc['Territorial',country]=col_emissions.loc[mask,emissions_column].sum()
                non_emp_emissions_by_country.loc['Colonial',country]=col_emissions.loc[mask,col_string].sum()

            #Assign new dataframes for fossil and land respectively
            if i==0 and ii==0:
                col_emissions_fossil=col_emissions.copy()
                emp_emissions_by_country_fossil=emp_emissions_by_country.copy()
                non_emp_emissions_by_country_fossil=non_emp_emissions_by_country.copy()
            elif i==0 and ii==1:
                col_emissions_land=col_emissions.copy()
                emp_emissions_by_country_land=emp_emissions_by_country.copy()
                non_emp_emissions_by_country_land=non_emp_emissions_by_country.copy()
            #Save consumption fossil data 
            elif i==1 and ii==0:
                col_emissions_cons_fossil=col_emissions.copy()
    
    #Merge fossil and land dataframes
    col_emissions=pd.merge(col_emissions_fossil,col_emissions_land[['iso_code','year','Emp Land Total/GtCO2','Col Land Total/GtCO2']],on=['iso_code','year'],how='left')
    #Merge consumption fossil data
    col_emissions=pd.merge(col_emissions,col_emissions_cons_fossil[['iso_code','year','Emp Fossil Cons Total/GtCO2','Col Fossil Cons Total/GtCO2']],on=['iso_code','year'],how='left')
    #Add land and fossil together
    col_emissions['Emp Total/GtCO2']=col_emissions['Emp Fossil Total/GtCO2']+col_emissions['Emp Land Total/GtCO2']
    col_emissions['Col Total/GtCO2']=col_emissions['Col Fossil Total/GtCO2']+col_emissions['Col Land Total/GtCO2']
    #Add consumption fossil data
    col_emissions['Emp Cons Total/GtCO2']=col_emissions['Emp Fossil Cons Total/GtCO2']+col_emissions['Emp Land Total/GtCO2']
    col_emissions['Col Cons Total/GtCO2']=col_emissions['Col Fossil Cons Total/GtCO2']+col_emissions['Col Land Total/GtCO2']

    #Merge emissions by country files
    emp_emissions_by_country=emp_emissions_by_country_fossil+emp_emissions_by_country_land
    non_emp_emissions_by_country=non_emp_emissions_by_country_fossil+non_emp_emissions_by_country_land

    # Drop empire columns and indepstate column
    col_emissions=col_emissions.drop(empire_countries,axis=1)
    col_emissions=col_emissions.drop('indepstate',axis=1)   

#Calculate sums of Total and Col Total emissions to make sure they match
sum_total=col_emissions['Total/GtCO2'].sum()
sum_col_total=col_emissions['Col Total/GtCO2'].sum()
sum_emp_total=col_emissions['Emp Total/GtCO2'].sum()
display([sum_total,sum_col_total,sum_emp_total])

#Calculate sums of Cons Total and Col Cons Total emissions to make sure they match
sum_cons_total=col_emissions['Cons Total/GtCO2'].sum()
sum_col_cons_total=col_emissions['Col Cons Total/GtCO2'].sum()
sum_emp_cons_total=col_emissions['Emp Cons Total/GtCO2'].sum()
display([sum_cons_total,sum_col_cons_total,sum_emp_cons_total])




[2557.5996537286082, 2557.5996537286087, -1.4210854715202004e-14]

[2556.972786397332, 2556.972786397332, -7.105427357601002e-15]

In [83]:
#Export full database of emissions
colonial_emissions_database=col_emissions[['iso_code', 'country', 'year','region','population','independent', 'empire', 'Fossil CO2/GtCO2', 'Land CO2/GtCO2',
       'Total/GtCO2', 'Fossil Cons CO2/GtCO2', 'Cons Total/GtCO2','Emp Fossil Total/GtCO2',
       'Col Fossil Total/GtCO2', 'Emp Land Total/GtCO2',
       'Col Land Total/GtCO2', 'Emp Fossil Cons Total/GtCO2',
       'Col Fossil Cons Total/GtCO2', 'Emp Total/GtCO2', 'Col Total/GtCO2',
       'Emp Cons Total/GtCO2', 'Col Cons Total/GtCO2']]

#Convert population into millions of people
colonial_emissions_database['population']=colonial_emissions_database['population']/1e6

colonial_emissions_database=colonial_emissions_database.rename(columns={'empire':'Controlled','independent':'Independent','population':'Population, millions','Total/GtCO2':'Territorial Total/GtCO2','Cons Total/GtCO2':'Consumption Total/GtCO2','Col Total/GtCO2':'Colonial Total/GtCO2','Emp Total/GtCO2':'Empire Total/GtCO2','Total PC1/tCO2/pc':'Territorial per yearly capita /tCO2/pc','Col Total PC1/tCO2/pc':'Colonial per yearly capita /tCO2/pc','Total PC2/tCO2/pc':'Territorial per 2023 capita /tCO2/pc','Col Total PC2/tCO2/pc':'Colonial per 2023 capita /tCO2/pc',
                                            'Cons Total PC1/tCO2/pc':'Consumption per yearly capita /tCO2/pc','Col Cons Total PC1/tCO2/pc':'Colonial consumption per yearly capita /tCO2/pc','Cons Total PC2/tCO2/pc':'Consumption per 2023 capita /tCO2/pc','Col Cons Total PC2/tCO2/pc':'Colonial consumption per 2023 capita /tCO2/pc'})


colonial_emissions_database.loc[colonial_emissions_database['country']=='Bunkers', 'country'] = 'International Transport'

colonial_emissions_database.head()

colonial_emissions_database.to_csv('output-clean/full_year_emissions_summary_table_1850_2023.csv')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  colonial_emissions_database['population']=colonial_emissions_database['population']/1e6


In [84]:
#Add EU as a separate grouping in the table - includes UK
EU_countries=pd.read_csv('input-clean/EU_UK_Countries.csv')
EU_countries=EU_countries.rename({'COUNTRY NAME':'country'},axis=1)
# display(EU_countries)

#Calculate yearly sum for all EU+UK countries
EU_mask=(col_emissions['country'].isin(EU_countries['country']))
col_emissions
EU_emissions_sum = col_emissions.loc[EU_mask,['year','population','Fossil CO2/GtCO2','Land CO2/GtCO2','Total/GtCO2','Emp Fossil Total/GtCO2','Emp Land Total/GtCO2','Emp Total/GtCO2','Col Fossil Total/GtCO2','Col Land Total/GtCO2','Col Total/GtCO2','Cons Total/GtCO2','Emp Cons Total/GtCO2','Col Cons Total/GtCO2']].groupby(['year']).sum()
EU_emissions_sum.reset_index(inplace=True)
EU_emissions_sum['country']='EU+UK'
last_column = EU_emissions_sum.columns[-1]
new_order = [last_column] + [col for col in EU_emissions_sum.columns if col != last_column]
EU_emissions_sum = EU_emissions_sum[new_order]

#Calculate cumulative sum from 1850 for EU+UK
EU_emissions_sum_year=EU_emissions_sum.set_index('year',drop=True)
EU_cumu_emissions=EU_emissions_sum_year.groupby('country').cumsum()
pop_EU=EU_emissions_sum_year['population']
EU_cumu_emissions['population']=pop_EU
EU_cumu_emissions.reset_index(inplace=True)

EU_cumu_emissions['country']='EU+UK'
last_column = EU_cumu_emissions.columns[-1]
new_order = [last_column] + [col for col in EU_cumu_emissions.columns if col != last_column]
EU_cumu_emissions = EU_cumu_emissions[new_order]


#Calculate warming from cumulative CO2 emissions for EU
#0.45C per 1,000GtCO2: source - https://www.carbonbrief.org/in-depth-qa-the-ipccs-sixth-assessment-report-on-climate-science
warming_potential=0.45/1000
EU_cumu_emissions['Temperature increase/degC']=EU_cumu_emissions['Total/GtCO2']*warming_potential
EU_cumu_emissions['Col Temperature increase/degC']=EU_cumu_emissions['Col Total/GtCO2']*warming_potential
EU_cumu_emissions['Difference in temp increase/degC']=EU_cumu_emissions['Col Temperature increase/degC']-EU_cumu_emissions['Temperature increase/degC']



In [85]:

#Get data for colonial emissions animation

#Calculate cumulative emissions for every country - before and after colonialism
colonial_1850 = col_emissions[['country','year','population','Fossil CO2/GtCO2','Land CO2/GtCO2','Total/GtCO2','Emp Fossil Total/GtCO2','Emp Land Total/GtCO2','Emp Total/GtCO2','Col Fossil Total/GtCO2','Col Land Total/GtCO2','Col Total/GtCO2','Cons Total/GtCO2','Emp Cons Total/GtCO2','Col Cons Total/GtCO2']].groupby(by=['country','year','population']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum

#Format cumulative emissions data for animation
cumulative_emissions_1850=pd.merge(colonial_1850,col_emissions[['country','year','region','image']],on=['country','year'],how='left')

#Reformat year into date and rename region 
colonial_1850_anime=cumulative_emissions_1850.copy()
colonial_1850_anime['date'] = colonial_1850_anime['year'].astype(str) + '-01-01'
colonial_1850_anime.rename(columns = {'region':'category'}, inplace = True)

colonial_1850_anime=colonial_1850_anime.rename(columns={'Col Total/GtCO2':'value'})
colonial_1850_anime = colonial_1850_anime[['date','country','population','category','image','value']]



In [86]:
#Calculate warming from cumulative CO2 emissions
#0.45C per 1,000GtCO2: source - https://www.carbonbrief.org/in-depth-qa-the-ipccs-sixth-assessment-report-on-climate-science
warming_potential=0.45/1000
colonial_1850['Temperature increase/degC']=colonial_1850['Total/GtCO2']*warming_potential
colonial_1850['Col Temperature increase/degC']=colonial_1850['Col Total/GtCO2']*warming_potential
colonial_1850['Difference in temp increase/degC']=colonial_1850['Col Temperature increase/degC']-colonial_1850['Temperature increase/degC']

In [87]:
#Get top 20 countries by cumulative emissions including colonial responsibility

#Get cumulative emissions in final year - corresponds to total emissions
cumulative_emissions_2023=colonial_1850[colonial_1850['year']==2023]

#Percentage share of total
cumulative_emissions_2023['Share/percent'] = cumulative_emissions_2023['Total/GtCO2']/cumulative_emissions_2023['Total/GtCO2'].sum()*100
cumulative_emissions_2023['Col share/percent'] = cumulative_emissions_2023['Col Total/GtCO2']/cumulative_emissions_2023['Col Total/GtCO2'].sum()*100
cumulative_emissions_2023['Change/percent'] = (cumulative_emissions_2023['Col Total/GtCO2']-cumulative_emissions_2023['Total/GtCO2'])/cumulative_emissions_2023['Total/GtCO2']*100

cumulative_emissions_2023_table=cumulative_emissions_2023.copy()

#Drop Bunkers from rankings - not a country
cumulative_emissions_2023=cumulative_emissions_2023[~(cumulative_emissions_2023['country']=='Bunkers')]

from functions import sort_cumu_emissions_terri_col,get_top20
#Sort cumulative emissions based on total and colonial total and print rankings
cumulative_emissions_2023=sort_cumu_emissions_terri_col(cumulative_emissions_2023,'Total/GtCO2','Col Total/GtCO2')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20=get_top20(cumulative_emissions_2023,'Total/GtCO2','Col Cons Total/GtCO2')

#Include EU in ranking and remove any countries that are in EU
colonial_1850_EU = pd.concat([colonial_1850, EU_cumu_emissions], ignore_index=True)
EU_mask=colonial_1850_EU['country'].isin(EU_countries['country'])
colonial_1850_EU=colonial_1850_EU.loc[~EU_mask,:]
cumulative_emissions_2023_EU=colonial_1850_EU[colonial_1850_EU['year']==2023]

cumulative_emissions_2023_EU['Share/percent'] = cumulative_emissions_2023_EU['Total/GtCO2']/cumulative_emissions_2023_EU['Total/GtCO2'].sum()*100
cumulative_emissions_2023_EU['Col share/percent'] = cumulative_emissions_2023_EU['Col Total/GtCO2']/cumulative_emissions_2023_EU['Col Total/GtCO2'].sum()*100
cumulative_emissions_2023_EU['Change/percent'] = (cumulative_emissions_2023_EU['Col Total/GtCO2']-cumulative_emissions_2023_EU['Total/GtCO2'])/cumulative_emissions_2023_EU['Total/GtCO2']*100

cumulative_emissions_2023_EU=sort_cumu_emissions_terri_col(cumulative_emissions_2023_EU,'Total/GtCO2','Col Total/GtCO2')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_EU=get_top20(cumulative_emissions_2023_EU,'Total/GtCO2')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cumulative_emissions_2023['Share/percent'] = cumulative_emissions_2023['Total/GtCO2']/cumulative_emissions_2023['Total/GtCO2'].sum()*100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cumulative_emissions_2023['Col share/percent'] = cumulative_emissions_2023['Col Total/GtCO2']/cumulative_emissions_2023['Col Total/GtCO2'].sum()*100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.

In [88]:
#Calculate colonial emissions per capita - 1 - divided by population in each year
# look at cumulative territorial co2 emissions from fossil and land, per capita

#Calculate per capita emissions on per year basis (convert GtCO2 to tCO2)
col_emissions['Total PC1/tCO2/pc'] = col_emissions['Total/GtCO2']*1e9/col_emissions['population']
col_emissions['Col Total PC1/tCO2/pc'] = col_emissions['Col Total/GtCO2']*1e9/col_emissions['population']

cpc = col_emissions[['country','year','Total PC1/tCO2/pc','Col Total PC1/tCO2/pc','population']]
# calculate cumulative total
cpc = cpc.groupby(by=['country','year','population']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum

#Select last year to consider total cumulative emissions
cumu_per_capita_emissions_2023=cpc[cpc['year']==2023]

#Create full dataset copy for table
cumu_per_capita_emissions_2023_table=cumu_per_capita_emissions_2023.copy()

#Eliminate all countries with 2023 population less than 2023
cumu_per_capita_emissions_2023=cumu_per_capita_emissions_2023[cumu_per_capita_emissions_2023['population']>1e6]
# cumu_per_capita_emissions_2023=cumu_per_capita_emissions_2023.drop(['region','image'],axis=1)
cumu_per_capita_emissions_2023=sort_cumu_emissions_terri_col(cumu_per_capita_emissions_2023,'Total PC1/tCO2/pc','Col Total PC1/tCO2/pc')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_per_capita=get_top20(cumu_per_capita_emissions_2023,'Total PC1/tCO2/pc')

#Include EU in ranking and remove any countries that are in EU+UK
col_emissions_EU = pd.concat([col_emissions, EU_emissions_sum], ignore_index=True)
EU_mask=col_emissions_EU['country'].isin(EU_countries['country'])
col_emissions_EU=col_emissions_EU.loc[~EU_mask,:]
col_emissions_EU['Total PC1/tCO2/pc'] = col_emissions_EU['Total/GtCO2']*1e9/col_emissions_EU['population']
col_emissions_EU['Col Total PC1/tCO2/pc'] = col_emissions_EU['Col Total/GtCO2']*1e9/col_emissions_EU['population']

cpc_EU = col_emissions_EU[['country','year','Total PC1/tCO2/pc','Col Total PC1/tCO2/pc','population']]
# calculate cumulative total
cpc_EU = cpc_EU.groupby(by=['country','year','population']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum
cumu_per_capita_emissions_2023_EU=cpc_EU[cpc_EU['year']==2023]
#Eliminate all countries with 2023 population less than 2023
cumu_per_capita_emissions_2023_EU=cumu_per_capita_emissions_2023_EU[cumu_per_capita_emissions_2023_EU['population']>1e6]
#Sort
cumu_per_capita_emissions_2023_EU=sort_cumu_emissions_terri_col(cumu_per_capita_emissions_2023_EU,'Total PC1/tCO2/pc','Col Total PC1/tCO2/pc')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_per_capita_EU=get_top20(cumu_per_capita_emissions_2023_EU,'Total PC1/tCO2/pc')


# display(col_emissions[col_emissions['country']=='Christmas Island'])


In [89]:
#Calculate cumulative emissions per capita -2, divided by population in 2023

cpc2 = col_emissions[['country','year','Total/GtCO2','Col Total/GtCO2','population']]
# display(cpc2.head())
# calculate cumulative total
cpc2 = cpc2.groupby(by=['country','year']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum
# display(cpc2.tail())

#Take 2023 year and remove cumulative population data
cumu_per_capita_2_emissions_2023=cpc2[cpc2['year']==2023].reset_index(drop=True)

#cumu_cons_per_capita_2_emissions_2023=cumu_per_capita_2_emissions_2023.drop(['region','image'],axis=1)
cumu_per_capita_2_emissions_2023=cumu_per_capita_2_emissions_2023.drop('population',axis=1)

# merge in population data
pop_data=col_emissions[['country','population']][col_emissions['year']==2023].reset_index(drop=True)
cumu_per_capita_2_emissions_2023 = pd.merge(cumu_per_capita_2_emissions_2023,pop_data,on='country')

#Calculate cumulative emissions per 2023 population
cumu_per_capita_2_emissions_2023['Total PC2/tCO2/pc']=cumu_per_capita_2_emissions_2023['Total/GtCO2']*1e9/cumu_per_capita_2_emissions_2023['population']
cumu_per_capita_2_emissions_2023['Col Total PC2/tCO2/pc']=cumu_per_capita_2_emissions_2023['Col Total/GtCO2']*1e9/cumu_per_capita_2_emissions_2023['population']

#Create full dataset copy for table
cumu_per_capita_2_emissions_2023_table=cumu_per_capita_2_emissions_2023.copy()

#Eliminate all countries with 2023 population less than 2023
cumu_per_capita_2_emissions_2023=cumu_per_capita_2_emissions_2023[cumu_per_capita_2_emissions_2023['population']>1e6]

cumu_per_capita_2_emissions_2023=sort_cumu_emissions_terri_col(cumu_per_capita_2_emissions_2023,'Total PC2/tCO2/pc','Col Total PC2/tCO2/pc')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_per_capita_2=get_top20(cumu_per_capita_2_emissions_2023,'Total PC2/tCO2/pc')

#Include EU+UK
cpc2_EU = col_emissions_EU[['country','year','Total/GtCO2','Col Total/GtCO2','population']]
# display(cpc2[cpc2['country']=='EU+UK'])
# calculate cumulative total
cpc2_EU = cpc2_EU.groupby(by=['country','year']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum
# display(cpc2.tail())

#Take 2023 year and remove cumulative population data
cumu_per_capita_2_emissions_2023_EU=cpc2_EU[cpc2_EU['year']==2023].reset_index(drop=True)
#cumu_per_capita_2_emissions_2023=cumu_per_capita_2_emissions_2023.drop(['region','image'],axis=1)
cumu_per_capita_2_emissions_2023_EU=cumu_per_capita_2_emissions_2023_EU.drop('population',axis=1)
# display(cumu_per_capita_2_emissions_2023_EU[cumu_per_capita_2_emissions_2023_EU['country']=='EU+UK'])
# merge in population data
pop_data=col_emissions_EU[['country','population']][col_emissions_EU['year']==2023].reset_index(drop=True)
# display(pop_data)
cumu_per_capita_2_emissions_2023_EU = pd.merge(cumu_per_capita_2_emissions_2023_EU,pop_data,on='country')
# display(cumu_per_capita_2_emissions_2023_EU[cumu_per_capita_2_emissions_2023_EU['country']=='EU+UK'])
#Eliminate all countries with 2023 population less than 2023
cumu_per_capita_2_emissions_2023_EU=cumu_per_capita_2_emissions_2023_EU[cumu_per_capita_2_emissions_2023_EU['population']>1e6]

#Calculate cumulative emissions per 2023 population
cumu_per_capita_2_emissions_2023_EU['Total PC2/tCO2/pc']=cumu_per_capita_2_emissions_2023_EU['Total/GtCO2']*1e9/cumu_per_capita_2_emissions_2023_EU['population']
cumu_per_capita_2_emissions_2023_EU['Col Total PC2/tCO2/pc']=cumu_per_capita_2_emissions_2023_EU['Col Total/GtCO2']*1e9/cumu_per_capita_2_emissions_2023_EU['population']

# display(cumu_per_capita_2_emissions_2023.head())

cumu_per_capita_2_emissions_2023_EU=sort_cumu_emissions_terri_col(cumu_per_capita_2_emissions_2023_EU,'Total PC2/tCO2/pc','Col Total PC2/tCO2/pc')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_per_capita_2_EU=get_top20(cumu_per_capita_2_emissions_2023_EU,'Total PC2/tCO2/pc')




In [90]:
#Get top 20 countries by cumulative emissions including colonial responsibility and consumption emissions

#Get cumulative emissions in final year - corresponds to total emissions
cumulative_emissions_2023_cons=colonial_1850[colonial_1850['year']==2023]

cumulative_emissions_2023_cons_table=cumulative_emissions_2023_cons.copy()

#Drop Bunkers from rankings - not a country
cumulative_emissions_2023_cons=cumulative_emissions_2023_cons[~(cumulative_emissions_2023_cons['country']=='Bunkers')]

#Sort cumulative emissions based on total and colonial total and print rankings
cumulative_emissions_2023_cons=sort_cumu_emissions_terri_col(cumulative_emissions_2023_cons,'Total/GtCO2','Col Cons Total/GtCO2','Territorial Rank','Colonial + Consumption Rank')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_cons=get_top20(cumulative_emissions_2023_cons,'Total/GtCO2','Col Total/GtCO2')

#Include EU in ranking and remove any countries that are in EU
cumulative_emissions_2023_EU_cons=sort_cumu_emissions_terri_col(cumulative_emissions_2023_EU,'Total/GtCO2','Col Cons Total/GtCO2','Colonial Rank','Colonial + Consumption Rank')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_EU_cons=get_top20(cumulative_emissions_2023_EU_cons,'Total/GtCO2','Col Total/GtCO2')


In [91]:
#Calculate colonial emissions per capita - 1 - divided by population in each year - including consumption emissions

col_emissions['Cons Total PC1/tCO2/pc'] = col_emissions['Cons Total/GtCO2']*1e9/col_emissions['population']
col_emissions['Col Cons Total PC1/tCO2/pc'] = col_emissions['Col Cons Total/GtCO2']*1e9/col_emissions['population']

cpc_cons = col_emissions[['country','year','Total PC1/tCO2/pc','Cons Total PC1/tCO2/pc','Col Total PC1/tCO2/pc','Col Cons Total PC1/tCO2/pc','population']]
# calculate cumulative total
cpc_cons = cpc_cons.groupby(by=['country','year','population']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum

#Select last year to consider total cumulative emissions
cumu_cons_per_capita_emissions_2023=cpc_cons[cpc_cons['year']==2023]

#Create full dataset copy for table
cumu_cons_per_capita_emissions_2023_table=cumu_cons_per_capita_emissions_2023.copy()

#Eliminate all countries with 2023 population less than 2023
cumu_cons_per_capita_emissions_2023=cumu_cons_per_capita_emissions_2023[cumu_cons_per_capita_emissions_2023['population']>1e6]

# cumu_per_capita_emissions_2023=cumu_per_capita_emissions_2023.drop(['region','image'],axis=1)
cumu_cons_per_capita_emissions_2023=sort_cumu_emissions_terri_col(cumu_cons_per_capita_emissions_2023,'Total PC1/tCO2/pc','Col Cons Total PC1/tCO2/pc','Territorial','Colonial + Consumption Rank')

#Identify all countries that are in the two top 20 rankings - consumption and colonial
top20_cons_per_capita=get_top20(cumu_cons_per_capita_emissions_2023,'Total PC1/tCO2/pc','Col Total PC1/tCO2/pc')

#Include EU in ranking and remove any countries that are in EU+UK
col_emissions_EU['Cons Total PC1/tCO2/pc'] = col_emissions_EU['Cons Total/GtCO2']*1e9/col_emissions_EU['population']
col_emissions_EU['Col Cons Total PC1/tCO2/pc'] = col_emissions_EU['Col Cons Total/GtCO2']*1e9/col_emissions_EU['population']

cpc_cons_EU = col_emissions_EU[['country','year','Total PC1/tCO2/pc','Cons Total PC1/tCO2/pc','Col Total PC1/tCO2/pc','Col Cons Total PC1/tCO2/pc','population']]
# calculate cumulative total
cpc_cons_EU = cpc_cons_EU.groupby(by=['country','year','population']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum
cumu_cons_per_capita_emissions_2023_EU=cpc_cons_EU[cpc_cons_EU['year']==2023]
#Eliminate all countries with 2023 population less than 2023
cumu_cons_per_capita_emissions_2023_EU=cumu_cons_per_capita_emissions_2023_EU[cumu_cons_per_capita_emissions_2023_EU['population']>1e6]
#Sort
cumu_cons_per_capita_emissions_2023_EU=sort_cumu_emissions_terri_col(cumu_cons_per_capita_emissions_2023_EU,'Total PC1/tCO2/pc','Col Cons Total PC1/tCO2/pc','Territorial','Colonial + Consumption Rank')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_cons_per_capita_EU=get_top20(cumu_cons_per_capita_emissions_2023_EU,'Total PC1/tCO2/pc','Col Total PC1/tCO2/pc')


In [92]:
#Calculate cumulative emissions per capita -2, divided by population in 2023, including consumption emissions

cpc2_cons = col_emissions[['country','year','Total/GtCO2','Cons Total/GtCO2','Col Total/GtCO2','Col Cons Total/GtCO2','population']]
# calculate cumulative total
cpc2_cons = cpc2_cons.groupby(by=['country','year']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum

#Take 2023 year and remove cumulative population data
cumu_cons_per_capita_2_emissions_2023=cpc2_cons[cpc2_cons['year']==2023].reset_index(drop=True)

#cumu_per_capita_2_emissions_2023=cumu_per_capita_2_emissions_2023.drop(['region','image'],axis=1)
cumu_cons_per_capita_2_emissions_2023=cumu_cons_per_capita_2_emissions_2023.drop('population',axis=1)

# merge in population data
pop_data=col_emissions[['country','population']][col_emissions['year']==2023].reset_index(drop=True)
cumu_cons_per_capita_2_emissions_2023 = pd.merge(cumu_cons_per_capita_2_emissions_2023,pop_data,on='country')

#Calculate cumulative emissions per 2023 population
cumu_cons_per_capita_2_emissions_2023['Total PC2/tCO2/pc']=cumu_cons_per_capita_2_emissions_2023['Total/GtCO2']*1e9/cumu_cons_per_capita_2_emissions_2023['population']
cumu_cons_per_capita_2_emissions_2023['Cons Total PC2/tCO2/pc']=cumu_cons_per_capita_2_emissions_2023['Cons Total/GtCO2']*1e9/cumu_cons_per_capita_2_emissions_2023['population']
cumu_cons_per_capita_2_emissions_2023['Col Total PC2/tCO2/pc']=cumu_cons_per_capita_2_emissions_2023['Col Total/GtCO2']*1e9/cumu_cons_per_capita_2_emissions_2023['population']
cumu_cons_per_capita_2_emissions_2023['Col Cons Total PC2/tCO2/pc']=cumu_cons_per_capita_2_emissions_2023['Col Cons Total/GtCO2']*1e9/cumu_cons_per_capita_2_emissions_2023['population']

#Create full dataset copy for table
cumu_cons_per_capita_2_emissions_2023_table=cumu_cons_per_capita_2_emissions_2023.copy()

#Eliminate all countries with 2023 population less than 2023
cumu_cons_per_capita_2_emissions_2023=cumu_cons_per_capita_2_emissions_2023[cumu_cons_per_capita_2_emissions_2023['population']>1e6]

cumu_cons_per_capita_2_emissions_2023=sort_cumu_emissions_terri_col(cumu_cons_per_capita_2_emissions_2023,'Total PC2/tCO2/pc','Col Cons Total PC2/tCO2/pc','Territorial','Colonial + Consumption Rank')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_cons_per_capita_2=get_top20(cumu_cons_per_capita_2_emissions_2023,'Total PC2/tCO2/pc','Col Total PC2/tCO2/pc')

#Include EU+UK
cpc2_cons_EU = col_emissions_EU[['country','year','Total/GtCO2','Cons Total/GtCO2','Col Total/GtCO2','Col Cons Total/GtCO2','population']]
# display(cpc2[cpc2['country']=='EU+UK'])
# calculate cumulative total
cpc2_cons_EU = cpc2_cons_EU.groupby(by=['country','year']).sum().groupby(level=[0]).cumsum().reset_index() # cumulative sum

#Take 2023 year and remove cumulative population data
cumu_cons_per_capita_2_emissions_2023_EU=cpc2_cons_EU[cpc2_cons_EU['year']==2023].reset_index(drop=True)
#cumu_per_capita_2_emissions_2023=cumu_per_capita_2_emissions_2023.drop(['region','image'],axis=1)
cumu_cons_per_capita_2_emissions_2023_EU=cumu_cons_per_capita_2_emissions_2023_EU.drop('population',axis=1)
# display(cumu_per_capita_2_emissions_2023_EU[cumu_per_capita_2_emissions_2023_EU['country']=='EU+UK'])
# merge in population data
pop_data=col_emissions_EU[['country','population']][col_emissions_EU['year']==2023].reset_index(drop=True)
# display(pop_data)
cumu_cons_per_capita_2_emissions_2023_EU = pd.merge(cumu_cons_per_capita_2_emissions_2023_EU,pop_data,on='country')
# display(cumu_per_capita_2_emissions_2023_EU[cumu_per_capita_2_emissions_2023_EU['country']=='EU+UK'])
#Eliminate all countries with 2023 population less than 2023
cumu_cons_per_capita_2_emissions_2023_EU=cumu_cons_per_capita_2_emissions_2023_EU[cumu_cons_per_capita_2_emissions_2023_EU['population']>1e6]

#Calculate cumulative emissions per 2023 population
cumu_cons_per_capita_2_emissions_2023_EU['Total PC2/tCO2/pc']=cumu_cons_per_capita_2_emissions_2023_EU['Total/GtCO2']*1e9/cumu_cons_per_capita_2_emissions_2023_EU['population']
cumu_cons_per_capita_2_emissions_2023_EU['Cons Total PC2/tCO2/pc']=cumu_cons_per_capita_2_emissions_2023_EU['Cons Total/GtCO2']*1e9/cumu_cons_per_capita_2_emissions_2023_EU['population']
cumu_cons_per_capita_2_emissions_2023_EU['Col Total PC2/tCO2/pc']=cumu_cons_per_capita_2_emissions_2023_EU['Col Total/GtCO2']*1e9/cumu_cons_per_capita_2_emissions_2023_EU['population']
cumu_cons_per_capita_2_emissions_2023_EU['Col Cons Total PC2/tCO2/pc']=cumu_cons_per_capita_2_emissions_2023_EU['Col Cons Total/GtCO2']*1e9/cumu_cons_per_capita_2_emissions_2023_EU['population']

# display(cumu_per_capita_2_emissions_2023.head())
cumu_cons_per_capita_2_emissions_2023_EU=sort_cumu_emissions_terri_col(cumu_cons_per_capita_2_emissions_2023_EU,'Total PC2/tCO2/pc','Col Cons Total PC2/tCO2/pc','Territorial','Colonial + Consumption Rank')

#Identify all countries that are in the two top 20 rankings - territorial and colonial
top20_cons_per_capita_2_EU=get_top20(cumu_cons_per_capita_2_emissions_2023_EU,'Total PC2/tCO2/pc','Col Total PC2/tCO2/pc')


In [93]:
#Summary table for all countries - showing cumulative emissions in 2023, split by fossil/land/empire and per capita values

#Merge per capita statistics
summary_table=pd.merge(cumulative_emissions_2023_table,cumu_per_capita_emissions_2023_table[['country','Total PC1/tCO2/pc','Col Total PC1/tCO2/pc']],on='country',how='left')
summary_table=pd.merge(summary_table,cumu_per_capita_2_emissions_2023_table[['country','Total PC2/tCO2/pc','Col Total PC2/tCO2/pc']],on='country',how='left')
summary_table=pd.merge(summary_table,cumu_cons_per_capita_emissions_2023_table[['country','Cons Total PC1/tCO2/pc','Col Cons Total PC1/tCO2/pc']],on='country',how='left')
summary_table=pd.merge(summary_table,cumu_cons_per_capita_2_emissions_2023_table[['country','Cons Total PC2/tCO2/pc','Col Cons Total PC2/tCO2/pc']],on='country',how='left')

#Add in EU+UK data
EU_table=colonial_1850_EU[(colonial_1850_EU['country']=='EU+UK') & (colonial_1850_EU['year']==2023)]
EU_table=pd.merge(EU_table,cumu_per_capita_emissions_2023_EU[['country','Total PC1/tCO2/pc','Col Total PC1/tCO2/pc']],on='country',how='left')
EU_table=pd.merge(EU_table,cumu_per_capita_2_emissions_2023_EU[['country','Total PC2/tCO2/pc','Col Total PC2/tCO2/pc']],on='country',how='left')
EU_table=pd.merge(EU_table,cumu_cons_per_capita_emissions_2023_EU[['country','Cons Total PC1/tCO2/pc','Col Cons Total PC1/tCO2/pc']],on='country',how='left')
EU_table=pd.merge(EU_table,cumu_cons_per_capita_2_emissions_2023_EU[['country','Cons Total PC2/tCO2/pc','Col Cons Total PC2/tCO2/pc']],on='country',how='left')

#Make modifications
#Rename Bunkers
summary_table.loc[summary_table['country']=='Bunkers', 'country'] = 'International Transport'
summary_table.loc[summary_table['country']=='Viet Nam', 'country'] = 'Vietnam'
#Remove Antarctica and non-existent countries
countries_to_remove=['Antarctica','French Equatorial Africa','French West Africa','Panama Canal Zone']
summary_table=summary_table[~summary_table['country'].isin(countries_to_remove)]

#Eliminate all countries with 2023 population less than 2023
summary_table=summary_table[(summary_table['population']>1e6) | (summary_table['country']=='International Transport')]

summary_table=pd.concat([summary_table,EU_table],axis=0)
summary_table.sort_values(by='country',ascending=True,inplace=True)

#Convert population into millions of people
summary_table['population']=summary_table['population']/1e6

#Remove year columns
summary_table=summary_table.drop('year',axis=1)

#Make copy of table for later use
summary_table_copy=summary_table.copy()

#Replace nan and inf with 'n/a'
summary_table.replace(to_replace=np.inf,value='n/a',inplace=True)
summary_table.fillna(value='n/a',inplace=True)
#Rename columns
summary_table=summary_table.rename(columns={'population':'Population (2023), millions','Total/GtCO2':'Territorial Total/GtCO2','Col Total/GtCO2':'Colonial Total/GtCO2','Emp Total/GtCO2':'Empire Total/GtCO2','Total PC1/tCO2/pc':'Territorial per yearly capita /tCO2/pc','Col Total PC1/tCO2/pc':'Colonial per yearly capita /tCO2/pc','Total PC2/tCO2/pc':'Territorial per 2023 capita /tCO2/pc','Col Total PC2/tCO2/pc':'Colonial per 2023 capita /tCO2/pc',
                                            'Cons Total PC1/tCO2/pc':'Consumption per yearly capita /tCO2/pc','Col Cons Total PC1/tCO2/pc':'Colonial consumption per yearly capita /tCO2/pc','Cons Total PC2/tCO2/pc':'Consumption per 2023 capita /tCO2/pc','Col Cons Total PC2/tCO2/pc':'Colonial consumption per 2023 capita /tCO2/pc'})

#Set index to country
summary_table.set_index('country',inplace=True)

summary_table.to_csv('output-clean/full_summary_table_2023.csv')

display(summary_table)

#Create simpler summary table to share in article
# [Table breaking down various metrics for all countries in 2023, in a sortable table. I think this should include population, territorial emissions, colonial emissions, colonial+consumption emissions and 2x per capita emissions measures.]
summary_table_article=summary_table[['Population (2023), millions','Territorial Total/GtCO2','Colonial Total/GtCO2','Col Cons Total/GtCO2','Colonial per yearly capita /tCO2/pc','Colonial per 2023 capita /tCO2/pc']]

summary_table_article.to_csv('output-clean/article_summary_table_2023.csv')


  summary_table.fillna(value='n/a',inplace=True)


Unnamed: 0_level_0,"Population (2023), millions",Fossil CO2/GtCO2,Land CO2/GtCO2,Territorial Total/GtCO2,Emp Fossil Total/GtCO2,Emp Land Total/GtCO2,Empire Total/GtCO2,Col Fossil Total/GtCO2,Col Land Total/GtCO2,Colonial Total/GtCO2,...,Col share/percent,Change/percent,Territorial per yearly capita /tCO2/pc,Colonial per yearly capita /tCO2/pc,Territorial per 2023 capita /tCO2/pc,Colonial per 2023 capita /tCO2/pc,Consumption per yearly capita /tCO2/pc,Colonial consumption per yearly capita /tCO2/pc,Consumption per 2023 capita /tCO2/pc,Colonial consumption per 2023 capita /tCO2/pc
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,42.239856,0.242534,0.947266,1.189800,0.000000,0.000000,0.000000,0.242534,0.947266,1.189800,...,0.04652,0.0,156.070675,156.070675,28.167706,28.167706,156.070675,156.070675,28.167706,28.167706
Albania,2.832443,0.303385,0.168068,0.471453,0.000000,0.000000,0.000000,0.303385,0.168068,0.471453,...,0.018433,0.0,283.140243,283.140243,166.447505,166.447505,295.958694,295.958694,179.990491,179.990491
Algeria,45.606480,5.226865,0.575065,5.801930,-0.081335,-0.434386,-0.515720,5.145531,0.140679,5.286210,...,0.206686,-8.888774,247.363053,177.741999,127.217235,115.909182,247.363053,177.741999,127.217235,115.909182
Angola,36.684212,0.701266,7.644643,8.345909,-0.038077,-4.626460,-4.664536,0.663189,3.018183,3.681373,...,0.143939,-55.890094,1330.188591,181.890328,227.506839,100.353053,1330.188591,181.890328,227.506839,100.353053
Argentina,45.773888,9.021836,15.835832,24.857669,0.000000,0.000000,0.000000,9.021836,15.835832,24.857669,...,0.971914,0.0,2677.751771,2677.751771,543.053472,543.053472,2672.567028,2672.567028,538.365748,538.365748
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela,28.838500,7.984782,6.539654,14.524435,0.000000,0.000000,0.000000,7.984782,6.539654,14.524435,...,0.567893,0.0,1977.468067,1977.468067,503.647395,503.647395,1912.854192,1912.854192,442.977298,442.977298
Vietnam,98.858944,5.297644,8.829129,14.126773,-0.145389,-3.615647,-3.761036,5.152256,5.213481,10.365737,...,0.405292,-26.623463,480.81053,197.177885,142.898278,104.853808,465.987488,182.354843,128.935182,90.890712
Yemen,34.449824,0.677382,0.003090,0.680472,-0.044107,0.016183,-0.027925,0.633275,0.019273,0.652548,...,0.025514,-4.103703,39.725725,37.059706,19.752564,18.941977,39.725725,37.059706,19.752564,18.941977
Zambia,20.569738,0.273526,4.871264,5.144790,-0.051281,-2.221508,-2.272788,0.222246,2.649756,2.872001,...,0.112293,-44.176504,1633.821768,517.088471,250.114499,139.622657,1639.150557,522.41726,253.348608,142.856766


In [94]:
#Graphic 1: world fossil vs land emissions 1850 - 2023
world_fossil_land=col_emissions[['year','Fossil CO2/GtCO2','Land CO2/GtCO2']].groupby('year').sum()
world_fossil_land_cumu=colonial_1850[['year','Fossil CO2/GtCO2','Land CO2/GtCO2']].groupby('year').sum()
world_fossil_land=world_fossil_land.merge(world_fossil_land_cumu,on='year',suffixes=['_year','_cumu'])
display(world_fossil_land)

world_fossil_land.to_csv('output-clean/plot_landvfossil.csv')

Unnamed: 0_level_0,Fossil CO2/GtCO2_year,Land CO2/GtCO2_year,Fossil CO2/GtCO2_cumu,Land CO2/GtCO2_cumu
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1850,0.196896,2.419535,0.196896,2.419535
1851,0.198805,2.507709,0.395701,4.927243
1852,0.207551,2.545875,0.603252,7.473119
1853,0.217209,2.603205,0.820461,10.076324
1854,0.255139,2.625506,1.075600,12.701830
...,...,...,...,...
2019,37.072149,4.542377,1659.658875,733.143476
2020,35.254667,4.040317,1694.913542,737.183793
2021,37.114432,4.003708,1732.027973,741.187501
2022,37.944759,4.003708,1769.972733,745.191208


In [95]:
#Graphic 2: colonial_emissions, including EU+UK
cumulative_emissions_2023_w_EU=summary_table_copy.copy()

#Drop Bunkers from rankings - not a country
cumulative_emissions_2023_w_EU=cumulative_emissions_2023_w_EU[~(cumulative_emissions_2023_w_EU['country']=='Bunkers')]

cumulative_emissions_2023_w_EU=cumulative_emissions_2023_w_EU.sort_values(by='Col Total/GtCO2',ascending=False)

cumulative_emissions_2023_w_EU['Pos Emp/GtCO2']=cumulative_emissions_2023_w_EU['Emp Total/GtCO2'][cumulative_emissions_2023_w_EU['Emp Total/GtCO2']>0]
cumulative_emissions_2023_w_EU['Neg Emp/GtCO2']=cumulative_emissions_2023_w_EU['Emp Total/GtCO2'][cumulative_emissions_2023_w_EU['Emp Total/GtCO2']<0]
cumulative_emissions_2023_w_EU['Neg Emp/GtCO2']=cumulative_emissions_2023_w_EU['Neg Emp/GtCO2']*-1
#Set Total for those with negative empire emissions to subtract the negative empire emissions already so that when plotted, it adds up to the total
cumulative_emissions_2023_w_EU['Total Plot/GtCO2']=np.where(cumulative_emissions_2023_w_EU['Emp Total/GtCO2']<0,cumulative_emissions_2023_w_EU['Total/GtCO2']-cumulative_emissions_2023_w_EU['Neg Emp/GtCO2'],cumulative_emissions_2023_w_EU['Total/GtCO2'])
# cumulative_emissions_2023_region['Total/GtCO2']=cumulative_emissions_2023_region['Total/GtCO2'][cumulative_emissions_2023_region['Emp Total/GtCO2']>0]
cumulative_emissions_2023_w_EU=cumulative_emissions_2023_w_EU.rename(columns={'Total Plot/GtCO2':'Within own borders','Pos Emp/GtCO2':'From former territories','Neg Emp/GtCO2':'Allocated to colonial power'})

cumulative_emissions_2023_w_EU=cumulative_emissions_2023_w_EU[['country','Within own borders','From former territories','Allocated to colonial power']]
cumulative_emissions_2023_w_EU=cumulative_emissions_2023_w_EU.set_index('country')
cumulative_emissions_2023_w_EU=cumulative_emissions_2023_w_EU.head(20)

cumulative_emissions_2023_w_EU

cumulative_emissions_2023_w_EU.to_csv('output-clean/plot_top20_EU-UK.csv')

In [96]:
#Graphic 3: Ranking chart: territorial vs Colonial
from functions import get_top21

cumulative_emissions_2023_w_EU_rankings=summary_table_copy.copy()

#Drop Bunkers from rankings - not a country
cumulative_emissions_2023_w_EU_rankings=cumulative_emissions_2023_w_EU_rankings[~(cumulative_emissions_2023_w_EU_rankings['country']=='Bunkers')]

cumulative_emissions_2023_w_EU_rankings=sort_cumu_emissions_terri_col(cumulative_emissions_2023_w_EU_rankings,'Total/GtCO2','Col Total/GtCO2')

cumulative_emissions_2023_w_EU_rankings=get_top21(cumulative_emissions_2023_w_EU_rankings,'Total/GtCO2')

cumulative_emissions_2023_w_EU_rankings=cumulative_emissions_2023_w_EU_rankings[['country','Territorial Rank','Colonial Rank']]
cumulative_emissions_2023_w_EU_rankings.set_index('country',inplace=True)

display(cumulative_emissions_2023_w_EU_rankings)

cumulative_emissions_2023_w_EU_rankings.to_csv('output-clean/plot_ranking_chart.csv')

Unnamed: 0_level_0,Territorial Rank,Colonial Rank
country,Unnamed: 1_level_1,Unnamed: 2_level_1
USA,1,1
EU+UK,2,2
China,3,3
Russia,4,4
United Kingdom,9,5
Brazil,5,6
Germany,6,7
India,8,8
Japan,10,9
Indonesia,7,10


In [97]:
#Graphics 4 and 5 - breakdown for United Kingdom and Netherlands

#Save empire contributions to file and plot for each empire country
empire_countries_to_plot=['United Kingdom','Netherlands']
for empire in empire_countries_to_plot:
    empire_contributions=emp_emissions_by_country[[empire]].sort_values(by=empire,ascending=False)
    empire_contributions.loc[empire,'Territorial']=empire_contributions.loc[empire,empire]
    empire_contributions.loc[empire,empire]=np.nan
    empire_contributions=empire_contributions.fillna(0)
    empire_contributions=empire_contributions.loc[(empire_contributions!= 0).any(axis=1)]
    total = empire_contributions[empire].sum()
    empire_contributions['%'] = (empire_contributions[empire] / total)
    empire_contributions['cumu %'] = empire_contributions['%'].cumsum()
    empire_contributions=empire_contributions.rename({empire:'Empire'},axis=1)

#Process data for plotting
    if len(empire_contributions.index)>10:
        if empire=='United Kingdom':
            empire_contributions_OG=empire_contributions.copy()
            empire_contributions=empire_contributions.merge(regions_country,left_index=True,right_index=True,how='left')
            mask_AFR=(empire_contributions['Empire']<1) & (empire_contributions['region']=='Africa') & (empire_contributions['Territorial']==0)
            mask_ASI=(empire_contributions['Empire']<1) & (empire_contributions['region']=='Asia') & (empire_contributions['Territorial']==0)
            mask_other=(empire_contributions['Empire']<1) & (empire_contributions['region']!='Asia') & (empire_contributions['region']!='Africa') & (empire_contributions['Territorial']==0)
            mask_list = []
            mask_list.append(mask_AFR)
            mask_list.append(mask_ASI)
            mask_list.append(mask_other)
            mask_string_list=['Other Africa','Other Asia','Other']
            for iiii in range(len(mask_string_list)):
                mask=mask_list[iiii]
                mask_string=mask_string_list[iiii]
                countries=empire_contributions_OG.loc[mask,'Empire']
                empire_contributions.loc[mask_string,:]=empire_contributions_OG.loc[mask,:].sum()
                empire_contributions.loc[mask_string,'cumu %']=1
                empire_contributions.loc[mask_string,'Territorial']=0
            mask_full=(empire_contributions['Empire']<1) & (empire_contributions['Territorial']==0)
            countries_full=empire_contributions_OG.loc[mask_full,'Empire']
            empire_contributions=empire_contributions[~empire_contributions['Empire'].isin(countries_full)]
        else:
            if len(empire_contributions.index)>10:
                mask=empire_contributions['cumu %']>1-0.10
                countries=empire_contributions.loc[mask,'Empire']
                empire_contributions.loc['Other',:]=empire_contributions.loc[mask,:].sum()
                empire_contributions.loc['Other','cumu %']=1
                empire_contributions.loc['Other','Territorial']=0
                empire_contributions=empire_contributions[~empire_contributions['Empire'].isin(countries)]
    
    #Save data for plotting
    if empire in empire_countries_to_plot:
        empire_contributions.rename(columns={'Empire':'From former territories','Territorial':'Within own borders'},inplace=True)
        if empire in ['United Kingdom']:
            empire_contributions_UK=empire_contributions[['From former territories','Within own borders']]
            empire_contributions_UK.to_csv('output-clean/plot_UK.csv')
            display(empire_contributions_UK)

        if empire in ['Netherlands']:
            empire_contributions_NL=empire_contributions[['From former territories','Within own borders']]
            empire_contributions_NL.to_csv('output-clean/plot_NL.csv')
            display(empire_contributions_NL)


Unnamed: 0,From former territories,Within own borders
United Kingdom,0.0,76.419854
India,12.952888,0.0
Myanmar,7.348448,0.0
Nigeria,5.115535,0.0
Australia,3.639546,0.0
Malaysia,3.301426,0.0
Zambia,2.272788,0.0
Tanzania,2.250291,0.0
Bangladesh,2.199895,0.0
New Zealand,1.440853,0.0


Unnamed: 0,From former territories,Within own borders
Indonesia,22.170339,0.0
Netherlands,0.0,12.648447
Curaçao,0.58249,0.0
Suriname,0.079099,0.0


In [98]:

#Graphic 6: consumption based emissions, including colonial
cumulative_emissions_2023_cons=summary_table_copy.copy()

#Drop Bunkers from rankings - not a country
cumulative_emissions_2023_cons=cumulative_emissions_2023_cons[~(cumulative_emissions_2023_cons['country']=='Bunkers')]

#Drop EU+UK from rankings - not a country
cumulative_emissions_2023_cons=cumulative_emissions_2023_cons[~(cumulative_emissions_2023_cons['country']=='EU+UK')]

cumulative_emissions_2023_cons['Traded CO2/GtCO2']=cumulative_emissions_2023_cons['Col Cons Total/GtCO2']-cumulative_emissions_2023_cons['Col Total/GtCO2']
cumulative_emissions_2023_cons['Pos Traded CO2/GtCO2']=cumulative_emissions_2023_cons['Traded CO2/GtCO2'][cumulative_emissions_2023_cons['Traded CO2/GtCO2']>0]
cumulative_emissions_2023_cons['Neg Traded CO2/GtCO2']=cumulative_emissions_2023_cons['Traded CO2/GtCO2'][cumulative_emissions_2023_cons['Traded CO2/GtCO2']<0]
cumulative_emissions_2023_cons['Neg Traded CO2/GtCO2']=cumulative_emissions_2023_cons['Neg Traded CO2/GtCO2']*-1
cumulative_emissions_2023_cons=cumulative_emissions_2023_cons[['country','Col Total/GtCO2','Col Cons Total/GtCO2','Traded CO2/GtCO2','Pos Traded CO2/GtCO2','Neg Traded CO2/GtCO2']]

cumulative_emissions_2023_cons=cumulative_emissions_2023_cons.rename(columns={'Col Total/GtCO2_x':'Col Total/GtCO2','Col Total/GtCO2_y':'Col Total/GtCO2 (pre-1990)'})
#Sort by colonial consumption based emissions
cumulative_emissions_2023_cons=cumulative_emissions_2023_cons.sort_values(by='Col Cons Total/GtCO2',ascending=False)

#Set Total for those with negative empire emissions to subtract the negative empire emissions already so that when plotted, it adds up to the total
cumulative_emissions_2023_cons['Total Plot/GtCO2']=np.where(cumulative_emissions_2023_cons['Traded CO2/GtCO2']<0,cumulative_emissions_2023_cons['Col Total/GtCO2']-cumulative_emissions_2023_cons['Neg Traded CO2/GtCO2'],cumulative_emissions_2023_cons['Col Total/GtCO2'])


# cumulative_emissions_2023_region['Total/GtCO2']=cumulative_emissions_2023_region['Total/GtCO2'][cumulative_emissions_2023_region['Emp Total/GtCO2']>0]
cumulative_emissions_2023_cons=cumulative_emissions_2023_cons.rename(columns={'Total Plot/GtCO2':'Cumulative colonial emissions','Pos Traded CO2/GtCO2':'Trade imported CO2 (since 1990)','Neg Traded CO2/GtCO2':'Trade exported CO2 (since 1990)'})
cumulative_emissions_2023_cons=cumulative_emissions_2023_cons.rename(columns={'Total Plot 1990/GtCO2':'Cumulative colonial emissions (since 1990)','Col Total/GtCO2 (pre-1990)':'Cumulative colonial emissions (pre 1990)','Pos Traded CO2/GtCO2':'Trade imported CO2 (since 1990)','Neg Traded CO2/GtCO2':'Trade exported CO2 (since 1990)'})

cumulative_emissions_2023_cons=cumulative_emissions_2023_cons.set_index('country')
cumulative_emissions_2023_cons=cumulative_emissions_2023_cons.head(20)

cumulative_emissions_2023_cons_table=cumulative_emissions_2023_cons[['Cumulative colonial emissions','Trade imported CO2 (since 1990)','Trade exported CO2 (since 1990)']]

cumulative_emissions_2023_cons_table

cumulative_emissions_2023_cons_table.to_csv('output-clean/plot_consumption.csv')

In [99]:
#Graphic 7: per 2023 capita, including EU+UK
cumulative_per_capita_emissions_2023_w_EU=summary_table_copy.copy()

def plot_per_capita(sort_value,ref_value,cumulative_per_capita_emissions_2023_w_EU,head_count,extra_countries):
    cumulative_per_capita_emissions_2023_w_EU=cumulative_per_capita_emissions_2023_w_EU.sort_values(by=sort_value,ascending=False)
    cumulative_per_capita_emissions_2023_w_EU['Emp Total PC2/tCO2/pc']=cumulative_per_capita_emissions_2023_w_EU[sort_value]-cumulative_per_capita_emissions_2023_w_EU[ref_value]

    cumulative_per_capita_emissions_2023_w_EU['Pos Emp Total PC2/tCO2/pc']=cumulative_per_capita_emissions_2023_w_EU['Emp Total PC2/tCO2/pc'][cumulative_per_capita_emissions_2023_w_EU['Emp Total PC2/tCO2/pc']>0]
    cumulative_per_capita_emissions_2023_w_EU['Neg Emp Total PC2/tCO2/pc']=cumulative_per_capita_emissions_2023_w_EU['Emp Total PC2/tCO2/pc'][cumulative_per_capita_emissions_2023_w_EU['Emp Total PC2/tCO2/pc']<0]
    cumulative_per_capita_emissions_2023_w_EU['Neg Emp Total PC2/tCO2/pc']=cumulative_per_capita_emissions_2023_w_EU['Neg Emp Total PC2/tCO2/pc']*-1

    #Set Total for those with negative empire emissions to subtract the negative empire emissions already so that when plotted, it adds up to the total
    cumulative_per_capita_emissions_2023_w_EU['Total Plot/GtCO2']=np.where(cumulative_per_capita_emissions_2023_w_EU['Emp Total PC2/tCO2/pc']<0,cumulative_per_capita_emissions_2023_w_EU[ref_value]-cumulative_per_capita_emissions_2023_w_EU['Neg Emp Total PC2/tCO2/pc'],cumulative_per_capita_emissions_2023_w_EU[ref_value])
    # cumulative_emissions_2023_region['Total/GtCO2']=cumulative_emissions_2023_region['Total/GtCO2'][cumulative_emissions_2023_region['Emp Total/GtCO2']>0]
    cumulative_per_capita_emissions_2023_w_EU=cumulative_per_capita_emissions_2023_w_EU.rename(columns={'Total Plot/GtCO2':'Within own borders','Pos Emp Total PC2/tCO2/pc':'From former territories','Neg Emp Total PC2/tCO2/pc':'Allocated to colonial power'})

    cumulative_per_capita_emissions_2023_w_EU=cumulative_per_capita_emissions_2023_w_EU[cumulative_per_capita_emissions_2023_w_EU['population']>1]

    cumulative_per_capita_emissions_2023_w_EU=cumulative_per_capita_emissions_2023_w_EU[['country','Within own borders','From former territories','Allocated to colonial power']]
    cumulative_per_capita_emissions_2023_w_EU=cumulative_per_capita_emissions_2023_w_EU.set_index('country')
    cumulative_per_capita_emissions_2023_w_EU_top=cumulative_per_capita_emissions_2023_w_EU.head(head_count)
    #Add extra
    cumulative_per_capita_emissions_2023_w_EU_extra=cumulative_per_capita_emissions_2023_w_EU[cumulative_per_capita_emissions_2023_w_EU.index.isin(extra_countries)]
    cumulative_per_capita_emissions_2023_w_EU_top=pd.concat([cumulative_per_capita_emissions_2023_w_EU_top,cumulative_per_capita_emissions_2023_w_EU_extra],axis=0)
    # display(cumulative_per_capita_emissions_2023_w_EU_top)
    return cumulative_per_capita_emissions_2023_w_EU_top

extra_countries=['Portugal','France','New Zealand','China','India']
sort_value='Col Total PC2/tCO2/pc'
ref_value='Total PC2/tCO2/pc'
head_count=10

cumulative_per_capita_2_emissions_2023_w_EU_top=plot_per_capita(sort_value,ref_value,cumulative_per_capita_emissions_2023_w_EU,head_count,extra_countries)

display(cumulative_per_capita_2_emissions_2023_w_EU_top)

cumulative_per_capita_2_emissions_2023_w_EU_top.to_csv('output-clean/plot_per_current_pop.csv')

extra_countries=['New Zealand','Malaysia','Indonesia','China','India']
sort_value='Col Total PC1/tCO2/pc'
ref_value='Total PC1/tCO2/pc'
head_count=10

#Graphic 8: per yearly capita top 10 with selected others, including EU+UK
cumulative_per_capita_emissions_2023_w_EU_top=plot_per_capita(sort_value,ref_value,cumulative_per_capita_emissions_2023_w_EU,head_count,extra_countries)

display(cumulative_per_capita_emissions_2023_w_EU_top)

cumulative_per_capita_emissions_2023_w_EU_top.to_csv('output-clean/plot_cumulative_per_capita.csv')


Unnamed: 0_level_0,Within own borders,From former territories,Allocated to colonial power
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Netherlands,717.915438,1295.921452,
United Kingdom,1128.188141,793.397823,
Russia,1176.289693,478.691284,
USA,1551.640019,8.455808,
Canada,1523.933318,,30.990666
Belgium,1114.745508,372.290978,
Germany,1098.833889,6.031767,
Australia,1087.753585,,125.743113
Austria,573.095894,413.580704,
Trinidad and Tobago,945.737583,,177.867224


Unnamed: 0_level_0,Within own borders,From former territories,Allocated to colonial power
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Netherlands,1077.474515,4027.528903,
USA,3970.742296,27.837288,
Canada,3913.70103,,386.069932
United Kingdom,1593.886376,1274.937616,
Argentina,2677.751771,,
Qatar,2501.910059,,849.54022
Nicaragua,2387.061403,,
Russia,1597.064689,674.500787,
Australia,2195.691768,,1785.62406
Costa Rica,2094.436538,,


In [100]:
#Calculate carbon budget remaining at the end of each year from 1850 onwards

#Calculate up to date carbon budget from 1850
#https://www.nature.com/articles/s41558-023-01848-5
#"We conclude that the RCB for a 50% chance of keeping warming to 1.5 °C is around 250 GtCO 2 as of January 2023
#Calculate total emissions since 1850 up to January 2023"
remaining_carbon_budget_Jan_2023=250
emissions_total_1850_2022=emissions_data_final[emissions_data_final['year']<2023]
emissions_total_1850_2022_sum=emissions_total_1850_2022['Total/GtCO2'].sum()
#Calculate up to date carbon budget
carbon_budget_1850=emissions_total_1850_2022_sum+remaining_carbon_budget_Jan_2023

# added to this total is estimated cumulative emissions since 1850, per this analysis
world = emissions_data_final[['year','Total/GtCO2']].pivot_table(index='year',values='Total/GtCO2',aggfunc='sum').reset_index()
world['Total/GtCO2'] = world['Total/GtCO2']
world['cumulative Total/GtCO2'] = world['Total/GtCO2'].cumsum()
world['budget_left'] = carbon_budget_1850 - world['cumulative Total/GtCO2']
world['pct remaining'] = world['budget_left']/carbon_budget_1850*100
world['pct used'] = world['cumulative Total/GtCO2']/carbon_budget_1850*100

#Calculate warming from cumulative CO2 emissions every year
colonial_warming_by_year=colonial_1850[['year','Temperature increase/degC']].groupby('year').sum()

#Combine budget with the temperature increase dataframe
world_budget_temp=world.set_index('year')
world_budget_temp=world_budget_temp.merge(colonial_warming_by_year,left_index=True,right_index=True,how='left')

# world=world[(world['year']!=1849)]
world=world.drop('Total/GtCO2',axis=1)

world_budget_temp.to_csv('output-clean/plot_carbon_budget.csv')
world_budget_temp


Unnamed: 0_level_0,Total/GtCO2,cumulative Total/GtCO2,budget_left,pct remaining,pct used,Temperature increase/degC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1850,2.616431,2.616431,2762.547510,99.905379,0.094621,0.001177
1851,2.706514,5.322944,2759.840997,99.807500,0.192500,0.002395
1852,2.753426,8.076371,2757.087570,99.707924,0.292076,0.003634
1853,2.820414,10.896785,2754.267156,99.605926,0.394074,0.004904
1854,2.880645,13.777430,2751.386511,99.501750,0.498250,0.006200
...,...,...,...,...,...,...
2019,41.614526,2392.802351,372.361590,13.466167,86.533833,1.076761
2020,39.294984,2432.097335,333.066606,12.045094,87.954906,1.094444
2021,41.118139,2473.215474,291.948467,10.558089,89.441911,1.112947
2022,41.948467,2515.163941,250.000000,9.041055,90.958945,1.131824
