In [1]:
# Imports

import pandas as pd
import json
import numpy as np
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, so library warnings (e.g. deprecations) will not be shown while
# the notebook runs.
warnings.filterwarnings("ignore")

In [2]:
# Load the two CSV inputs into DataFrames (paths are relative to the working directory)

world_co2, sector_co2 = (
    pd.read_csv(csv_name)
    for csv_name in ("owid-co2-data.csv", "ghg-emissions-by-sector.csv")
)

In [3]:
# Read the geojson file with countries and coordinates

# JSON text is UTF-8 encoded; passing the encoding explicitly keeps non-ASCII
# country names from depending on the platform's default encoding.
with open("WorldMapGeo.js", encoding="utf-8") as file:
    geo = json.load(file)

In [4]:
# Common elements between the two datasets

names_one = list(world_co2['country'].value_counts().index)
names_two = list(sector_co2['Entity'].value_counts().index)
common_elements = set(names_one) & set(names_two)

# Keep the countries only
continents_reference = ['Africa', 'Antarctica', 'Asia', 'Oceania', 'Europe', 'North America', 'South America']
incomes_reference = ['Low-income countries', 'Lower-middle-income countries', 'Upper-middle-income countries', 'High-income countries']
continents = list(set(continents_reference) & common_elements)
incomes_classification = list(set(incomes_reference) & common_elements)

# Remove the aggregate entities with a set *difference*. A symmetric difference
# (`^`) would add 'World' / 'European Union (27)' to the result whenever they
# are not already part of `common_elements`.
aggregates = set(continents) | set(incomes_classification) | {'World', 'European Union (27)'}
countries = list(common_elements - aggregates)

world_co2_overlap = world_co2[world_co2['country'].isin(common_elements)]
countries_co2 = world_co2_overlap[world_co2_overlap['country'].isin(countries)]
countries_sector = sector_co2[sector_co2['Entity'].isin(countries)]

# Keep only the time period 1990-2019 (both bounds inclusive)
countries_co2 = countries_co2[countries_co2['year'].between(1990, 2019)]

# Set the same column names for the common columns and merge the two datasets.
# The rename/reset_index calls return new frames instead of mutating a filtered
# slice in place, which avoids pandas' chained-assignment pitfalls.
countries_sector = (
    countries_sector
    .rename(columns={"Entity": "country", "Code": "iso_code", "Year": "year"})
    .reset_index(drop=True)
)
countries_co2 = countries_co2.reset_index(drop=True)
merged = pd.merge(countries_co2, countries_sector, how="inner", on=["country", "iso_code", "year"])

In [5]:
# Align country names in the merged dataset with the names used by the geojson file.
# Each tuple is (name in the merged dataset, name expected by the geojson).
replace_countries = [
    ('Bahamas', 'The Bahamas'),
    ('Congo', 'Republic of the Congo'),
    ('Democratic Republic of Congo', 'Democratic Republic of the Congo'),
    ('Timor', 'Timor-Leste'),
    ('Svalbard and Jan Mayen', 'Svalbard'),
    ('Czechia', 'Czech Republic'),
    ('Gambia', 'The Gambia'),
    ('Eswatini', 'Swaziland'),
    ('Micronesia (country)', 'Federated States of Micronesia'),
    ('North Macedonia', 'Macedonia'),
]

# Whole-value replacement of every listed name in a single pass; no new name
# is itself an old name, so this matches applying the pairs one after another.
merged["country"] = merged["country"].replace(dict(replace_countries))

In [6]:
# Create list of relevant columns to add to geojson

all_sectors = ['Agriculture', 'Land-use change and forestry', 'Waste', 'Industry',
               'Manufacturing and construction', 'Transport', 'Electricity and heat',
               'Buildings', 'Fugitive emissions', 'Other fuel combustion', 'Aviation and shipping']
all_gases = ['co2_including_luc', 'methane', 'nitrous_oxide']
global_emission = 'total_ghg'


def _json_safe_list(series):
    """Return the values of `series` as a plain list with missing values as None.

    json.dump writes NaN as the bare token `NaN`, which is not valid JSON and
    is rejected by strict parsers; None is serialized as `null` instead.
    `tolist()` also converts numpy scalars to native Python numbers.
    """
    return [None if pd.isna(value) else value for value in series.tolist()]


# Create output geojson by adding to the previous one the ghg emissions for each country

# Every column read below must exist in `merged`; check once up front so a
# schema problem is reported as such instead of once per map feature.
required_columns = [global_emission] + all_sectors + all_gases
missing_columns = [col for col in required_columns if col not in merged.columns]
if missing_columns:
    raise KeyError(f"Columns missing from merged dataset: {missing_columns}")

countries_without_data = []

for feature in geo['features']:

    # Map name
    properties = feature.get('properties')
    country_name = properties.get('name') if properties else None
    if country_name is None:
        print("Feature without a name skipped: ", feature.get('id'))
        continue

    country = merged[merged['country'] == country_name]
    if country.empty:
        # The feature still receives the same keys (with empty lists) so every
        # feature in the output has a uniform set of properties.
        countries_without_data.append(country_name)

    # Global ghg for world map
    properties[global_emission] = _json_safe_list(country[global_emission])

    # Emissions by sector 2nd chart (barplot)
    properties["sectors"] = [
        {"Sector": sec, "ghg": _json_safe_list(country[sec])}
        for sec in all_sectors
    ]

    # Gas repartition for 3rd chart (stacked bars)
    properties["gases"] = [
        {"Gas": gas, "Emission": _json_safe_list(country[gas])}
        for gas in all_gases
    ]

if countries_without_data:
    print("Country not found: ", ", ".join(map(str, countries_without_data)))

# Save new geojson

with open("output_geojson.js", "w", encoding="utf-8") as js_file:
    json.dump(geo, js_file)

In [7]:
# Verify geojson: reload the saved file and flatten each feature's properties into columns

with open("output_geojson.js") as geojson_file:
    output = json.load(geojson_file)

df = pd.DataFrame(output['features']).drop(columns=['type', 'geometry'])
properties_df = pd.DataFrame(df['properties'].tolist())
df = pd.concat([df.drop(columns='properties'), properties_df], axis=1)
df.head(3)

Unnamed: 0,id,name,total_ghg,sectors,gases
0,AF,Afghanistan,"[9.58, 9.81, 9.03, 9.11, 9.15, 9.58, 10.61, 11...","[{'Sector': 'Agriculture', 'ghg': [8069999.694...","[{'Gas': 'co2_including_luc', 'Emission': [2.4..."
1,AL,Albania,"[11.3, 8.96, 6.95, 6.92, 7.74, 7.58, 7.29, 6.5...","[{'Sector': 'Agriculture', 'ghg': [3549999.952...","[{'Gas': 'co2_including_luc', 'Emission': [7.2..."
2,DZ,Algeria,"[121.4, 125.57, 126.53, 135.01, 136.96, 143.9,...","[{'Sector': 'Agriculture', 'ghg': [7869999.885...","[{'Gas': 'co2_including_luc', 'Emission': [88...."
