In [5]:
import geopandas as gpd
import rasterio
from rasterio.merge import merge
from rasterstats import zonal_stats
import pandas as pd
import glob
import os
import requests


In [2]:
import os
import requests

# Remote directory that holds the yearly EDGAR CO2 TOTALS archives
base_url = "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/datasets/EDGAR_2024_GHG/CO2/TOTALS/emi_nc/"

# Local directory that receives the archives; created up front if missing
destination_folder = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2"
os.makedirs(destination_folder, exist_ok=True)

# Years to fetch: 2010 through 2023 inclusive
years = [*range(2010, 2024)]

# Function to download a file
def download_file(url, save_path, timeout=60):
    """Stream ``url`` to ``save_path`` and report the outcome on stdout.

    The body is first written to ``<save_path>.part`` and only moved to
    ``save_path`` once the transfer finished, so an interrupted download never
    leaves a truncated file under the final name.

    Parameters
    ----------
    url : str
        Address of the file to fetch.
    save_path : str or path-like
        Destination file; replaced if it already exists.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled server cannot hang the cell
        indefinitely. Defaults to 60 seconds.

    Returns
    -------
    None
        Request failures are printed, not raised. Other errors (for example
        an ``OSError`` while writing) still propagate.
    """
    tmp_path = f"{save_path}.part"
    try:
        # The context manager releases the streamed connection even on failure
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(tmp_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        # Promote the finished download to its final name in one step
        os.replace(tmp_path, save_path)
        print(f"Downloaded: {save_path}")
    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")
    finally:
        # Only present if the transfer did not complete
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

# Loop through each year and download the corresponding ZIP file
for year in years:
    # One ZIP archive per year, named after the year it covers
    file_name = f"EDGAR_2024_GHG_CO2_{year}_TOTALS_emi_nc.zip"
    save_path = os.path.join(destination_folder, file_name)
    file_url = base_url + file_name
    download_file(url=file_url, save_path=save_path)

print("Download process completed.")


Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2\EDGAR_2024_GHG_CO2_2010_TOTALS_emi_nc.zip
Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2\EDGAR_2024_GHG_CO2_2011_TOTALS_emi_nc.zip
Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2\EDGAR_2024_GHG_CO2_2012_TOTALS_emi_nc.zip
Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2\EDGAR_2024_GHG_CO2_2013_TOTALS_emi_nc.zip
Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2\EDGAR_2024_GHG_CO2_2014_TOTALS_emi_nc.zip
Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2\EDGAR_2024_GHG_CO2_2015_TOTALS_emi_nc.zip
Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2\EDGAR_2024_GHG_CO2_2016_TOTALS_emi_nc.zip
Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2\EDGAR_2024_GHG_CO2_2017_TOTALS_emi_nc.zip
Downloaded: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG

In [None]:
import os
import zipfile
import rasterio
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import box

# Input and output locations
zip_folder = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2"
output_folder = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG"
shapefile_path = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\Shapefile\WB_GAD\WB_GAD_ADM0_SAR_Clean.shp"
unzip_folder = os.path.join(zip_folder, "unzipped")

# Make sure both working directories exist before anything is written
for _folder in (unzip_folder, output_folder):
    os.makedirs(_folder, exist_ok=True)

# Read the shapefile, then reproject it to EPSG:4326
shp_df = gpd.read_file(shapefile_path)
shp_df = shp_df.to_crs("EPSG:4326")

# Raster-to-vector conversion
def raster_to_polygons(src, data, transform):
    """Turn every valid raster cell into a rectangle carrying its value.

    Parameters
    ----------
    src : object
        Unused; kept so existing three-argument calls keep working. Cell
        geometry now comes entirely from ``transform``.
    data : 2-D numpy array or masked array
        Cell values. Masked and NaN cells are skipped.
    transform : affine transform
        Maps ``(col, row)`` pixel corners to coordinates via
        ``transform * (col, row)``.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per valid cell with ``emissions_kt`` (cell value / 1000),
        ``cell_area`` (polygon area in CRS units) and ``geometry``; the CRS
        is set to EPSG:4326.
    """
    # Evaluate validity once for the whole array instead of once per scalar
    cell_values = np.ma.getdata(data)
    valid = ~np.ma.getmaskarray(data) & ~np.isnan(cell_values)

    polygons, values, areas = [], [], []
    # argwhere walks the array in row-major order, i.e. row by row, then column
    for row, col in np.argwhere(valid).tolist():
        # Take both corners from the transform so the box is right for
        # north-up and south-up grids alike.
        # NOTE(review): rotated/sheared transforms are still not handled.
        x0, y0 = transform * (col, row)
        x1, y1 = transform * (col + 1, row + 1)
        poly = box(min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))
        polygons.append(poly)
        values.append(cell_values[row, col] / 1000)  # Convert to kilotonnes
        areas.append(poly.area)
    return gpd.GeoDataFrame({'emissions_kt': values, 'cell_area': areas, 'geometry': polygons}, crs="EPSG:4326")


# Loop over years
# For each year: unzip the archive if needed, polygonize the raster, overlay it
# on the admin-0 shapes and write one full and one aggregated CSV.
for year in range(2010, 2024):
    print(f"\nProcessing year {year}...")

    zip_path = os.path.join(zip_folder, f"EDGAR_2024_GHG_CO2_{year}_TOTALS_emi_nc.zip")
    nc_filename = f"EDGAR_2024_GHG_CO2_{year}_TOTALS_emi.nc"
    nc_path = os.path.join(unzip_folder, nc_filename)

    # Unzip if needed
    if not os.path.exists(nc_path):
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(unzip_folder)
                print(f"Unzipped: {zip_path}")
        except Exception as e:
            print(f"Error unzipping {zip_path}: {e}")
            continue

    # Best effort: a failure in one year is reported and the loop moves on
    try:
        # The dataset is only needed while cells are turned into polygons
        with rasterio.open(f'NETCDF:"{nc_path}":emissions') as src:
            data = src.read(1, masked=True)
            transform = src.transform
            gdf_raster = raster_to_polygons(src, data, transform)

        intersections = gpd.overlay(gdf_raster, shp_df, how='intersection')

        # Share of each cell inside the admin unit. Both areas are in the
        # layer's CRS units (EPSG:4326) and only their ratio is used.
        intersections['intersection_area'] = intersections.geometry.area
        intersections['weighted_emi_kt'] = intersections['emissions_kt'] * (intersections['intersection_area'] / intersections['cell_area'])

        # Aggregate by admin unit
        grouped = intersections.groupby('globalid').agg({
            'weighted_emi_kt': 'sum'
        }).reset_index().rename(columns={'weighted_emi_kt': 'total_emission_kt'})

        # Merge with shapefile (remove geometry)
        merged_df = shp_df.merge(grouped, on='globalid', how='left').drop(columns='geometry', errors='ignore')

        # Stamp after the left merge so units without any overlapping cell
        # also get a year and level instead of NaN.
        merged_df['year'] = year
        merged_df['geo_level'] = 0

        # Save full output
        output_csv_full = os.path.join(output_folder, f"emissions_by_admin0_full_{year}.csv")
        merged_df.to_csv(output_csv_full, index=False)
        print(f"Saved: {output_csv_full}")

        # Aggregated version
        agg_df = merged_df.groupby('L0_CODE').agg({
            'L0_NAME': 'first',
            'total_emission_kt': 'sum',
            'geo_level': 'first',
            'wb_status': 'first',
            'sovereign': 'first',
            'Disputed': 'first',
            'year': 'first'
        }).reset_index()

        output_csv_agg = os.path.join(output_folder, f"emissions_by_admin0_aggregated_{year}.csv")
        agg_df.to_csv(output_csv_agg, index=False)
        print(f"Saved: {output_csv_agg}")

    except Exception as e:
        print(f"Error processing {nc_path}: {e}")



Processing year 2010...



  intersections['intersection_area'] = intersections.geometry.area


Saved: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG\emissions_by_admin0_full_2010.csv
Saved: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG\emissions_by_admin0_aggregated_2010.csv

Processing year 2011...



  intersections['intersection_area'] = intersections.geometry.area


Saved: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG\emissions_by_admin0_full_2011.csv
Saved: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG\emissions_by_admin0_aggregated_2011.csv

Processing year 2012...



  intersections['intersection_area'] = intersections.geometry.area


Saved: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG\emissions_by_admin0_full_2012.csv
Saved: C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG\emissions_by_admin0_aggregated_2012.csv

Processing year 2013...


In [None]:
import os
import zipfile
import rasterio
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import box

# Input and output locations
zip_folder = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2"
output_folder = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG"
shapefile_path = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\Shapefile\WB_GAD\WB_GAD_ADM1_SAR_Clean.shp"
unzip_folder = os.path.join(zip_folder, "unzipped")

# Make sure both working directories exist before anything is written
for _folder in (unzip_folder, output_folder):
    os.makedirs(_folder, exist_ok=True)

# Read the shapefile, then reproject it to EPSG:4326
shp_df = gpd.read_file(shapefile_path)
shp_df = shp_df.to_crs("EPSG:4326")

# Raster-to-vector conversion
def raster_to_polygons(src, data, transform):
    """Turn every valid raster cell into a rectangle carrying its value.

    Parameters
    ----------
    src : object
        Unused; kept so existing three-argument calls keep working. Cell
        geometry now comes entirely from ``transform``.
    data : 2-D numpy array or masked array
        Cell values. Masked and NaN cells are skipped.
    transform : affine transform
        Maps ``(col, row)`` pixel corners to coordinates via
        ``transform * (col, row)``.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per valid cell with ``emissions_kt`` (cell value / 1000),
        ``cell_area`` (polygon area in CRS units) and ``geometry``; the CRS
        is set to EPSG:4326.
    """
    # Evaluate validity once for the whole array instead of once per scalar
    cell_values = np.ma.getdata(data)
    valid = ~np.ma.getmaskarray(data) & ~np.isnan(cell_values)

    polygons, values, areas = [], [], []
    # argwhere walks the array in row-major order, i.e. row by row, then column
    for row, col in np.argwhere(valid).tolist():
        # Take both corners from the transform so the box is right for
        # north-up and south-up grids alike.
        # NOTE(review): rotated/sheared transforms are still not handled.
        x0, y0 = transform * (col, row)
        x1, y1 = transform * (col + 1, row + 1)
        poly = box(min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))
        polygons.append(poly)
        values.append(cell_values[row, col] / 1000)  # Tonnes → kilotonnes
        areas.append(poly.area)
    return gpd.GeoDataFrame({'emissions_kt': values, 'cell_area': areas, 'geometry': polygons}, crs="EPSG:4326")

# Loop over years
# For each year: unzip the archive if needed, polygonize the raster, overlay it
# on the admin-1 shapes and write one full and one aggregated CSV.
for year in range(2010, 2024):
    print(f"\nProcessing year {year}...")

    zip_path = os.path.join(zip_folder, f"EDGAR_2024_GHG_CO2_{year}_TOTALS_emi_nc.zip")
    nc_filename = f"EDGAR_2024_GHG_CO2_{year}_TOTALS_emi.nc"
    nc_path = os.path.join(unzip_folder, nc_filename)

    # Unzip if needed
    if not os.path.exists(nc_path):
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(unzip_folder)
                print(f"Unzipped: {zip_path}")
        except Exception as e:
            print(f"Error unzipping {zip_path}: {e}")
            continue

    # Best effort: a failure in one year is reported and the loop moves on
    try:
        # The dataset is only needed while cells are turned into polygons
        with rasterio.open(f'NETCDF:"{nc_path}":emissions') as src:
            data = src.read(1, masked=True)
            transform = src.transform
            gdf_raster = raster_to_polygons(src, data, transform)

        intersections = gpd.overlay(gdf_raster, shp_df, how='intersection')

        # Share of each cell inside the admin unit. Both areas are in the
        # layer's CRS units (EPSG:4326) and only their ratio is used.
        intersections['intersection_area'] = intersections.geometry.area
        intersections['weighted_emi_kt'] = intersections['emissions_kt'] * (intersections['intersection_area'] / intersections['cell_area'])

        # Aggregate by ADM1 unit
        grouped = intersections.groupby('globalid').agg({
            'weighted_emi_kt': 'sum'
        }).reset_index().rename(columns={'weighted_emi_kt': 'total_emission_kt'})

        # Merge with ADM1 shapefile
        merged_df = shp_df.merge(grouped, on='globalid', how='left').drop(columns='geometry', errors='ignore')

        # Stamp after the left merge so units without any overlapping cell
        # also get a year and level instead of NaN.
        merged_df['year'] = year
        merged_df['geo_level'] = 1

        # Save full output
        output_csv_full = os.path.join(output_folder, f"emissions_by_admin1_full_{year}.csv")
        merged_df.to_csv(output_csv_full, index=False)
        print(f"Saved: {output_csv_full}")

        # Aggregated by L1_CODE
        agg_df = merged_df.groupby('L1_CODE').agg({
            'L0_CODE': 'first',
            'L0_NAME': 'first',
            'L1_NAME': 'first',
            'total_emission_kt': 'sum',
            'geo_level': 'first',
            'wb_status': 'first',
            'sovereign': 'first',
            'Disputed': 'first',
            'year': 'first'
        }).reset_index()

        output_csv_agg = os.path.join(output_folder, f"emissions_by_admin1_aggregated_{year}.csv")
        agg_df.to_csv(output_csv_agg, index=False)
        print(f"Saved: {output_csv_agg}")

    except Exception as e:
        print(f"Error processing {nc_path}: {e}")


In [None]:
import os
import zipfile
import rasterio
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import box

# Input and output locations
zip_folder = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\GHG\EDGAR_CO2"
output_folder = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\3. Output\GHG"
# ADM2 boundaries are used in this cell
shapefile_path = r"C:\Users\vgald\OneDrive\Desktop\SAR_DATA\1. Data\Shapefile\WB_GAD\WB_GAD_ADM2_SAR_Clean.shp"
unzip_folder = os.path.join(zip_folder, "unzipped")

# Make sure both working directories exist before anything is written
for _folder in (unzip_folder, output_folder):
    os.makedirs(_folder, exist_ok=True)

# Read the shapefile, then reproject it to EPSG:4326
shp_df = gpd.read_file(shapefile_path)
shp_df = shp_df.to_crs("EPSG:4326")

# Raster-to-vector conversion
def raster_to_polygons(src, data, transform):
    """Turn every valid raster cell into a rectangle carrying its value.

    Parameters
    ----------
    src : object
        Unused; kept so existing three-argument calls keep working. Cell
        geometry now comes entirely from ``transform``.
    data : 2-D numpy array or masked array
        Cell values. Masked and NaN cells are skipped.
    transform : affine transform
        Maps ``(col, row)`` pixel corners to coordinates via
        ``transform * (col, row)``.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per valid cell with ``emissions_kt`` (cell value / 1000),
        ``cell_area`` (polygon area in CRS units) and ``geometry``; the CRS
        is set to EPSG:4326.
    """
    # Evaluate validity once for the whole array instead of once per scalar
    cell_values = np.ma.getdata(data)
    valid = ~np.ma.getmaskarray(data) & ~np.isnan(cell_values)

    polygons, values, areas = [], [], []
    # argwhere walks the array in row-major order, i.e. row by row, then column
    for row, col in np.argwhere(valid).tolist():
        # Take both corners from the transform so the box is right for
        # north-up and south-up grids alike.
        # NOTE(review): rotated/sheared transforms are still not handled.
        x0, y0 = transform * (col, row)
        x1, y1 = transform * (col + 1, row + 1)
        poly = box(min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))
        polygons.append(poly)
        values.append(cell_values[row, col] / 1000)  # Tonnes → kilotonnes
        areas.append(poly.area)
    return gpd.GeoDataFrame({'emissions_kt': values, 'cell_area': areas, 'geometry': polygons}, crs="EPSG:4326")

# Loop over years
# For each year: unzip the archive if needed, polygonize the raster, overlay it
# on the admin-2 shapes and write one full and one aggregated CSV.
for year in range(2010, 2024):
    print(f"\nProcessing year {year}...")

    zip_path = os.path.join(zip_folder, f"EDGAR_2024_GHG_CO2_{year}_TOTALS_emi_nc.zip")
    nc_filename = f"EDGAR_2024_GHG_CO2_{year}_TOTALS_emi.nc"
    nc_path = os.path.join(unzip_folder, nc_filename)

    # Unzip if needed
    if not os.path.exists(nc_path):
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(unzip_folder)
                print(f"Unzipped: {zip_path}")
        except Exception as e:
            print(f"Error unzipping {zip_path}: {e}")
            continue

    # Best effort: a failure in one year is reported and the loop moves on
    try:
        # The dataset is only needed while cells are turned into polygons
        with rasterio.open(f'NETCDF:"{nc_path}":emissions') as src:
            data = src.read(1, masked=True)
            transform = src.transform
            gdf_raster = raster_to_polygons(src, data, transform)

        intersections = gpd.overlay(gdf_raster, shp_df, how='intersection')

        # Share of each cell inside the admin unit. Both areas are in the
        # layer's CRS units (EPSG:4326) and only their ratio is used.
        intersections['intersection_area'] = intersections.geometry.area
        intersections['weighted_emi_kt'] = intersections['emissions_kt'] * (intersections['intersection_area'] / intersections['cell_area'])

        # Aggregate by ADM2 unit
        grouped = intersections.groupby('globalid').agg({
            'weighted_emi_kt': 'sum'
        }).reset_index().rename(columns={'weighted_emi_kt': 'total_emission_kt'})

        # Merge with ADM2 shapefile
        merged_df = shp_df.merge(grouped, on='globalid', how='left').drop(columns='geometry', errors='ignore')

        # Stamp after the left merge so units without any overlapping cell
        # also get a year and level instead of NaN.
        merged_df['year'] = year
        merged_df['geo_level'] = 2

        # Save full output
        output_csv_full = os.path.join(output_folder, f"emissions_by_admin2_full_{year}.csv")
        merged_df.to_csv(output_csv_full, index=False)
        print(f"Saved: {output_csv_full}")

        # Aggregated by L2_CODE
        agg_df = merged_df.groupby('L2_CODE').agg({
            'L0_CODE': 'first',
            'L0_NAME': 'first',
            'L1_CODE': 'first',
            'L1_NAME': 'first',
            'L2_NAME': 'first',
            'total_emission_kt': 'sum',
            'geo_level': 'first',
            'wb_status': 'first',
            'sovereign': 'first',
            'Disputed': 'first',
            'year': 'first'
        }).reset_index()

        output_csv_agg = os.path.join(output_folder, f"emissions_by_admin2_aggregated_{year}.csv")
        agg_df.to_csv(output_csv_agg, index=False)
        print(f"Saved: {output_csv_agg}")

    except Exception as e:
        print(f"Error processing {nc_path}: {e}")
