Download the Carbon Mapper plume CSV files (without the GeoTIFFs), either through the API or manually.

Standardise the column names to match the IMEO MARS data format.

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [2]:
# Provenance of the raw data. Both values are copied verbatim into the
# 'comments' column of the cleaned table.
download_url = "https://data.carbonmapper.org/#1.81/30.8/50.5"

# Also used to build the download folder name and the output file name.
# NOTE(review): presumably DDMMYYYY -- confirm.
download_date = "11032025"

In [3]:
#----------------------------------------------------------------------------------------------------------
# Target column layout of the cleaned file:
# sensor, sector, country, date, emission rate (kg/h), uncertainty (kg/h), latitude, longitude, comments, platform
#----------------------------------------------------------------------------------------------------------

In [4]:
# Folder holding the raw annual plume files for this download
directory = f'satellite/data/carbonmapper/downloaded_{download_date}'

search_string = 'plumes_'

# Data comes as annual csv's. Merge into a single file.
# os.listdir() returns names in arbitrary order, so sort them to keep the row
# order of the merged table reproducible between runs. Only .csv files are
# read so that any other file sharing the prefix cannot break read_csv.
plume_files = sorted(
    filename
    for filename in os.listdir(directory)
    if search_string in filename and filename.lower().endswith('.csv')
)

if not plume_files:
    # Fail with an actionable message instead of pd.concat's
    # "No objects to concatenate".
    raise FileNotFoundError(
        f"No '{search_string}*.csv' files found in '{directory}'"
    )

dfs = [pd.read_csv(os.path.join(directory, filename)) for filename in plume_files]

combined_df = pd.concat(dfs, ignore_index=True)

# keep imeo mars sector categories 'Oil and Gas' 'Coal' 'Waste'
# (sector labels not listed here are left unchanged)
sector_map = {
    'Solid Waste (6A)': 'Waste',
    'Waste Water (6B)': 'Waste',
    'Oil & Gas (1B2)': 'Oil and Gas',
    'Coal Mining (1B1a)': 'Coal',
}
combined_df['ipcc_sector'] = combined_df['ipcc_sector'].replace(sector_map)

# Rename the raw columns to the target column names in one pass.
# Note that the raw 'platform' column becomes 'sensor'; a new constant
# 'platform' column is added further down.
column_map = {
    'plume_latitude': 'latitude',
    'plume_longitude': 'longitude',
    'emission_auto': 'emission rate (kg/h)',
    'emission_uncertainty_auto': 'uncertainty (kg/h)',
    'platform': 'sensor',
    'ipcc_sector': 'sector',
    'datetime': 'date',
}
combined_df = combined_df.rename(columns=column_map)

# Parse the timestamps (formats may differ between rows) and keep only the
# calendar date, written as DD/MM/YYYY text.
combined_df['date'] = (
    pd.to_datetime(combined_df['date'], format='mixed')
    .dt.strftime('%d/%m/%Y')
)

# Drop plumes without an emission rate. The explicit copy makes the column
# assignments below operate on an independent frame.
# NOTE(review): rows are not filtered on any other column here -- confirm
# that every remaining row belongs in the cleaned file.
combined_df = combined_df.dropna(subset=['emission rate (kg/h)']).copy()

combined_df['comments'] = f'Downloaded from {download_url} on the {download_date}. Cleaned using eda_carbon_mapper.ipynb'
combined_df['platform'] = 'Carbon Mapper'
combined_df['sensor'] = combined_df['sensor'].replace('ISS', 'EMIT - NASA')  # stick with mars naming convention

Find the country name from latitude and longitude using country polygons.

In [5]:
import geopandas as gpd
from shapely.geometry import Point

fname = 'satellite/data/naturalearth_country_shapefiles/ne_110m_admin_0_countries.shp'

# Load the country boundaries shapefile
world = gpd.read_file(fname)


def get_country_from_lat_lng(latitude, longitude):
    """Return the name of the country whose polygon contains a point.

    Parameters
    ----------
    latitude, longitude : float
        Coordinates of the point. They are assumed to be in the same
        coordinate system as the shapefile -- TODO confirm.

    Returns
    -------
    str or None
        The ``NAME`` attribute of the first row of ``world`` whose geometry
        contains the point, or ``None`` when no polygon contains it.
    """
    # The point is built as (x, y) = (longitude, latitude)
    point = Point(longitude, latitude)

    # Test the point against every country polygon in a single vectorised
    # call instead of looping over the rows in Python.
    inside = world.geometry.contains(point)
    if not inside.any():
        return None
    return world.loc[inside, 'NAME'].iloc[0]


get_countries = True  # switch on/off

if get_countries:
    # Get country name from lat, lon. A plain comprehension over the two
    # coordinate columns also works when the frame has no rows.
    combined_df['country'] = [
        get_country_from_lat_lng(lat, lon)
        for lat, lon in zip(combined_df['latitude'], combined_df['longitude'])
    ]
elif 'country' not in combined_df.columns:
    # The write-out step selects a 'country' column; create an empty one so
    # that switching the lookup off does not end in a KeyError there.
    combined_df['country'] = None


Write out cleaned data

In [6]:
# Columns of the cleaned table, in the order they are written out
columns_to_keep = [
    'sensor',
    'sector',
    'country',
    'date',
    'emission rate (kg/h)',
    'uncertainty (kg/h)',
    'latitude',
    'longitude',
    'comments',
    'platform',
]
combined_df = combined_df[columns_to_keep]

# The cleaned file is saved in the same folder as the raw downloads
out_file = os.path.join(
    directory,
    f'carbon_mapper_cleaned_{download_date}.csv',
)
combined_df.to_csv(out_file, index=False)