In [17]:
# Add this at the top of your notebook
%load_ext autoreload
%autoreload 2  
# Automatically reload all modules before executing code

from airpollutionpy import extraction as no2
import ee
import geemap
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import sys
from datetime import datetime

In [18]:
# Set up the Earth Engine session through the project helper before any ee/geemap call below.
no2.initialize_earth_engine()

Earth Engine already initialized


In [8]:
# Pull the Nairobi row(s) out of the Kenya ADM1 boundary layer and save them as GeoJSON.
kenya_adm1 = gpd.read_file(
    '../data/boundaries/kenya/ken_admbnda_adm1_iebc_20191031.shp'
)
adm1_is_nairobi = kenya_adm1['ADM1_EN'] == 'Nairobi'
nairobi = kenya_adm1[adm1_is_nairobi]
nairobi.to_file(
    '../data/boundaries/kenya/nairobi.geojson',
    driver='GeoJSON',
)

In [12]:
# Keep the ADM2 units whose parent ADM1 is Nairobi and save them as GeoJSON.
kenya_adm2 = gpd.read_file(
    '../data/boundaries/kenya/ken_admbnda_adm2_iebc_20191031.shp'
)
adm2_in_nairobi = kenya_adm2['ADM1_EN'] == 'Nairobi'
nairobi_adm2 = kenya_adm2[adm2_in_nairobi]
nairobi_adm2.to_file(
    '../data/boundaries/kenya/nairobi_adm2.geojson',
    driver='GeoJSON',
)

In [20]:
# Load the Ethiopia ADM1 boundaries and keep only the Addis Ababa row(s).
# Read and filter in one expression so `addis` is bound exactly once.
addis = gpd.read_file(
    '../data/boundaries/ethiopia/eth_admbnda_adm1_csa_bofedb_2021.shp'
).loc[lambda regions: regions['ADM1_EN'] == 'Addis Ababa']

In [56]:
# NOTE(review): disabled draft that would build one combined boundaries frame for
# several ISO3 country codes, falling back to a lookup by World Bank country name.
# It relies on `get_boundaries` and `san_fac`, neither of which is defined in the
# cells visible here -- restore those definitions or delete this cell before sharing.
# country_list = ['ETH', 'KEN']
# gdf = pd.DataFrame()
# for name in country_list:
    
#     df = get_boundaries('ISO_A3', name)
    
#     if isinstance(df, pd.DataFrame):
#         gdf = pd.concat([gdf, df])
#     elif isinstance(df, str):
#         country = san_fac.loc[name, 'Country']
#         df = get_boundaries('WB_NAME', country)
#         if isinstance(df, pd.DataFrame):
#             gdf = pd.concat([gdf, df])
#         else:
#             print(name)

In [21]:
# Analysis window (YYYY-MM-DD strings) handed to no2.process_no2_data as start_date / end_date.
start_date, end_date = '2019-01-01', '2024-12-01'

In [23]:
# Addis Ababa is used both as the area of interest and as the (single) admin
# region to aggregate over, so convert the GeoDataFrame to an Earth Engine
# object once and reuse it instead of running the same conversion twice.
addis_ee = geemap.geopandas_to_ee(addis)
aoi = addis_ee
admin_regions = addis_ee

# Monthly NO2 aggregated per admin region over [start_date, end_date] as
# interpreted by the project helper; the helper logs one line per month.
monthly_data = no2.process_no2_data(
    start_date=start_date,
    end_date=end_date,
    aoi=aoi,
    admin_regions=admin_regions,
    temporal_resolution='monthly',
    spatial_resolution='admin',
    collection_type="OFFL"
)

Processing NO2 data from 2019-01-01 to 2024-12-01
Temporal resolution: monthly
Spatial resolution: admin
Collection type: OFFL
Admin regions count: 1
Number of available images in date range: 30464
Processing month: 2019-01-01 to 2019-02-01
Month 2019-01-01: Reduced regions result size: 1
Processing month: 2019-02-01 to 2019-03-01
Month 2019-02-01: Reduced regions result size: 1
Processing month: 2019-03-01 to 2019-04-01
Month 2019-03-01: Reduced regions result size: 1
Processing month: 2019-04-01 to 2019-05-01
Month 2019-04-01: Reduced regions result size: 1
Processing month: 2019-05-01 to 2019-06-01
Month 2019-05-01: Reduced regions result size: 1
Processing month: 2019-06-01 to 2019-07-01
Month 2019-06-01: Reduced regions result size: 1
Processing month: 2019-07-01 to 2019-08-01
Month 2019-07-01: Reduced regions result size: 1
Processing month: 2019-08-01 to 2019-09-01
Month 2019-08-01: Reduced regions result size: 1
Processing month: 2019-09-01 to 2019-10-01
Month 2019-09-01: Reduc

In [None]:
# Distinct ADM3 names, in order of first appearance.
# NOTE(review): `eth_adm3` is not defined in any cell visible here and this cell
# has never been executed (In [None]); `adm3` is only referenced by the
# commented-out admin_codes argument of the export call -- confirm it is still needed.
adm3_unique = eth_adm3['ADM3_EN'].unique()
adm3 = list(adm3_unique)

In [24]:
# Start the export of the monthly NO2 collection (export_type "GCS") under
# datalab-air-pollution/ETH/ADM1.
# NOTE(review): the task description says "Adm2" while the output file and
# destination say ADM1 -- confirm which label is intended.
export_settings = {
    "description": "Monthly_NO2_Adm2",
    "output_file": "addis_adm1_no2_monthly.csv",
    "destination": "datalab-air-pollution/ETH/ADM1",
    "export_type": "GCS",
    "return_df": False,
    # Optional restriction to specific admin units, currently disabled:
    # "admin_codes": adm3,
    # "admin_code_field": 'ADM3_EN',
}
monthly_df = no2.export_no2_data(collection=monthly_data, **export_settings)

Using years: [2019, 2020, 2021, 2022, 2023, 2024]
Using temporal splitting strategy by year
Started export task for 2019
Started export task for 2020
Started export task for 2021
Started export task for 2022
Started export task for 2023
Started export task for 2024
Started 6 export tasks in total


In [25]:
from airpollutionpy import downloadgcs
import os

# Where the exported files live in the bucket ...
prefix = 'ETH/ADM1/'
bucket_name = "datalab-air-pollution"

# ... and where they should land locally (absolute path, created if missing).
raw_monthly_dir = "../data/airpollution/ethiopia/ethiopia_adm1/raw/monthly/"
output_folder = os.path.abspath(raw_monthly_dir)
os.makedirs(output_folder, exist_ok=True)

# Fetch the files under `prefix` into the local raw folder.
downloadgcs.download_bucket_files(
    bucket_name,
    output_folder=output_folder,
    prefix=prefix,
)

Found 7 files in bucket
c:\Users\wb588851\OneDrive - WBG\Documents\air-pollution-analysis\data\airpollution\ethiopia\ethiopia_adm1\raw\monthly
c:\Users\wb588851\OneDrive - WBG\Documents\air-pollution-analysis\data\airpollution\ethiopia\ethiopia_adm1\raw\monthly
Downloading ETH/ADM1/addis_adm1_no2_monthly_2019.csv...
c:\Users\wb588851\OneDrive - WBG\Documents\air-pollution-analysis\data\airpollution\ethiopia\ethiopia_adm1\raw\monthly
Downloading ETH/ADM1/addis_adm1_no2_monthly_2020.csv...
c:\Users\wb588851\OneDrive - WBG\Documents\air-pollution-analysis\data\airpollution\ethiopia\ethiopia_adm1\raw\monthly
Downloading ETH/ADM1/addis_adm1_no2_monthly_2021.csv...
c:\Users\wb588851\OneDrive - WBG\Documents\air-pollution-analysis\data\airpollution\ethiopia\ethiopia_adm1\raw\monthly
Downloading ETH/ADM1/addis_adm1_no2_monthly_2022.csv...
c:\Users\wb588851\OneDrive - WBG\Documents\air-pollution-analysis\data\airpollution\ethiopia\ethiopia_adm1\raw\monthly
Downloading ETH/ADM1/addis_adm1_no2_mo

In [26]:
import glob
import pandas as pd


def load_monthly_exports(csv_paths):
    """Combine the per-year NO2 export CSVs into one DataFrame.

    Each file is read, its ``date`` column is parsed to datetime, ``year`` and
    ``month`` columns are derived from it, and the ``system:index`` column is
    dropped.

    Parameters
    ----------
    csv_paths : iterable of str
        Paths of the CSV files to combine. They are processed in sorted order
        so the row order of the result does not depend on the order in which
        the filesystem happens to list them.

    Returns
    -------
    pandas.DataFrame
        All rows from all files with a fresh 0..n-1 index, or an empty
        DataFrame when ``csv_paths`` is empty.

    Raises
    ------
    KeyError
        If a file lacks the ``date`` or ``system:index`` column.
    """
    frames = []
    for csv_path in sorted(csv_paths):
        export = pd.read_csv(csv_path)
        export['date'] = pd.to_datetime(export['date'])
        export = export.assign(
            year=export['date'].dt.year,
            month=export['date'].dt.month,
        ).drop(columns=['system:index'])
        frames.append(export)

    # pd.concat raises on an empty list, so keep the "no files -> empty frame" behaviour explicit.
    if not frames:
        return pd.DataFrame()
    # One concat at the end instead of growing the frame inside the loop.
    return pd.concat(frames, ignore_index=True)


files = sorted(glob.glob('../data/airpollution/ethiopia/ethiopia_adm1/raw/monthly/*.csv'))

# NOTE(review): despite the name, this holds the Addis Ababa ADM1 series read from
# the ethiopia_adm1 folder; the name is kept because the cell that writes the
# processed CSV refers to it.
nairobi_adm1_no2_monthly = load_monthly_exports(files)
    

In [28]:
# Write the combined monthly table to the processed folder. to_csv does not create
# missing parent directories, and only the raw folder is created earlier in this
# notebook, so make sure the processed folder exists first.
# The DataFrame index is still written as the first column, as before.
processed_csv = '../data/airpollution/ethiopia/ethiopia_adm1/processed/addis_adm1_no2_monthly_2019_2024.csv'
os.makedirs(os.path.dirname(processed_csv), exist_ok=True)
nairobi_adm1_no2_monthly.to_csv(processed_csv)