- Annual brick production in Bangladesh in 2018: 33 billion for 7,859 kilns
- https://www.ccacoalition.org/sites/default/files/resources/2019_Report_Bangladesh%20Brick%20Sector%20Roadmap.pdf

In [2]:
import xarray as xr
import os
import pandas as pd
import numpy as np
import dask
import dask.array as da
import netCDF4
import zarr
import gcsfs
import esmpy
import xesmf as xe
import geopandas as gpd
import rioxarray
import matplotlib.pyplot as plt
from shapely.geometry import mapping
import cartopy.crs as ccrs
from shapely.ops import transform
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import pycountry_convert as pc
import glob
import calendar
import datetime
from shapely import wkt
from shapely.geometry import Point

In [3]:
path = "/Users/akawano/Library/CloudStorage/GoogleDrive-akawano@stanford.edu/My Drive/MyProjects/04_brick_kiln_emissions/emission_data"
dist_path = "/Users/akawano/Library/CloudStorage/GoogleDrive-akawano@stanford.edu/My Drive/MyProjects/04_brick_kiln_emissions"

In [99]:
# Import kiln location data
kilns_gps = gpd.read_file(os.path.join(path, "gps_all_kilns.shp"))
print(kilns_gps.head())

RCT_kilns = kilns_gps[kilns_gps['category']=="RCT"]
print(RCT_kilns.head())

Scaling_kilns = kilns_gps[kilns_gps['category']=="Scaling"]
print(Scaling_kilns.head())

PNAS_kilns = kilns_gps[kilns_gps['category']=="PNAS"]
PNAS_kilns.head()

  kiln_type  kiln_id category    division                         geometry
0       fck   100000     PNAS  Chittagong  POINT (5241276.392 3739925.248)
1       fck   100001     PNAS  Chittagong  POINT (5241189.938 3740082.634)
2       fck   100002     PNAS  Chittagong  POINT (5240070.054 3740287.121)
3       fck   100003     PNAS  Chittagong  POINT (5241021.955 3740369.603)
4       fck   100004     PNAS  Chittagong  POINT (5239764.288 3751000.298)
     kiln_type  kiln_id category division                         geometry
6129    zigzag       17      RCT   Khulna  POINT (4919851.782 3911492.283)
6130    zigzag       16      RCT   Khulna  POINT (4925799.527 3922897.878)
6131    zigzag        3      RCT   Khulna  POINT (4896473.825 3925735.083)
6132    zigzag        7      RCT   Khulna  POINT (4932952.327 3932790.311)
6133    zigzag       23      RCT   Khulna  POINT (4939397.309 3940380.672)
     kiln_type  kiln_id category division                         geometry
6719    zigzag    26454  

Unnamed: 0,kiln_type,kiln_id,category,division,geometry
0,fck,100000,PNAS,Chittagong,POINT (5241276.392 3739925.248)
1,fck,100001,PNAS,Chittagong,POINT (5241189.938 3740082.634)
2,fck,100002,PNAS,Chittagong,POINT (5240070.054 3740287.121)
3,fck,100003,PNAS,Chittagong,POINT (5241021.955 3740369.603)
4,fck,100004,PNAS,Chittagong,POINT (5239764.288 3751000.298)


## PM2.5 emissions
- SEC (total_energy_input / weight_bricks_fired) x weight_bricks_fired x **emission factor (Zigzag: 0.25)** x  total number of bricks produced over the entire season, (measured in 100,000s)

- Emission factor: https://link.springer.com/article/10.1007/s11869-018-0596-y/tables/4
- FCK: 0.27

## CO2 emissions 
- SEC (total_energy_input / weight_bricks_fired) x weight_bricks_fired x **emission factor (25.8)** x **carbon to CO2 conversion factor (3.67)** x total number of bricks produced over the entire season (measured in 100,000s)
- emission factor = CEF is the IPCC default carbon emission factor for the other bituminous coal (25.8 tC/TJ)

In [173]:
# Check which PM2.5 emission factor the RCT emissions file was built with:
# the ratio displayed below is PM2.5 emissions x 1000 divided by the energy input per lakh bricks.
pd.set_option('display.max_columns', None)
rct_emissions_csv = os.path.join(path, "rct_emissions_data.csv")
emi = pd.read_csv(rct_emissions_csv)
emi['pm_2_5_emissions'].mul(1000).div(emi['energy_input_1_lakh'])

0      0.25
1      0.25
2      0.25
3      0.25
4      0.25
       ... 
271    0.25
272    0.25
273    0.25
274    0.25
275    0.25
Length: 276, dtype: float64

In [102]:
# Keep only the columns used downstream, then attach the kiln type and the
# emission constants (PM2.5 factor, CO2 factor, carbon-to-CO2 conversion) as columns.
rct_columns = [
    'kiln_id', 'kiln_district', 'co2_emission', 'pm_2_5_emissions',
    'annual_production_actual_lakh_bricks', 'firing_month', 'firing_week',
    'firing_end_month', 'firing_end_week', 'weight_bricks_fired',
    'total_energy_input', 'sec', 'total_production_kpm',
]
emi = emi[rct_columns].assign(
    kiln_type='zigzag',
    pm25_emi_factor=0.25,
    co2_emi_factor=25.8,
    co2_conversion_factor=3.67,
)
emi

Unnamed: 0,kiln_id,kiln_district,co2_emission,pm_2_5_emissions,annual_production_actual_lakh_bricks,firing_month,firing_week,firing_end_month,firing_end_week,weight_bricks_fired,total_energy_input,sec,total_production_kpm,kiln_type,pm25_emi_factor,co2_emi_factor,co2_conversion_factor
0,44053,jhenaidah,21.189124,55.945769,75.82,november,2,may,3,164281.875,119556.107746,0.727750,0.106693,zigzag,0.25,25.8,3.67
1,41091,jashore,31.556736,83.319434,82.65,december,2,july,4,105302.400,111981.319041,1.063426,0.111686,zigzag,0.25,25.8,3.67
2,41046,jashore,30.307780,80.021808,32.52,november,4,july,2,118776.000,125474.194629,1.056394,0.097995,zigzag,0.25,25.8,3.67
3,41075,jashore,28.498033,75.243522,33.45,december,3,july,2,124340.000,120389.635622,0.968229,0.099609,zigzag,0.25,25.8,3.67
4,47051,khulna,30.727405,81.129748,109.20,november,2,july,4,151444.500,165504.685632,1.092841,0.169756,zigzag,0.25,25.8,3.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,41002,jashore,26.584524,70.191274,62.50,november,3,april,4,101388.000,95460.132076,0.941533,0.084730,zigzag,0.25,25.8,3.67
272,41010,jashore,22.997882,60.721441,67.20,november,3,may,3,162407.500,121442.882421,0.747766,0.099565,zigzag,0.25,25.8,3.67
273,41014,jashore,25.825248,68.186553,52.00,november,3,april,3,127201.200,103643.559929,0.814800,0.094822,zigzag,0.25,25.8,3.67
274,41085,jashore,20.806533,54.935610,52.65,november,4,april,3,129866.000,96686.672998,0.744511,0.109902,zigzag,0.25,25.8,3.67


In [104]:
# RCT kiln emissions.
# Units of the inputs: CO2 in tons per 100,000 bricks, PM2.5 in kg per 100,000 bricks.
#
# Multiplying by the annual production ("annual_production_actual_lakh_bricks",
# a variable available for RCT kilns only) gives season-wide CO2 in tons and PM2.5 in kg.
lakh_bricks_per_season = emi['annual_production_actual_lakh_bricks']
emi['CO2_tons_season'] = emi['co2_emission'] * lakh_bricks_per_season
emi['pm25_kg_season'] = emi['pm_2_5_emissions'] * lakh_bricks_per_season

# Lookup from lower-case month name to month number, used to build firing dates.
# calendar.month_name[0] is an empty string and is skipped.
month_to_num = {}
for index, month in enumerate(calendar.month_name):
    if month:
        month_to_num[month.lower()] = index

# Month names assigned to each calendar year of the firing season.
months_2022 = ['october', 'november', 'december']
months_2023 = ['january', 'february', 'march', 'april', 'may', 'june']

def get_week_start_date(year, month, week):
    """Return the Monday that starts the ``week``-th week of a month.

    Weeks are counted from the first Monday on or after the 1st of the month:
    ``week=1`` is that first Monday and every further week adds seven days.

    Args:
        year: Calendar year.
        month: Month number (1-12).
        week: 1-based week index within the month.

    Returns:
        A ``datetime.date``. For a large ``week`` the result can fall in the
        following month.
    """
    month_first = datetime.date(year, month, 1)
    # weekday() is 0 for Monday, so this is the gap to the next Monday
    # (0 when the month already starts on a Monday).
    days_to_monday = (7 - month_first.weekday()) % 7
    offset = datetime.timedelta(days=days_to_monday) + datetime.timedelta(weeks=week - 1)
    return month_first + offset

def _rct_start_date(row):
    """Firing start of one RCT kiln: months in months_2022 fall in 2022, all others in 2023."""
    month_name = row['firing_month'].lower()
    year = 2022 if month_name in months_2022 else 2023
    return get_week_start_date(year, month_to_num[month_name], int(row['firing_week']))


def _rct_end_date(row):
    """Firing end of one RCT kiln; the end of the season is always placed in 2023."""
    month_name = row['firing_end_month'].lower()
    return get_week_start_date(2023, month_to_num[month_name], int(row['firing_end_week']))


# Build both dates row by row and store them as pandas datetimes.
emi['firing_start_date'] = pd.to_datetime(emi.apply(_rct_start_date, axis=1))
emi['firing_end_date'] = pd.to_datetime(emi.apply(_rct_end_date, axis=1))

# Length of the firing season in whole days.
emi['season_days'] = (emi['firing_end_date'] - emi['firing_start_date']).dt.days

# Per-day averages over the season, and a 30-day "month" derived from each of them.
for season_total, per_day, per_month in (
    ('CO2_tons_season', 'avg_co2_tons_per_day', 'avg_co2_tons_per_month'),
    ('pm25_kg_season', 'avg_pm25_kg_per_day', 'avg_pm25_kg_per_month'),
):
    emi[per_day] = emi[season_total] / emi['season_days']
    emi[per_month] = emi[per_day] * 30

emi['category'] = 'RCT'

emi.head()

Unnamed: 0,kiln_id,kiln_district,co2_emission,pm_2_5_emissions,annual_production_actual_lakh_bricks,firing_month,firing_week,firing_end_month,firing_end_week,weight_bricks_fired,total_energy_input,sec,total_production_kpm,kiln_type,pm25_emi_factor,co2_emi_factor,co2_conversion_factor,CO2_tons_season,pm25_kg_season,firing_start_date,firing_end_date,season_days,avg_co2_tons_per_day,avg_co2_tons_per_month,avg_pm25_kg_per_day,avg_pm25_kg_per_month,category
0,44053,jhenaidah,21.189124,55.945769,75.82,november,2,may,3,164281.875,119556.107746,0.72775,0.106693,zigzag,0.25,25.8,3.67,1606.559399,4241.808184,2022-11-14,2023-05-15,182,8.827249,264.817483,23.306638,699.199151,RCT
1,41091,jashore,31.556736,83.319434,82.65,december,2,july,4,105302.4,111981.319041,1.063426,0.111686,zigzag,0.25,25.8,3.67,2608.164201,6886.351204,2022-12-12,2023-07-24,224,11.64359,349.307705,30.742639,922.279179,RCT
2,41046,jashore,30.30778,80.021808,32.52,november,4,july,2,118776.0,125474.194629,1.056394,0.097995,zigzag,0.25,25.8,3.67,985.608992,2602.30919,2022-11-28,2023-07-10,224,4.40004,132.001204,11.617452,348.523552,RCT
3,41075,jashore,28.498033,75.243522,33.45,december,3,july,2,124340.0,120389.635622,0.968229,0.099609,zigzag,0.25,25.8,3.67,953.25919,2516.89582,2022-12-19,2023-07-10,203,4.695858,140.875742,12.398502,371.955047,RCT
4,47051,khulna,30.727405,81.129748,109.2,november,2,july,4,151444.5,165504.685632,1.092841,0.169756,zigzag,0.25,25.8,3.67,3355.43265,8859.368466,2022-11-14,2023-07-24,252,13.315209,399.456268,35.156224,1054.686722,RCT


In [106]:
# Mean firing-season length in days across the RCT kilns.
operation_avg_days = emi['season_days'].mean()
print(operation_avg_days)

# Mean annual brick production across the RCT kilns, in lakh (100,000) bricks.
brick_mean = emi['annual_production_actual_lakh_bricks'].mean()
print(brick_mean)

166.6304347826087
62.593492391304345


In [108]:
# Benchmark: annual brick production in Bangladesh in 2018 was 33 billion bricks for 7,859 kilns.
# https://www.ccacoalition.org/sites/default/files/resources/2019_Report_Bangladesh%20Brick%20Sector%20Roadmap.pdf

# Bricks per kiln per year implied by the report (divide by 100,000 to express it in lakh).
33000000000/7859

4199007.507316452

In [110]:
# Average two estimates of annual production per kiln, both rounded to whole lakh bricks:
# the public report (33 billion bricks / 7,859 kilns, about 42 lakh) and the
# mean over the RCT emissions data (about 63 lakh).
report_lakh_per_kiln = 42
rct_lakh_per_kiln = 63
brick_mean = (report_lakh_per_kiln + rct_lakh_per_kiln) / 2
brick_mean

52.5

In [112]:
# Scaling-kiln emissions.
# Annual production is not available for these kilns, so the per-lakh figures are
# scaled to a season with brick_mean (presumably the same units as the RCT file:
# CO2 in tons and PM2.5 in kg per 100,000 bricks -- TODO confirm).
emi2 = pd.read_csv(os.path.join(path, "scaling_emissions_data.csv"))

emi2['CO2_tons_season'] = emi2['co2_emission'] * brick_mean
emi2['pm25_kg_season'] = emi2['pm_2_5_emissions'] * brick_mean


def _scaling_start_date(row):
    """Return the firing start date of one scaling kiln as a ``datetime.date``."""
    month_name = row['firing_season_firing_month'].lower()
    # Same year rule as for the RCT kilns: months listed in months_2022 fall in 2022,
    # every other month in 2023. A fixed 2022 would date a kiln that starts in
    # January or later roughly ten months before the season.
    year = 2022 if month_name in months_2022 else 2023
    return get_week_start_date(
        year,
        month_to_num[month_name],  # map month name to number
        int(row['firing_season_firing_week'])
    )


emi2['firing_start_date'] = pd.to_datetime(emi2.apply(_scaling_start_date, axis=1))

# No end date is recorded for these kilns: assume the average RCT season length.
emi2['firing_end_date'] = emi2['firing_start_date'] + pd.to_timedelta(operation_avg_days, unit='D')
emi2['season_days'] = operation_avg_days

# Per-day averages over the season, and a 30-day "month" derived from each of them.
emi2['avg_co2_tons_per_day'] = emi2['CO2_tons_season'] / emi2['season_days']
emi2['avg_co2_tons_per_month'] = emi2['avg_co2_tons_per_day'] * 30

emi2['avg_pm25_kg_per_day'] = emi2['pm25_kg_season'] / emi2['season_days']
emi2['avg_pm25_kg_per_month'] = emi2['avg_pm25_kg_per_day'] * 30

# Same labels and emission constants as attached to the RCT table.
emi2['category'] = 'Scaling'
emi2['kiln_type'] = 'zigzag'
emi2['pm25_emi_factor'] = 0.25
emi2['co2_emi_factor'] = 25.8
emi2['co2_conversion_factor'] = 3.67
emi2.head()

Unnamed: 0,kiln_id,collection_date,stage,kiln_division,kiln_district,kiln_sub_district,firing_season_firing_month,firing_season_firing_week,firing_season_circuits_completed,firing_season_circuits_current,fuel_used,bricks_1_chamber_count,chamber_completed_during_monitoring_count,adopter,total_fuel_consumed_kpm,total_coal_consumed_kpm,specific_fuel_consumption_tons_per_lakh,specific_coal_consumption_tons_per_lakh,sec,co2_emission,total_fuel_consumed_self_report,total_coal_consumed_self_report,brick_weight_1_lakh,energy_input_1_lakh,pm_2_5_emissions,co2e_class_1,co2e_class_1_5,co2e_class_2,co2e_class_3,CO2_tons_season,pm25_kg_season,firing_start_date,firing_end_date,season_days,avg_co2_tons_per_day,avg_co2_tons_per_month,avg_pm25_kg_per_day,avg_pm25_kg_per_month,category,kiln_type,pm25_emi_factor,co2_emi_factor,co2_conversion_factor
0,33183,2024-05-09,stage 2 - No Info,dhaka,gazipur,kaliakair,november,2,9,10,indonesian_coal,16150,2.4,0,5.591,5.591,14.424665,14.424665,1.132336,28.980587,,,270300,306070.459027,76.517615,3.251622,0.0,0.846233,0.530779,1521.480843,4017.174775,2022-11-14,2023-04-29 15:07:49.565217391,166.630435,9.13087,273.926101,24.10829,723.248687,Scaling,zigzag,0.25,25.8,3.67
1,33096,2024-05-12,stage 1,dhaka,gazipur,kaliakair,december,2,9,10,indonesian_coal south_african_coal,8750,6.0,1,6.2,6.2,11.809524,11.809524,0.89199,23.783649,,,281600,251184.426809,62.796107,1.912205,0.0,0.100605,0.060016,1248.641553,3296.795602,2022-12-12,2023-05-27 15:07:49.565217391,166.630435,7.493478,224.80435,19.785075,593.552241,Scaling,zigzag,0.25,25.8,3.67
2,33051,2024-05-07,stage 2 - No Info,dhaka,gazipur,kaliakair,november,3,6,7,indonesian_coal south_african_coal,14550,2.6,0,5.616,5.616,14.845361,14.845361,1.051447,29.912002,,,300450,315907.333091,78.976833,2.820702,0.0,1.080422,0.413922,1570.380091,4146.283747,2022-11-21,2023-05-06 15:07:49.565217391,166.630435,9.424329,282.729879,24.883112,746.493356,Scaling,zigzag,0.25,25.8,3.67
3,33032,2024-05-05,stage 2 - Info,dhaka,gazipur,kaliakair,november,1,9,10,indonesian_coal south_african_coal,13500,2.5,0,5.4,5.4,16.0,16.0,1.198242,32.261428,,,284350,340720.153418,85.180038,2.661568,0.0,1.096889,0.525216,1693.724993,4471.952014,2022-11-07,2023-04-22 15:07:49.565217391,166.630435,10.16456,304.936789,26.837546,805.12639,Scaling,zigzag,0.25,25.8,3.67
4,33006,2024-03-28,stage 2 - Info,dhaka,gazipur,kapasia,december,1,6,7,australian_coal indian_coal sawdust,10550,6.1,1,13.478,7.11,20.943206,11.048093,1.298982,37.144613,,,302000,392292.561594,98.07314,3.18255,0.0,0.573141,0.136061,1950.092208,5148.839871,2022-12-05,2023-05-20 15:07:49.565217391,166.630435,11.703097,351.092922,30.899757,926.992697,Scaling,zigzag,0.25,25.8,3.67


## Merge emissions data with kiln locations

In [115]:
# Remove 'category' and 'division' from the location subsets before merging,
# leaving the kiln id, kiln type and geometry.
location_only_columns = ['category', 'division']
RCT_kilns = RCT_kilns.drop(location_only_columns, axis=1)
Scaling_kilns = Scaling_kilns.drop(location_only_columns, axis=1)

In [117]:
# Attach each RCT kiln's location by kiln_id; kilns without a location keep missing values.
emi = emi.merge(RCT_kilns, how='left', on='kiln_id')
emi.head()

Unnamed: 0,kiln_id,kiln_district,co2_emission,pm_2_5_emissions,annual_production_actual_lakh_bricks,firing_month,firing_week,firing_end_month,firing_end_week,weight_bricks_fired,total_energy_input,sec,total_production_kpm,kiln_type_x,pm25_emi_factor,co2_emi_factor,co2_conversion_factor,CO2_tons_season,pm25_kg_season,firing_start_date,firing_end_date,season_days,avg_co2_tons_per_day,avg_co2_tons_per_month,avg_pm25_kg_per_day,avg_pm25_kg_per_month,category,kiln_type_y,geometry
0,44053,jhenaidah,21.189124,55.945769,75.82,november,2,may,3,164281.875,119556.107746,0.72775,0.106693,zigzag,0.25,25.8,3.67,1606.559399,4241.808184,2022-11-14,2023-05-15,182,8.827249,264.817483,23.306638,699.199151,RCT,zigzag,POINT (4918024.418 3986537.895)
1,41091,jashore,31.556736,83.319434,82.65,december,2,july,4,105302.4,111981.319041,1.063426,0.111686,zigzag,0.25,25.8,3.67,2608.164201,6886.351204,2022-12-12,2023-07-24,224,11.64359,349.307705,30.742639,922.279179,RCT,zigzag,POINT (4938958.158 3944789.037)
2,41046,jashore,30.30778,80.021808,32.52,november,4,july,2,118776.0,125474.194629,1.056394,0.097995,zigzag,0.25,25.8,3.67,985.608992,2602.30919,2022-11-28,2023-07-10,224,4.40004,132.001204,11.617452,348.523552,RCT,zigzag,POINT (4934329.554 3934110.272)
3,41075,jashore,28.498033,75.243522,33.45,december,3,july,2,124340.0,120389.635622,0.968229,0.099609,zigzag,0.25,25.8,3.67,953.25919,2516.89582,2022-12-19,2023-07-10,203,4.695858,140.875742,12.398502,371.955047,RCT,zigzag,POINT (4903230.431 3920612.989)
4,47051,khulna,30.727405,81.129748,109.2,november,2,july,4,151444.5,165504.685632,1.092841,0.169756,zigzag,0.25,25.8,3.67,3355.43265,8859.368466,2022-11-14,2023-07-24,252,13.315209,399.456268,35.156224,1054.686722,RCT,zigzag,POINT (4939200.271 3898884.545)


In [119]:
# Attach each Scaling kiln's location by kiln_id; kilns without a location keep missing values.
emi2 = emi2.merge(Scaling_kilns, how='left', on='kiln_id')
emi2.head()

Unnamed: 0,kiln_id,collection_date,stage,kiln_division,kiln_district,kiln_sub_district,firing_season_firing_month,firing_season_firing_week,firing_season_circuits_completed,firing_season_circuits_current,fuel_used,bricks_1_chamber_count,chamber_completed_during_monitoring_count,adopter,total_fuel_consumed_kpm,total_coal_consumed_kpm,specific_fuel_consumption_tons_per_lakh,specific_coal_consumption_tons_per_lakh,sec,co2_emission,total_fuel_consumed_self_report,total_coal_consumed_self_report,brick_weight_1_lakh,energy_input_1_lakh,pm_2_5_emissions,co2e_class_1,co2e_class_1_5,co2e_class_2,co2e_class_3,CO2_tons_season,pm25_kg_season,firing_start_date,firing_end_date,season_days,avg_co2_tons_per_day,avg_co2_tons_per_month,avg_pm25_kg_per_day,avg_pm25_kg_per_month,category,kiln_type_x,pm25_emi_factor,co2_emi_factor,co2_conversion_factor,kiln_type_y,geometry
0,33183,2024-05-09,stage 2 - No Info,dhaka,gazipur,kaliakair,november,2,9,10,indonesian_coal,16150,2.4,0,5.591,5.591,14.424665,14.424665,1.132336,28.980587,,,270300,306070.459027,76.517615,3.251622,0.0,0.846233,0.530779,1521.480843,4017.174775,2022-11-14,2023-04-29 15:07:49.565217391,166.630435,9.13087,273.926101,24.10829,723.248687,Scaling,zigzag,0.25,25.8,3.67,zigzag,POINT (5016424.699 4037897.329)
1,33096,2024-05-12,stage 1,dhaka,gazipur,kaliakair,december,2,9,10,indonesian_coal south_african_coal,8750,6.0,1,6.2,6.2,11.809524,11.809524,0.89199,23.783649,,,281600,251184.426809,62.796107,1.912205,0.0,0.100605,0.060016,1248.641553,3296.795602,2022-12-12,2023-05-27 15:07:49.565217391,166.630435,7.493478,224.80435,19.785075,593.552241,Scaling,zigzag,0.25,25.8,3.67,zigzag,POINT (5016091.945 4045783.632)
2,33051,2024-05-07,stage 2 - No Info,dhaka,gazipur,kaliakair,november,3,6,7,indonesian_coal south_african_coal,14550,2.6,0,5.616,5.616,14.845361,14.845361,1.051447,29.912002,,,300450,315907.333091,78.976833,2.820702,0.0,1.080422,0.413922,1570.380091,4146.283747,2022-11-21,2023-05-06 15:07:49.565217391,166.630435,9.424329,282.729879,24.883112,746.493356,Scaling,zigzag,0.25,25.8,3.67,zigzag,POINT (5016185.782 4038189.564)
3,33032,2024-05-05,stage 2 - Info,dhaka,gazipur,kaliakair,november,1,9,10,indonesian_coal south_african_coal,13500,2.5,0,5.4,5.4,16.0,16.0,1.198242,32.261428,,,284350,340720.153418,85.180038,2.661568,0.0,1.096889,0.525216,1693.724993,4471.952014,2022-11-07,2023-04-22 15:07:49.565217391,166.630435,10.16456,304.936789,26.837546,805.12639,Scaling,zigzag,0.25,25.8,3.67,zigzag,POINT (5016217.823 4039343.796)
4,33006,2024-03-28,stage 2 - Info,dhaka,gazipur,kapasia,december,1,6,7,australian_coal indian_coal sawdust,10550,6.1,1,13.478,7.11,20.943206,11.048093,1.298982,37.144613,,,302000,392292.561594,98.07314,3.18255,0.0,0.573141,0.136061,1950.092208,5148.839871,2022-12-05,2023-05-20 15:07:49.565217391,166.630435,11.703097,351.092922,30.899757,926.992697,Scaling,zigzag,0.25,25.8,3.67,zigzag,POINT (5053721.651 4051490.891)


In [123]:
# Stack the RCT and Scaling tables and keep only the columns used by the later steps.
emission_columns = [
    'kiln_id', 'category', 'firing_start_date', 'firing_end_date', 'season_days',
    'avg_co2_tons_per_day', 'avg_co2_tons_per_month',
    'avg_pm25_kg_per_day', 'avg_pm25_kg_per_month',
    'geometry', 'pm25_emi_factor', 'co2_emi_factor', 'co2_conversion_factor',
]
emissions = pd.concat([emi, emi2])[emission_columns].copy()

# Store the end date as 'YYYY-MM-DD' text, which drops any time-of-day component.
emissions['firing_end_date'] = emissions['firing_end_date'].dt.strftime('%Y-%m-%d')
emissions.head()

Unnamed: 0,kiln_id,category,firing_start_date,firing_end_date,season_days,avg_co2_tons_per_day,avg_co2_tons_per_month,avg_pm25_kg_per_day,avg_pm25_kg_per_month,geometry,pm25_emi_factor,co2_emi_factor,co2_conversion_factor
0,44053,RCT,2022-11-14,2023-05-15,182.0,8.827249,264.817483,23.306638,699.199151,POINT (4918024.418 3986537.895),0.25,25.8,3.67
1,41091,RCT,2022-12-12,2023-07-24,224.0,11.64359,349.307705,30.742639,922.279179,POINT (4938958.158 3944789.037),0.25,25.8,3.67
2,41046,RCT,2022-11-28,2023-07-10,224.0,4.40004,132.001204,11.617452,348.523552,POINT (4934329.554 3934110.272),0.25,25.8,3.67
3,41075,RCT,2022-12-19,2023-07-10,203.0,4.695858,140.875742,12.398502,371.955047,POINT (4903230.431 3920612.989),0.25,25.8,3.67
4,47051,RCT,2022-11-14,2023-07-24,252.0,13.315209,399.456268,35.156224,1054.686722,POINT (4939200.271 3898884.545),0.25,25.8,3.67


In [125]:
# Estimate CO2 and PM2.5 emissions for PNAS kilns from nearby RCT and Scaling kilns.

# Kilns with emission estimates, as a GeoDataFrame declared in EPSG:7755.
emissions_gdf = gpd.GeoDataFrame(emissions, geometry=emissions['geometry'], crs = 7755)

# PNAS kiln locations, with 'kiln_id' renamed to 'id' and reprojected to the CRS of emissions_gdf.
location_gdf = PNAS_kilns.copy().rename(columns={'kiln_id': 'id'}).to_crs(emissions_gdf.crs)

# Neighbourhood radius in CRS units (10 km according to the original note).
neighbor_distance = 10000
location_gdf['buffer'] = location_gdf.geometry.buffer(neighbor_distance)

# Same table, but with the buffer polygons as the active geometry for the join.
location_gdf_buffer = location_gdf.set_geometry('buffer')

# Keep every (emissions kiln, PNAS kiln) pair where the emissions kiln lies within the buffer.
buffer_zones = location_gdf_buffer[['id', 'category', 'buffer']]
neighbors = gpd.sjoin(emissions_gdf, buffer_zones, how='inner', predicate='within')

for date_column in ('firing_start_date', 'firing_end_date'):
    neighbors[date_column] = pd.to_datetime(neighbors[date_column])

# Per PNAS kiln: mean of the neighbours' emission rates, median of their firing dates.
aggregations = {
    emission_column: 'mean'
    for emission_column in (
        'avg_co2_tons_per_day',
        'avg_co2_tons_per_month',
        'avg_pm25_kg_per_day',
        'avg_pm25_kg_per_month',
    )
}
aggregations.update({'firing_start_date': 'median', 'firing_end_date': 'median'})

neighbors_mean = neighbors.groupby('id').agg(aggregations).reset_index()

# Store the aggregated dates as 'YYYY-MM-DD' text.
for date_column in ('firing_start_date', 'firing_end_date'):
    neighbors_mean[date_column] = neighbors_mean[date_column].dt.strftime('%Y-%m-%d')
neighbors_mean.head()

Unnamed: 0,id,avg_co2_tons_per_day,avg_co2_tons_per_month,avg_pm25_kg_per_day,avg_pm25_kg_per_month,firing_start_date,firing_end_date
0,100807,11.57049,347.114694,30.549632,916.488959,2022-11-24,2023-05-18
1,100819,18.12301,543.690306,47.85029,1435.508698,2022-12-29,2023-04-13
2,100869,11.015442,330.463266,29.084137,872.524097,2022-11-28,2023-05-22
3,100874,13.274235,398.227046,35.04804,1051.4412,2022-11-17,2023-05-04
4,100875,11.505809,345.17428,30.378856,911.365671,2022-11-21,2023-05-22


In [127]:
# Attach the neighbour-based estimates to every PNAS kiln; kilns without any
# RCT/Scaling neighbour get missing values from the left join.
location_merged = pd.merge(location_gdf, neighbors_mean, on = 'id', how = 'left')
emissions_df = emissions.copy()

# Parse BOTH date columns on both tables before imputing. Filling a text column
# with a Timestamp would leave a column that mixes strings and Timestamps.
for date_column in ('firing_start_date', 'firing_end_date'):
    emissions_df[date_column] = pd.to_datetime(emissions_df[date_column])
    location_merged[date_column] = pd.to_datetime(location_merged[date_column])

# For kilns that have no neighbouring kiln in the RCT/Scaling data, impute the
# missing emission rates with the mean over all RCT/Scaling kilns ...
for emission_column in (
    'avg_co2_tons_per_day',
    'avg_co2_tons_per_month',
    'avg_pm25_kg_per_day',
    'avg_pm25_kg_per_month',
):
    location_merged[emission_column] = location_merged[emission_column].fillna(
        emissions_df[emission_column].mean()
    )

# ... and the missing firing dates with the median over all RCT/Scaling kilns.
for date_column in ('firing_start_date', 'firing_end_date'):
    location_merged[date_column] = location_merged[date_column].fillna(
        emissions_df[date_column].median()
    )

# Restore the 'kiln_id' name and drop the helper buffer geometry. 'lat'/'long' are
# renamed only if present; rename() ignores keys that are not columns.
location_merged = location_merged.rename(columns = {'id':'kiln_id', 'lat':'latitude','long':'longitude'}).drop(columns = ['buffer'])
location_merged.head()

Unnamed: 0,kiln_type,kiln_id,category,division,geometry,avg_co2_tons_per_day,avg_co2_tons_per_month,avg_pm25_kg_per_day,avg_pm25_kg_per_month,firing_start_date,firing_end_date
0,fck,100000,PNAS,Chittagong,POINT (5241276.392 3739925.248),10.163055,304.891646,26.833573,805.007199,2022-11-21 00:00:00,2023-04-29
1,fck,100001,PNAS,Chittagong,POINT (5241189.938 3740082.634),10.163055,304.891646,26.833573,805.007199,2022-11-21 00:00:00,2023-04-29
2,fck,100002,PNAS,Chittagong,POINT (5240070.054 3740287.121),10.163055,304.891646,26.833573,805.007199,2022-11-21 00:00:00,2023-04-29
3,fck,100003,PNAS,Chittagong,POINT (5241021.955 3740369.603),10.163055,304.891646,26.833573,805.007199,2022-11-21 00:00:00,2023-04-29
4,fck,100004,PNAS,Chittagong,POINT (5239764.288 3751000.298),10.163055,304.891646,26.833573,805.007199,2022-11-21 00:00:00,2023-04-29


In [129]:
# Combine the PNAS kilns (with imputed values) and the RCT/Scaling kilns,
# then keep the monthly figures only.
fin_gdf = pd.concat([location_merged, emissions_df])
unused_columns = ['season_days', 'geometry', 'avg_co2_tons_per_day', 'avg_pm25_kg_per_day']
fin_df = fin_gdf.drop(unused_columns, axis=1)

# Make sure both firing dates are datetimes before the monthly expansion.
for date_column in ('firing_start_date', 'firing_end_date'):
    fin_df[date_column] = pd.to_datetime(fin_df[date_column])

fin_df.head()

Unnamed: 0,kiln_type,kiln_id,category,division,avg_co2_tons_per_month,avg_pm25_kg_per_month,firing_start_date,firing_end_date,pm25_emi_factor,co2_emi_factor,co2_conversion_factor
0,fck,100000,PNAS,Chittagong,304.891646,805.007199,2022-11-21,2023-04-29,,,
1,fck,100001,PNAS,Chittagong,304.891646,805.007199,2022-11-21,2023-04-29,,,
2,fck,100002,PNAS,Chittagong,304.891646,805.007199,2022-11-21,2023-04-29,,,
3,fck,100003,PNAS,Chittagong,304.891646,805.007199,2022-11-21,2023-04-29,,,
4,fck,100004,PNAS,Chittagong,304.891646,805.007199,2022-11-21,2023-04-29,,,


In [131]:
# Sanity check: list the kiln categories present in the combined table.
fin_df['category'].unique()

array(['PNAS', 'RCT', 'Scaling'], dtype=object)

In [133]:
# Sanity check: latest firing end date across all kilns.
fin_df['firing_end_date'].max()

Timestamp('2023-07-24 00:00:00')

In [139]:
def month_active_fraction(row, current_month):
    """Return the share of a calendar month that falls inside a kiln's firing period.

    Args:
        row: Mapping-like record providing 'firing_start_date' and 'firing_end_date'.
        current_month: Timestamp for the first day of the month of interest
            (e.g. 2022-11-01).

    Returns:
        Number of overlapping days (both ends counted) divided by the number of
        days in the month, or 0 when the firing period does not touch the month.
    """
    days_in_month = calendar.monthrange(current_month.year, current_month.month)[1]
    window_close = current_month.replace(day=days_in_month)

    # Clip the firing period to the month window.
    active_from = max(row['firing_start_date'], current_month)
    active_until = min(row['firing_end_date'], window_close)

    if active_until < active_from:
        return 0

    # +1 because the first and the last day both count.
    active_days = (active_until - active_from).days + 1
    return active_days / days_in_month
    

def expand_row(row):
    """Split one kiln record into one record per calendar month of its firing period.

    Args:
        row: Record (a DataFrame row in this notebook) providing 'firing_start_date',
            'firing_end_date', 'avg_co2_tons_per_month' and 'avg_pm25_kg_per_month'.

    Returns:
        A list of copies of ``row``, one per month from the start month through the
        end month. Each copy gets a 'date' field formatted as 'YYYY-MM' and has both
        monthly emission fields multiplied by the fraction of that month the kiln
        was active.
    """
    first_month = row['firing_start_date'].replace(day=1)
    last_month = row['firing_end_date'].replace(day=1)

    def _monthly_record(month_start):
        # Fraction of this month that lies within the firing period.
        share = month_active_fraction(row, month_start)
        record = row.copy()
        record['date'] = month_start.strftime('%Y-%m')
        # NOTE(review): the monthly inputs are built elsewhere in this file as
        # daily average x 30, so a fully active month receives 30 days' worth
        # whatever its real length -- confirm this is intended.
        record['avg_co2_tons_per_month'] = row['avg_co2_tons_per_month'] * share
        record['avg_pm25_kg_per_month'] = row['avg_pm25_kg_per_month'] * share
        return record

    # 'MS' yields the first day of every month in the range.
    month_starts = pd.date_range(start=first_month, end=last_month, freq='MS')
    return [_monthly_record(month_start) for month_start in month_starts]

# One output record per kiln per month of its firing period.
expanded_rows = [
    monthly_row
    for _, kiln_row in fin_df.iterrows()
    for monthly_row in expand_row(kiln_row)
]

expanded_df = pd.DataFrame(expanded_rows)

# Keep the identifying columns and the two monthly emission figures.
output_columns = ['kiln_id', 'category', 'date',
                  'avg_co2_tons_per_month', 'avg_pm25_kg_per_month']
result_df = expanded_df[output_columns]

result_df.head()

Unnamed: 0,kiln_id,category,date,avg_co2_tons_per_month,avg_pm25_kg_per_month
0,100000,PNAS,2022-11,101.630549,268.335733
0,100000,PNAS,2022-12,304.891646,805.007199
0,100000,PNAS,2023-01,304.891646,805.007199
0,100000,PNAS,2023-02,304.891646,805.007199
0,100000,PNAS,2023-03,304.891646,805.007199


In [140]:
# Sanity check: list the kiln categories present after the monthly expansion.
result_df['category'].unique()

array(['PNAS', 'RCT', 'Scaling'], dtype=object)

In [143]:
# Re-select the output columns as an independent copy of the expanded table.
final_columns = ['kiln_id', 'category', 'date', 'avg_co2_tons_per_month', 'avg_pm25_kg_per_month']
result_df = result_df.loc[:, final_columns].copy()
result_df

Unnamed: 0,kiln_id,category,date,avg_co2_tons_per_month,avg_pm25_kg_per_month
0,100000,PNAS,2022-11,101.630549,268.335733
0,100000,PNAS,2022-12,304.891646,805.007199
0,100000,PNAS,2023-01,304.891646,805.007199
0,100000,PNAS,2023-02,304.891646,805.007199
0,100000,PNAS,2023-03,304.891646,805.007199
...,...,...,...,...,...
195,26202,Scaling,2022-12,165.077055,435.853915
195,26202,Scaling,2023-01,165.077055,435.853915
195,26202,Scaling,2023-02,165.077055,435.853915
195,26202,Scaling,2023-03,165.077055,435.853915


In [145]:
# Re-attach kiln type and geometry from the location table, matching on kiln_id.
kiln_locations = kilns_gps[['kiln_id', 'kiln_type', 'geometry']].copy()
kilns_fin = result_df.merge(kiln_locations, how='left', on='kiln_id')
kilns_fin

Unnamed: 0,kiln_id,category,date,avg_co2_tons_per_month,avg_pm25_kg_per_month,kiln_type,geometry
0,100000,PNAS,2022-11,101.630549,268.335733,fck,POINT (5241276.392 3739925.248)
1,100000,PNAS,2022-12,304.891646,805.007199,fck,POINT (5241276.392 3739925.248)
2,100000,PNAS,2023-01,304.891646,805.007199,fck,POINT (5241276.392 3739925.248)
3,100000,PNAS,2023-02,304.891646,805.007199,fck,POINT (5241276.392 3739925.248)
4,100000,PNAS,2023-03,304.891646,805.007199,fck,POINT (5241276.392 3739925.248)
...,...,...,...,...,...,...,...
40282,26202,Scaling,2022-12,165.077055,435.853915,zigzag,POINT (5011798.694 4024869.155)
40283,26202,Scaling,2023-01,165.077055,435.853915,zigzag,POINT (5011798.694 4024869.155)
40284,26202,Scaling,2023-02,165.077055,435.853915,zigzag,POINT (5011798.694 4024869.155)
40285,26202,Scaling,2023-03,165.077055,435.853915,zigzag,POINT (5011798.694 4024869.155)


In [157]:
# FCK kilns use a PM2.5 emission factor of 0.27 instead of the zigzag 0.25,
# so their zigzag-based estimate is scaled to 108%.
# Re-running this cell multiplies the FCK values by 1.08 again.
is_fck = kilns_fin['kiln_type'] == 'fck'
pm25_monthly = kilns_fin['avg_pm25_kg_per_month']
kilns_fin['avg_pm25_kg_per_month'] = np.where(is_fck, pm25_monthly * 1.08, pm25_monthly)
kilns_fin

Unnamed: 0,kiln_id,category,date,avg_co2_tons_per_month,avg_pm25_kg_per_month,kiln_type,geometry
0,100000,PNAS,2022-11,101.630549,289.802592,fck,POINT (5241276.392 3739925.248)
1,100000,PNAS,2022-12,304.891646,869.407775,fck,POINT (5241276.392 3739925.248)
2,100000,PNAS,2023-01,304.891646,869.407775,fck,POINT (5241276.392 3739925.248)
3,100000,PNAS,2023-02,304.891646,869.407775,fck,POINT (5241276.392 3739925.248)
4,100000,PNAS,2023-03,304.891646,869.407775,fck,POINT (5241276.392 3739925.248)
...,...,...,...,...,...,...,...
40282,26202,Scaling,2022-12,165.077055,435.853915,zigzag,POINT (5011798.694 4024869.155)
40283,26202,Scaling,2023-01,165.077055,435.853915,zigzag,POINT (5011798.694 4024869.155)
40284,26202,Scaling,2023-02,165.077055,435.853915,zigzag,POINT (5011798.694 4024869.155)
40285,26202,Scaling,2023-03,165.077055,435.853915,zigzag,POINT (5011798.694 4024869.155)


In [159]:
# Write the monthly emissions table to CSV without the index column.
# NOTE(review): this exports result_df, not kilns_fin, so the FCK PM2.5 adjustment,
# kiln_type and geometry computed on kilns_fin are not in the file -- confirm intended.
output_csv = os.path.join(path, "co2_pm25_emissions_brik_kiln.csv")
result_df.to_csv(output_csv, index=False)