In [8]:
# --- Import dependencies ---
import pandas as pd

In [9]:
# --- Define input and output file paths ---
FIREWOOD_FILE = "../data/processed/fuel_consumption_master_dataset_2010_2024.csv"
CLEANFUEL_FILE = "../data/processed/cameroon_clean_cooking_simple_2010_2024.csv"
OUTFILE = "../data/raw/merged_raw_hybrid.csv"

In [10]:
# --- STEP 3: Load both datasets ---
fw = pd.read_csv(FIREWOOD_FILE)
cf = pd.read_csv(CLEANFUEL_FILE)

print("Firewood dataset shape:", fw.shape)
print("Clean fuel dataset shape:", cf.shape)

Firewood dataset shape: (5479, 25)
Clean fuel dataset shape: (5479, 12)


In [11]:
# --- STEP 4: Normalize column names ---
fw.columns = fw.columns.str.lower()
cf.columns = cf.columns.str.lower()

In [12]:
# --- STEP 5: Identify region and year columns ---
def find_col(df, candidates):
    for c in candidates:
        if c in df.columns:
            return c
    return None

region_fw = find_col(fw, ['region','province','admin_name'])
year_fw = find_col(fw, ['year','date'])
year_cf = find_col(cf, ['year','date'])

fw = fw.rename(columns={region_fw:'region', year_fw:'year'})
cf = cf.rename(columns={year_cf:'year'})

In [13]:

# --- STEP 6: Verify clean fuel dataset (should be national-level) ---
if 'region' not in cf.columns:
    print("⚠️ Clean fuel dataset has no 'region' column — will be merged by year only.")


⚠️ Clean fuel dataset has no 'region' column — will be merged by year only.


In [14]:
# --- STEP 7: Merge clean fuel data into every region by year ---
merged = pd.merge(fw, cf, on='year', how='left')

print("\n✅ Hybrid merge complete!")
print("Merged shape:", merged.shape)
display(merged.head())


✅ Hybrid merge complete!
Merged shape: (2001299, 36)


Unnamed: 0,date_x,temperature_avg,rainfall_mm,humidity_avg,wind_speed_kmh,season,woodfuel_production_m3,year,month_x,quarter_x,...,month_y,quarter_y,day_of_year_y,day_of_week_y,is_weekend_y,clean_fuel_access,country,country_code,series_name,series_code
0,2010-01-01,12.0,0.2,73.0,10.1,Dry,37258.41,2010,1,1,...,1,1,1,4,0,2.0,Cameroon,CMR,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS
1,2010-01-01,12.0,0.2,73.0,10.1,Dry,37258.41,2010,1,1,...,1,1,2,5,1,2.0,Cameroon,CMR,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS
2,2010-01-01,12.0,0.2,73.0,10.1,Dry,37258.41,2010,1,1,...,1,1,3,6,1,2.001,Cameroon,CMR,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS
3,2010-01-01,12.0,0.2,73.0,10.1,Dry,37258.41,2010,1,1,...,1,1,4,0,0,2.001,Cameroon,CMR,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS
4,2010-01-01,12.0,0.2,73.0,10.1,Dry,37258.41,2010,1,1,...,1,1,5,1,0,2.005,Cameroon,CMR,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS


In [15]:

# --- STEP 8: Save for preprocessing ---
merged.to_csv(OUTFILE, index=False)
print(f"\n💾 Saved hybrid merged dataset as {OUTFILE}")


💾 Saved hybrid merged dataset as ../data/raw/merged_raw_hybrid.csv
