## Append/Load National Monthly Data to final destination

In [4]:
import pandas as pd
import os
import numpy as np
from datetime import datetime, timedelta

from datetime import datetime, timedelta

# Define default parameters when running in Jupyter
try:
    parameters
except NameError:
    parameters = {
        "cwd": "."
    }


# === Get previous month range ===
today = datetime.today()
first_of_this_month = datetime(today.year, today.month, 1)
last_of_prev_month = first_of_this_month - timedelta(days=1)
first_of_prev_month = datetime(last_of_prev_month.year, last_of_prev_month.month, 1)

start_date = first_of_prev_month.strftime("%Y-%m-%d")
end_date = last_of_prev_month.strftime("%Y-%m-%d")
month_str = first_of_prev_month.strftime("%Y_%m")

# === Paths and config ===
cwd = parameters.get("cwd", ".")
os.makedirs(cwd, exist_ok=True)

# Construct full paths
input_path = os.path.join(cwd, f"weather_national_monthly_{month_str}.csv")
national_path = os.path.join(cwd, "selected_data.csv")

# === Load and prepare new data ===
national_df = pd.read_csv(input_path)
national_df["ddd_demand"] = np.nan

# Ensure column order
column_order = [
    "avg_temp_max", "avg_temp_min", "avg_humidity",
    "total_precipitation", "total_sunshine_hours", "ddd_demand"
]
national_df = national_df[column_order]

# === Append or create ===
if os.path.exists(national_path):
    national_existing = pd.read_csv(national_path)

    # Match columns excluding target
    match_columns = [
        "avg_temp_max", "avg_temp_min", "avg_humidity",
        "total_precipitation", "total_sunshine_hours"
    ]

    # Avoid duplicates
    mask = (national_existing[match_columns] == national_df.loc[0, match_columns]).all(axis=1)
    national_existing = national_existing.loc[~mask]

    # Append new data
    national_combined = pd.concat([national_existing, national_df], ignore_index=True)
    print("✅ Appended new month at bottom of file.")
else:
    national_combined = national_df
    print("📦 Created new file with initial entry.")

# === Save updated file ===
national_combined.to_csv(national_path, index=False)


✅ Appended new month at bottom of file.


## Append/Load District Monthly Data to final destination

In [6]:
import os
import sys
import pandas as pd

# Define default parameters when running in Jupyter
try:
    parameters
except NameError:
    parameters = {
        "cwd": "."
    }


# Get working directory from Papermill parameters (default: current dir)
cwd = parameters.get("cwd", ".")
os.makedirs(cwd, exist_ok=True)

# Construct full paths
month_str = parameters.get("month_str")  # Ensure this is passed in too
historical_path = os.path.join(cwd, "malaria_historical.csv")
new_data_path = os.path.join(cwd, f"weather_district_monthly_{month_str}.csv")

# Load new monthly climate data
new_df = pd.read_csv(new_data_path)

# Add mal_cases column as empty (for future prediction)
new_df["mal_cases"] = pd.NA

# Reorder columns for consistency
new_df = new_df[[
    "year", "month", "district", "mal_cases",
    "avg_temp_max", "avg_temp_min", "avg_humidity",
    "total_precipitation", "total_sunshine_hours"
]]

# Check if historical data exists
if os.path.exists(historical_path):
    historical_df = pd.read_csv(historical_path)

    # Standardize column names
    historical_df = historical_df.rename(columns={
        "sum_precipitation": "total_precipitation",
        "sum_sunshine_hours": "total_sunshine_hours"
    })

    # Ensure mal_cases column exists
    if "mal_cases" not in historical_df.columns:
        historical_df["mal_cases"] = pd.NA

    # Reorder for consistency
    historical_df = historical_df[new_df.columns]

    # --- DUPLICATE CHECK ---
    duplicates = pd.merge(
        new_df[["district", "year", "month"]],
        historical_df[["district", "year", "month"]],
        on=["district", "year", "month"],
        how="inner"
    )

    if not duplicates.empty:
        print("\n🚫 Data already exists for the following district-month combinations:\n")
        print(duplicates.to_string(index=False))
        print("\nℹ️ Skipping append to avoid duplicate entries. No changes were made.\n")
        sys.exit(0)

    # Append
    combined_df = pd.concat([historical_df, new_df], ignore_index=True)

else:
    # No historical data yet
    combined_df = new_df

# Sort and write back
combined_df = combined_df.sort_values(by=["district", "year", "month"]).reset_index(drop=True)
combined_df.to_csv(historical_path, index=False)

print("✅ District-level data appended and saved successfully.")



🚫 Data already exists for the following district-month combinations:

     district  year  month
         Abim  2025      6
     Adjumani  2025      6
        Agago  2025      6
     Alebtong  2025      6
     Amolatar  2025      6
       Amudat  2025      6
       Amuria  2025      6
        Amuru  2025      6
         Apac  2025      6
         Arua  2025      6
       Budaka  2025      6
       Bududa  2025      6
       Bugiri  2025      6
      Bugweri  2025      6
      Buhweju  2025      6
       Buikwe  2025      6
      Bukedea  2025      6
 Bukomansimbi  2025      6
        Bukwo  2025      6
    Bulambuli  2025      6
      Buliisa  2025      6
   Bundibugyo  2025      6
   Bunyangabu  2025      6
        Busia  2025      6
     Butaleja  2025      6
    Butambala  2025      6
       Butebo  2025      6
       Buvuma  2025      6
      Buyende  2025      6
       Dokolo  2025      6
         Gulu  2025      6
        Hoima  2025      6
       Ibanda  2025      6
       Igan

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
