In [1]:
import pandas as pd
import os

# Paths
input_file = r"D:\TRAINING MODEL\data\processed\copyFolder"
master_csv = os.path.join(input_file, "master_dataset.csv")
logs_folder = r"D:\TRAINING MODEL\logs"
dict_file = os.path.join(logs_folder, "age_group_mapping.txt")

# Columns that identify a record. Every other column of the wide file is
# treated as a value column and melted into an (Age_Sex, Count) pair.
id_columns = ["Month_year", "Consultation_Type", "Case"]

# Load
df = pd.read_csv(master_csv)

# Reshape to long format.
# master_csv is overwritten with the long-format frame later in this notebook,
# so on any run after the first the file already contains "Age_Sex" and
# "Count". Melting it again would fail (value_name clashes with the existing
# "Count" column) or stack derived columns as if they were counts. In that
# case, reuse the existing long columns; anything derived from them is
# recomputed downstream.
if {"Age_Sex", "Count"}.issubset(df.columns):
    df_long = df[id_columns + ["Age_Sex", "Count"]].dropna(subset=["Count"])
else:
    df_long = df.melt(
        id_vars=id_columns,
        var_name="Age_Sex",
        value_name="Count"
    ).dropna(subset=["Count"])


In [2]:
# Split Age and Sex: everything before the last space becomes Age_Group and
# the final token becomes Sex.
# NOTE(review): assumes labels look like "<age group> <sex>" (inferred from the
# mapping keys below) — confirm against the source column names.
df_long[["Age_Group", "Sex"]] = df_long["Age_Sex"].str.rsplit(" ", n=1, expand=True)

# Age group mapping (source label -> normalized label)
age_group_dict = {
    "Under 1": "0",
    "1-4": "1-4",
    "5-9": "5-9",
    "10-14": "10-14",
    "15-18": "15-18",
    "19-24": "19-24",
    "25-29": "25-29",
    "30-34": "30-34",
    "35-39": "35-39",
    "40-44": "40-44",
    "45-49": "45-49",
    "50-54": "50-54",
    "55-59": "55-59",
    "60-64": "60-64",
    "65-69": "65-69",
    "70 Over": "70+"
}

# Apply mapping; labels that are not keys of age_group_dict become NaN.
df_long["Age_Group_Num"] = df_long["Age_Group"].map(age_group_dict)

# Report labels the mapping did not cover before the source file is
# overwritten, so gaps are visible instead of silently turning into empty
# Age_Group_Num cells in the saved dataset.
unmapped_groups = sorted(
    df_long.loc[df_long["Age_Group_Num"].isna(), "Age_Group"]
    .dropna()
    .astype(str)
    .unique()
)
if unmapped_groups:
    print("⚠️ Age groups without a mapping (Age_Group_Num left empty):", unmapped_groups)

# Save dictionary to txt, creating the target folder first so a fresh
# checkout without the logs directory does not fail on open().
dict_dir = os.path.dirname(dict_file)
if dict_dir:
    os.makedirs(dict_dir, exist_ok=True)
with open(dict_file, "w", encoding="utf-8") as f:
    for k, v in age_group_dict.items():
        f.write(f"{k}: {v}\n")

# Overwrite the original CSV
df_long.to_csv(master_csv, index=False)

print("✅ master_dataset.csv overwritten with long format")
print("✅ Age group dictionary saved at:", dict_file)

✅ master_dataset.csv overwritten with long format
✅ Age group dictionary saved at: D:\TRAINING MODEL\logs\age_group_mapping.txt
