In [None]:
import os
import pandas as pd
import Config
# ----------------------------
# CONFIG
# ----------------------------
# Root folder holding one "Person_<n>" directory per person.
PERSON_FOLDERS_DIR = rf'{Config.BASE_PATH}\Working environment'
# Folder holding the per-service operational CSV exports.
OPERATIONAL_DIR = rf'{Config.BASE_PATH}\backend'
# Person folders are numbered Person_1 .. Person_<NUM_PERSONS>.
NUM_PERSONS = 36

# The 14 expected service names: Service_1 .. Service_14
SERVICE_LIST = [f"Service_{number}" for number in range(1, 14 + 1)]

# ----------------------------
# LOAD ALL OPERATIONAL DATA ONCE
# ----------------------------
# Maps service name -> operational DataFrame. Services whose CSV is absent
# are reported on stdout and left out of the mapping.
operational_cache = {}

for service in SERVICE_LIST:
    # File naming convention: backend_operations_service_1.csv
    operational_path = os.path.join(
        OPERATIONAL_DIR,
        "backend_operations_{}.csv".format(service.lower()),
    )

    if not os.path.exists(operational_path):
        print(f"Missing operational dataset for: {service}")
        continue

    operational_cache[service] = pd.read_csv(operational_path)

print(f"\nLoaded {len(operational_cache)} operational datasets into memory.\n")


# ----------------------------
# GOLD PROCESSING FUNCTION
# ----------------------------
def process_gold_for_person(person_folder):
    """Build the Gold workbooks for one person folder.

    For every ``<service>.xlsx`` workbook in ``<person_folder>/Silver`` whose
    stem is listed in ``SERVICE_LIST``, the workbook is left-merged with the
    cached operational dataset for that service on ``area_id`` and
    ``service_type``, reduced to the first row per ``transaction_id``, and
    written to ``<person_folder>/Gold/<service>_final.xlsx``.

    Args:
        person_folder: Path of the person's directory. It is expected to
            contain a ``Silver`` sub-folder; when it does not, the person is
            skipped with a message and nothing is created on disk.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    silver_folder = os.path.join(person_folder, "Silver")

    # Check for Silver *before* creating Gold: os.makedirs would otherwise
    # create the missing person folder as a side effect, and os.listdir would
    # then abort the whole run with FileNotFoundError.
    if not os.path.isdir(silver_folder):
        print(f"Silver folder not found: {silver_folder}. Skipping.")
        return

    gold_folder = os.path.join(person_folder, "Gold")
    os.makedirs(gold_folder, exist_ok=True)

    for file_name in os.listdir(silver_folder):
        # splitext removes only the trailing extension, unlike str.replace,
        # which would strip every ".xlsx" occurrence in the name.
        service_name, extension = os.path.splitext(file_name)   # e.g. Service_1
        if extension != ".xlsx" or service_name not in SERVICE_LIST:
            continue

        # ----------------------------
        # Grab cached operational dataset
        # ----------------------------
        # Done before reading the workbook so no Excel I/O is wasted on a
        # service that cannot be merged anyway.
        if service_name not in operational_cache:
            print(f"No operational dataset found in cache for {service_name}. Skipping.")
            continue

        df_operational = operational_cache[service_name]

        # ----------------------------
        # Load Silver (combined service file)
        # ----------------------------
        silver_file_path = os.path.join(silver_folder, file_name)
        df_silver = pd.read_excel(silver_file_path)

        # ----------------------------
        # MERGE
        # ----------------------------
        # A left join keeps every Silver row; drop_duplicates then keeps the
        # first row for each transaction_id.
        df_merged = df_silver.merge(
            df_operational, how="left", on=["area_id", "service_type"]
        )
        df_merged = df_merged.drop_duplicates(subset='transaction_id')

        # ----------------------------
        # SAVE -> GOLD
        # ----------------------------
        output_path = os.path.join(gold_folder, f"{service_name}_final.xlsx")
        df_merged.to_excel(output_path, index=False)

        print(f"Merged & saved: {output_path}")


# ----------------------------
# PROCESS ALL PERSON FOLDERS
# ----------------------------
# Person folders are named Person_1 .. Person_<NUM_PERSONS> under PERSON_FOLDERS_DIR.
for i in range(1, NUM_PERSONS + 1):
    person_folder = os.path.join(PERSON_FOLDERS_DIR, f"Person_{i}")
    process_gold_for_person(person_folder)

# Report the configured count rather than a hard-coded number so the message
# stays correct when NUM_PERSONS changes.
print(f"\n Gold processing complete for all {NUM_PERSONS} folders.")
