
This notebook decodes the column headers of the survey datasets using the metadata Sheet 1 and Sheet 2 definitions.  
It renames raw variable codes to human‑readable descriptions, keeping the column names consistent across all survey files.

##### Dependencies
- Requires `00_Settings.ipynb` to be executed first (defines `inventory`, `base_path`, and global settings).
- Requires reshaped metadata outputs from `01_Metadata_Sheet1_Reshaper.ipynb`.
##### Outputs
- Decoded survey CSVs saved into the folder: **NEW Header Encoded Surveys**
- Formal translation reports per survey (coverage, untranslated codes).
##### Notes
- This notebook is complete and working; once it has been executed, it does not need to be rerun.
- Each notebook in the pipeline retains its function and respects assigned variables consistently.


### Loader Function

In [None]:
import os
import pandas as pd

def load_dataset(year, month, filetype="survey"):
    """
    Fetch one inventory-listed file for a survey period and return it as a DataFrame.

    Uses the globals 'inventory' and 'base_path' defined in 00_Settings.ipynb.
    The first inventory entry whose 'filetype' matches is loaded: "survey"
    files are parsed as CSV, any other filetype is parsed as Excel.

    Raises ValueError when the year/month pair is absent from the inventory,
    and FileNotFoundError when the period has no entry of the requested filetype.
    """
    period_known = year in inventory and month in inventory[year]
    if not period_known:
        raise ValueError(f"No records found in inventory for {month} {year}.")

    # Take the first entry of the requested type, if there is one.
    match = None
    for entry in inventory[year][month]:
        if entry['filetype'] == filetype:
            match = entry
            break
    if not match:
        raise FileNotFoundError(f"No {filetype} file found for {month} {year}.")

    target = os.path.join(base_path, year, match['filename'])
    if filetype == "survey":
        return pd.read_csv(target, low_memory=False)
    return pd.read_excel(target)


def load_clean_sheet1(year, month):
    """
    Read the processed Sheet 1 variable definitions for one survey period.

    The file is looked up at
    <base_path>/Metadata Sheet 1 CSV's/<year>/Sheet1_<month>_<year>.csv,
    where 'base_path' is the global defined in 00_Settings.ipynb.

    Raises FileNotFoundError if that file does not exist.
    """
    csv_path = os.path.join(
        base_path,
        "Metadata Sheet 1 CSV's",
        year,
        f"Sheet1_{month}_{year}.csv",
    )

    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)
    raise FileNotFoundError(f"Processed metadata file not found at {csv_path}")


### Header translation

In [None]:
def apply_metadata_headers(survey_df, metadata_sheet1_df, year="Unknown", month="Survey"):
    """
    Rename raw survey columns using metadata definitions.
    Prints a translation report showing coverage and untranslated codes.

    Parameters:
        survey_df: DataFrame whose column names are raw variable codes.
        metadata_sheet1_df: DataFrame with 'Variable' (raw code) and
            'Description' (readable name) columns. It is only read, never modified.
        year, month: Labels used in the printed report header.

    Returns:
        A new DataFrame with every column that has a usable metadata entry
        renamed to its description; 'survey_df' itself is left unchanged.

    Metadata rows with a missing or blank 'Variable' or 'Description' are
    ignored, so the matching survey columns keep their raw code and are
    listed as untranslated instead of being renamed to "nan".
    """
    # Select + dropna yields a new frame, so the caller's metadata is not touched.
    pairs = metadata_sheet1_df[['Variable', 'Description']].dropna()
    codes = pairs['Variable'].astype(str).str.strip()
    labels = pairs['Description'].astype(str).str.strip()

    # Skip entries that are empty after stripping; they carry no usable name.
    header_map = {
        code: label
        for code, label in zip(codes, labels)
        if code and label
    }

    original_cols = set(survey_df.columns)
    translated_cols = original_cols.intersection(header_map)
    untranslated_cols = original_cols.difference(header_map)

    renamed_df = survey_df.rename(columns=header_map)

    print("\n" + "="*60)
    print(f"METADATA TRANSLATION REPORT: {month.upper()} {year}")
    print("="*60)
    print(f"Total Columns Detected:       {len(original_cols)}")
    print(f"Successfully Decoded:         {len(translated_cols)}")
    print(f"Remaining as Raw Codes:       {len(untranslated_cols)}")
    print("-" * 60)

    if not untranslated_cols:
        print("Status: SUCCESS (100% Metadata Coverage)")
    else:
        print("Status: PARTIAL SUCCESS")
        print("Untranslated Codes:", sorted(untranslated_cols))

    print("="*60 + "\n")
    return renamed_df


### Batch Automation