In [7]:
import pandas as pd
from pathlib import Path

# ============================================================
# 0. Base folder and input files (adjust as needed)
# ============================================================

# Folder that holds every input workbook.
BASE = Path("C:/Users/HP/OneDrive/Desktop/VERO_code/Phase_1/new_data")

# Short table name -> workbook path, built from (name, file name) pairs so the
# folder is spelled out only once.
files = {
    table: BASE / workbook
    for table, workbook in (
        ("codige", "codige.xlsx"),
        ("adr", "codige_adr_clean.xlsx"),
        ("comorb", "codige_comorbidità.xlsx"),
        ("ricoveri", "codige_ricoveri.xlsx"),
        ("tratt_onco", "codige_trattamento_oncologico.xlsx"),
        ("fi_lab", "FI_lab_score.xls"),
        ("mut", "codige_tabella_alterazioni_mutazioni.xlsx"),
        ("comorb_cat", "codige_tabella_comorbidità_categoria.xlsx"),
        ("farmaco", "codige_tabella_farmaco.xlsx"),
        ("tmp_comorb", "codige_tmp_tabella_comorbilità.xlsx"),
    )
}

# ============================================================
# 1. Column renaming map (Italian -> English)
#    (Same mapping that was applied to the merged data)
# ============================================================

# Source (Italian) column header -> English column name.
# translate_single_file() applies only the entries whose key is present in the
# table being processed, so one shared map serves every input workbook.
col_map = {
    # --- Patient, demographics, lifestyle, observation window ---
    "id_paziente": "patient_id",
    "unita_operativa": "operating_unit_name",
    "data_nascita": "birth_date",
    "eta": "age",
    "eta_gruppo": "age_group",
    "sesso": "gender",
    "etnia": "ethnicity",
    "titolo_studio": "education_level",
    "bmi": "bmi_value",
    "bmi_fasce": "bmi_category",
    "attivita_lavorativa": "employment_status",
    "alcool": "alcohol_consumption",
    "fumo": "smoking_status_binary",
    "fumo_dettaglio": "smoking_status_detail",
    "da_quanti_anni_fuma": "smoking_years",
    "n_sigarette": "cigarettes_per_day",
    "data_osservazione_fu": "observation_start_date",
    "fine_osservazione_codige_dt": "observation_end_date",
    "fine_osservazione_codige_motivo": "observation_end_reason",
    # --- Tumor characteristics ---
    "diagnosi_tumore_dt": "tumor_diagnosis_date",
    "presa_incarico_uo_dt": "oncology_unit_start_date",
    "tipo_tumore": "tumor_type",
    "kmammella_sottotipo": "breast_cancer_subtype",
    "kcolon_locazione": "colon_cancer_location",
    "stadio": "tumor_stage_roman",
    "stadio_tnm": "tumor_stage_tnm",
    "grado_istologico": "histological_grade",
    "alterazioni_molecolari": "molecular_alterations",
    "mutazioni": "mutations_present",
    "genotipo_dpyd": "dpyd_genotype_known",
    "genotipo_dpyd_type": "dpyd_genotype_type",
    # --- Surgery ---
    "intervento_chirurgico": "surgical_intervention",
    "intervento_chirurgico_dt": "surgery_date",
    "intervento_chirurgico_tipo": "surgery_type",
    "intervento_chirurgico_specificare": "surgery_type_specify",
    "pregresso_intervento": "prior_surgery",
    "intervento_chirurgico_complicanze": "surgery_complications",
    "intervento_chirurgico_per_complicanze_intervento": "reoperation_for_complication",
    "ricovero_per_complicanze_intervento": "hospitalization_for_surgery_complication",
    # --- Treatment lines, radiotherapy, transfusions ---
    "linee_trattamento_oncologico_n": "oncology_treatment_lines_n",
    "pregresso_radioterapia": "prior_radiotherapy",
    "radioterapia_dt_inizio": "radiotherapy_start_date",
    "radioterapia_dt_fine": "radiotherapy_end_date",
    "trasfusione": "transfusion_received",
    "trasfusioni_ntot": "transfusions_total_n",
    # --- Comorbidity flags ---
    "ipertensione": "hypertension",
    "insufficienza_aortica": "aortic_insufficiency",
    # Two source spellings share one target (presumably a header typo in some
    # exports -- TODO confirm). A table containing both would end up with two
    # "dyslipidemia" columns after renaming.
    "dislipidemie": "dyslipidemia",
    "dispilidemie": "dyslipidemia",
    "ipb": "bph",
    "obesita": "obesity_comorbidity",
    "cardiopatia_ischemica": "ischemic_heart_disease",
    "fibrillazione_atriale": "atrial_fibrillation",
    "bpco": "copd",
    "asma": "asthma",
    # Same situation as above: two source spellings, one target.
    "diabete_tipo_ii": "diabetes_type_ii",
    "diabete_tipoII": "diabetes_type_ii",
    "reflusso_gastro": "gastroesophageal_reflux",
    # Gets its own target name; STANDARD_CATEGORICAL_MAPS has no entry for
    # "gastroesophageal_reflux_full", so its values are not relabelled there.
    "reflusso_gastroesofageo": "gastroesophageal_reflux_full",
    "insufficienza_renale": "renal_insufficiency",
    "sindrome_depressiva": "depressive_syndrome",
    "anemia": "anemia_comorbidity",
    "patologie_psichiatriche": "psychiatric_disorders",
    "altre_patologie": "other_comorbidities",
    "patologie_cardiovascolari": "cardiovascular_disorders",
    "patologie_gastrointestinali": "gastrointestinal_disorders",
    "patologie_cerebrovascolari": "cerebrovascular_disorders",
    # --- Per-patient counters and outcome ---
    "ricoveri_n": "hospitalizations_n",
    "ricoveri_ord_n": "ordinary_hospitalizations_n",
    "decesso": "death_during_observation",
    "adr": "adr_description",
    # Identity mappings: these counters keep their original names.
    "adr_n_tot": "adr_n_tot",
    "adr_n_grado1": "adr_n_grado1",
    "adr_n_grado2": "adr_n_grado2",
    "adr_n_grado3": "adr_n_grado3",
    "adr_n_grado4": "adr_n_grado4",
    "adr_n_grado5": "adr_n_grado5",
    "giorni_osservazione": "observation_days",
    "data_decesso": "death_date",
    # --- Lab values ("*_range" targets are relabelled by the lab maps) ---
    "glicemia": "blood_glucose_range",
    "gb": "white_blood_cells_range",
    "gr": "red_blood_cells_range",
    "hb": "hemoglobin_range",
    "neu": "neutrophils_percent_range",
    "conta_piastrinica": "platelet_count_range",
    "creati": "creatinine_range",
    "ast_got": "ast_got_range",
    "alt_gpt": "alt_gpt_range",
    "azotemia": "azotemia_range",
    "gamma_gt": "gamma_gt_range",
    "bilirubina_tot": "total_bilirubin_range",
    "bilirubina_dir": "direct_bilirubin_range",
    "albumina": "albumin_range",
    "fe": "ejection_fraction_percent",
    "fe_cat": "ejection_fraction_category",
    # --- Chemotherapy summary ---
    "chemio_linee": "chemo_lines_total_n",
    "chemio_fine_progressione": "end_reason_progression_any_line",
    "chemio_fine_tossicità": "end_reason_toxicity_any_line",
    "chemio_fine_altro": "end_reason_other_any_line",
    # --- ADR detail ---
    "insorgenza_dt": "adr_onset_date",
    # NOTE(review): "tipo" becomes "hospitalization_type" in every table that
    # has such a header. STANDARD_CATEGORICAL_MAPS also defines
    # "adr_type_or_hosp_type", but no target in this map produces that name --
    # confirm which meaning applies to each table.
    "tipo": "hospitalization_type",
    "grado": "adr_ctcae_grade",
    "esito": "adr_outcome",
    "correlazione_chemio": "adr_chemo_correlation",
    "azione_chemio": "adr_chemo_action",
    "provenienza": "adr_source_project",
    "adr_clean": "adr_description_clean",
    "macrocategoria": "adr_macro_category",
    # --- Hospitalization detail ---
    "accesso_dt": "hospital_admission_date",
    "dimissione_dt": "hospital_discharge_date",
    "diagnosi_accesso": "admission_diagnosis",
    "diagnosi_dimissione": "discharge_diagnosis",
    "modalita": "admission_mode",
    "durata_ps": "er_stay_duration",
    "correlazione": "hospitalization_cause",
    "modifica_schema_onco": "oncology_schema_modified",
    # --- Oncology treatment schema ---
    "linea_tattamento_oncologico": "treatment_line_n",
    "nome_schema_chemio": "chemo_schema_name",
    "inizio_schema_chemio_dt": "chemo_schema_start_date",
    "fine_schema_chemio_dt": "chemo_schema_end_date",
    "cicli_n": "chemo_cycles_n",
    "motivo_fine_schema_chemio": "chemo_schema_end_reason",
    "tossicita_tipo": "toxicity_type",
    "principio_attivo": "active_principle",
    "dose_ridotta": "dose_reduced",
    "principio_attivo_n": "active_principles_n",
    # --- Comorbidity tables and remaining columns ---
    "comorbidita_cat": "comorbidity_category_list",
    "comorbidita_dt": "comorbidity_diagnosis_date",
    "cardiopatia_ipertensiva": "hypertensive_heart_disease",
    "radioterapia": "radiotherapy_status",
    # Hospitalization columns carrying a "_ricoveri2" suffix (if present);
    # the suffix is kept on the English name.
    "ricovero_n_ricoveri2": "hospitalizations_n_ricoveri2",
    "accesso_dt_ricoveri2": "hospital_admission_date_ricoveri2",
    "dimissione_dt_ricoveri2": "hospital_discharge_date_ricoveri2",
    "diagnosi_accesso_ricoveri2": "admission_diagnosis_ricoveri2",
    "diagnosi_dimissione_ricoveri2": "discharge_diagnosis_ricoveri2",
    "modalita_ricoveri2": "admission_mode_ricoveri2",
    "durata_ps_ricoveri2": "er_stay_duration_ricoveri2",
    "tipo_ricoveri2": "hospitalization_type_ricoveri2",
    "correlazione_ricoveri2": "hospitalization_cause_ricoveri2",
    "modifica_schema_onco_ricoveri2": "oncology_schema_modified_ricoveri2",
}

# ============================================================
# 2. Value / categorical maps (same as in the previous version of this code)
# ============================================================

def _with_str_keys(codes):
    """Return ``codes`` followed by a string-keyed twin of every entry.

    Each numeric code is mirrored under ``str(code)`` so a lookup succeeds
    whether the code is stored as a number or as text. Numeric keys keep
    their position ahead of the string keys.
    """
    return {**codes, **{str(code): label for code, label in codes.items()}}


# Yes/no flag shared by every binary column listed further down.
BINARY_PRESENCE_MAP = _with_str_keys({0: "Absent / No", 1: "Present / Yes"})

# Gender codes plus the Italian words, applied before the other maps.
GENDER_MAP_PRE_PROCESSING = {
    **_with_str_keys({1: "Male", 2: "Female"}),
    "Maschio": "Male",
    "Femmina": "Female",
}

# English column name -> {raw value: English label}.
STANDARD_CATEGORICAL_MAPS = {
    "age_group": _with_str_keys({1: "<= 65 years", 2: "> 65 years"}),
    "ethnicity": _with_str_keys({
        0: "Missing / Not Noted",
        1: "African or African American",
        2: "Asian",
        3: "Caucasian",
    }),
    "education_level": _with_str_keys({
        0: "Not Known / Missing",
        1: "Elementary School",
        2: "Middle School",
        3: "High School",
        4: "University Degree",
    }),
    "bmi_category": _with_str_keys({
        1: "<18.5 Underweight",
        2: "18.5-24.9 Normal Weight",
        3: "25-29.99 Overweight",
        4: ">=30 Obese",
    }),
    "employment_status": _with_str_keys({
        0: "Not Known / Missing",
        1: "Unemployed",
        2: "Homemaker",
        3: "Worker",
        4: "Retired",
    }),
    "alcohol_consumption": _with_str_keys({
        0: "Not Known / Missing",
        1: "Non-drinker",
        2: "Yes, Moderate",
        3: "Yes, High",
    }),
    "smoking_status_detail": _with_str_keys({
        0: "Not Known / Missing",
        1: "Never Smoked",
        2: "Current Smoker",
        3: "Ex-Smoker",
    }),
    "observation_end_reason": _with_str_keys({
        1: "End of Treatment with Follow-Up",
        2: "Lost to Follow-Up",
        3: "End of Study Period (ONCO22 - 31/12/2023)",
        4: "Death",
        5: "Informed Consent Withdrawal",
        6: "Other",
        7: "Transferred",
        8: "End of Study Period (Codige - 31/12/2024)",
    }),
    "breast_cancer_subtype": _with_str_keys({
        1: "Luminal",
        2: "HER2",
        3: "Triple Negative",
    }),
    "colon_cancer_location": _with_str_keys({1: "Left", 2: "Right"}),
    "tumor_stage_roman": {
        "I": "Stage I",
        "II": "Stage II",
        "III": "Stage III",
        "IV": "Stage IV",
    },
    "histological_grade": _with_str_keys({1: "G1", 2: "G2", 3: "G3"}),
    "surgery_type": _with_str_keys({
        1: "Partial Tumor Excision",
        2: "Total Tumor Excision",
        3: "Surgical Biopsy",
        4: "Laparoscopy",
        5: "Palliative Procedure",
        6: "Other (Specify)",
    }),
    "admission_mode": _with_str_keys({
        1: "Programmed",
        2: "Emergency Room (PS)",
    }),
    "er_stay_duration": _with_str_keys({
        1: "Less than 24 hours",
        2: "More than 24 hours",
    }),
    "hospitalization_type": _with_str_keys({
        1: "Ordinary",
        2: "Day Hospital / Day Service (PACC)",
    }),
    "hospitalization_cause": {
        "Tumore": "Tumor-Related",
        "Trattamento oncologico": "Oncology Treatment-Related",
        "Altro (Specificare)": "Other (Specify)",
    },
    # Binary flags: every column points at the same BINARY_PRESENCE_MAP object.
    **dict.fromkeys(
        (
            "adr_event_present",
            "death_during_observation",
            "molecular_alterations",
            "mutations_present",
        ),
        BINARY_PRESENCE_MAP,
    ),
    "dpyd_genotype_known": _with_str_keys({0: "Not Known", 1: "Known"}),
    "dpyd_genotype_type": _with_str_keys({
        1: "Wild-Type Genotype",
        2: "Polymorphism",
    }),
    **dict.fromkeys(
        (
            "surgical_intervention",
            "prior_surgery",
            "surgery_complications",
            "reoperation_for_complication",
            "hospitalization_for_surgery_complication",
            "prior_radiotherapy",
            "transfusion_received",
            "oncology_schema_modified",
            "dose_reduced",
            "end_reason_progression_any_line",
            "end_reason_toxicity_any_line",
            "end_reason_other_any_line",
            "hypertension",
            "aortic_insufficiency",
            "dyslipidemia",
            "bph",
            "obesity_comorbidity",
            "ischemic_heart_disease",
            "atrial_fibrillation",
            "copd",
            "asthma",
            "diabetes_type_ii",
            "gastroesophageal_reflux",
            "renal_insufficiency",
            "depressive_syndrome",
            "anemia_comorbidity",
            "psychiatric_disorders",
            "other_comorbidities",
            "cardiovascular_disorders",
            "gastrointestinal_disorders",
            "cerebrovascular_disorders",
        ),
        BINARY_PRESENCE_MAP,
    ),
    "adr_type_or_hosp_type": _with_str_keys({
        1: "Intermittent",
        2: "Continuous",
    }),
    "adr_outcome": _with_str_keys({
        1: "Complete Resolution",
        2: "Resolution with Sequelae",
        3: "Improvement",
        4: "Unchanged or Worsened Reaction",
        5: "Death",
        6: "Not Available",
    }),
    "adr_chemo_action": _with_str_keys({
        1: "Chemotherapy Treatment Interrupted",
        2: "Oncological Schema Modified",
        3: "Dose Reduction of One or More Drugs",
        4: "None",
        5: "Not Known, Not Applicable",
        6: "Other (Specify)",
    }),
    "chemo_schema_end_reason": {
        "Progressione della malattia": "Disease Progression",
        "Tossicità (Specificare)": "Toxicity (Specify)",
        "Altro (Specificare)": "Other (Specify)",
    },
    "ejection_fraction_category": _with_str_keys({
        1: "Preserved >=50%",
        2: "Mid-Range 40-49%",
        3: "Reduced <40%",
    }),
}

# Fallback string replacements
# Whole-cell Italian -> English replacements applied to every text column
# after the column-specific maps. Synonyms are grouped; key order is unchanged.
fallback_repl = {
    **dict.fromkeys(
        ("Non noto", "Non nota", "non noto", "non nota"),
        "Missing / Not Known",
    ),
    **dict.fromkeys(("Si", "Sì"), "Yes"),
    "NO": "No",
    "SI": "Yes",
    "Maschio": "Male",
    **dict.fromkeys(("Femminile", "Femmina"), "Female"),
    "Programmato": "Programmed",
    "PS": "Emergency Room (PS)",
    "Altro (Specificare)": "Other (Specify)",
    "Tossicità (Specificare)": "Toxicity (Specify)",
    "Progressione della malattia": "Disease Progression",
}

# Lab maps (simplified)
def _lab_bands(bands):
    """Return ``bands`` with the shared 'Non noto' entry appended last."""
    return {**bands, "Non noto": "Missing / Not Known"}


def _three_bands(low, high, unit):
    """Build the Low / Normal / High table for the cut-offs ``low`` and ``high``.

    ``low`` and ``high`` are the cut-off texts exactly as they appear in the
    data; ``unit`` is appended verbatim inside the label parentheses.
    """
    return _lab_bands({
        f"<{low}": f"Low (<{low}{unit})",
        f"{low}-{high}": f"Normal ({low}-{high}{unit})",
        f">{high}": f"High (>{high}{unit})",
    })


# AST and ALT use the same bands; each lab gets its own copy below.
_TRANSAMINASE_BANDS = _lab_bands({
    "<21": "Normal (<21 U/L)",
    "≥21": "High (>=21 U/L)",
    ">=21": "High (>=21 U/L)",
})

# Lab key -> {raw range text: English label}, for labs without gender-specific bands.
SIMPLE_LAB_MAPS = {
    "ast_got": dict(_TRANSAMINASE_BANDS),
    "alt_gpt": dict(_TRANSAMINASE_BANDS),
    "bun": _three_bands("10", "50", " mg/dL"),
    "total_bilirubin": _three_bands("0.2", "1", " mg/dL"),
    "direct_bilirubin": _lab_bands({
        "<0.2": "Normal (<0.2 mg/dL)",
        ">0.2": "High (>0.2 mg/dL)",
    }),
    "albumin": _three_bands("3.6", "4.9", " g/dL"),
    "glucose": _three_bands("65", "110", " mg/dL"),
    "white_blood_cells": _three_bands("4000", "11000", " cells/µL"),
    "neutrophils": _three_bands("40", "75", "%"),
    # Labels abbreviate the cut-offs ("150k"), so this table stays literal.
    "platelet_count": _lab_bands({
        "<150000": "Low (<150k /µL)",
        "150000-400000": "Normal (150k-400k /µL)",
        ">400000": "High (>400k /µL)",
    }),
}

# Translated "<stem>_range" column -> key into SIMPLE_LAB_MAPS.
SIMPLE_DATASET_TO_KEYS = {
    f"{stem}_range": lab_key
    for stem, lab_key in (
        ("ast_got", "ast_got"),
        ("alt_gpt", "alt_gpt"),
        ("azotemia", "bun"),
        ("total_bilirubin", "total_bilirubin"),
        ("direct_bilirubin", "direct_bilirubin"),
        ("albumin", "albumin"),
        ("blood_glucose", "glucose"),
        ("white_blood_cells", "white_blood_cells"),
        ("neutrophils_percent", "neutrophils"),
        ("platelet_count", "platelet_count"),
    )
}

# Lab key -> gender -> {raw range text: English label}.
GENDER_SPECIFIC_LAB_MAPS = {
    "creatinine": {
        "Male": _three_bands("0.70", "1.40", " mg/dL"),
        "Female": _three_bands("0.70", "1.20", " mg/dL"),
    },
    "red_blood_cells": {
        "Male": _three_bands("4.5", "5.9", " mil/µL"),
        "Female": _three_bands("4.0", "5.0", " mil/µL"),
    },
    "hemoglobin": {
        "Male": _three_bands("13", "17", " g/dL"),
        "Female": _three_bands("12", "16", " g/dL"),
    },
}

def map_gender_specific(value, gender_value, lab_key):
    """Relabel one lab-range cell using the table for the patient's gender.

    Args:
        value: Raw cell content. Missing values are returned untouched.
        gender_value: Expected to be ``"Male"`` or ``"Female"``; anything else
            disables the lookup.
        lab_key: Key into ``GENDER_SPECIFIC_LAB_MAPS``.

    Returns:
        The English label when the normalised text is found for that gender,
        otherwise the normalised text itself (stripped, with "≥" rewritten as
        ">="). Missing input comes back unchanged.

    Raises:
        KeyError: If ``lab_key`` is unknown and the gender is recognised.
    """
    if pd.isna(value):
        return value

    normalized = str(value).strip().replace("≥", ">=")

    if gender_value in ("Male", "Female"):
        tables_by_gender = GENDER_SPECIFIC_LAB_MAPS[lab_key]
        labels = tables_by_gender.get(gender_value, {})
        return labels.get(normalized, normalized)

    # Without a recognised gender there is no table to consult.
    return normalized

# ============================================================
# 3. Function to translate one file independently
# ============================================================

def translate_single_file(name: str, path: Path):
    """Translate one Excel table to English and save it next to the input.

    Processing order: strip header whitespace, rename columns through
    ``col_map``, normalise ``gender``, apply ``STANDARD_CATEGORICAL_MAPS``,
    apply ``fallback_repl`` to columns that are neither numeric nor datetime,
    relabel the lab-range columns (plain, then gender-specific), and write
    ``<stem>_english`` beside the source workbook.

    Args:
        name: Short table label, used only in the progress messages.
        path: Input workbook. When it does not exist a message is printed and
            nothing is written.

    Returns:
        None.
    """
    print(f"\n=== Processing table: {name} ({path.name}) ===")
    if not path.exists():
        print("  File not found. Skipping.")
        return

    df = pd.read_excel(path)

    # Strip header whitespace; non-string labels (numbers, dates) have no
    # .strip() and are kept as they are.
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]

    # 1) Column renaming (only where columns exist)
    existing_map = {k: v for k, v in col_map.items() if k in df.columns}
    df = df.rename(columns=existing_map)
    print(f"  Renamed {len(existing_map)} columns.")

    # 2) Gender normalization (if present). Done first because the
    #    gender-specific lab tables are keyed on "Male" / "Female".
    if 'gender' in df.columns:
        df['gender'] = df['gender'].replace(GENDER_MAP_PRE_PROCESSING)

    # 3) Apply standard categorical maps (only for existing columns)
    for col, cmap in STANDARD_CATEGORICAL_MAPS.items():
        if col in df.columns:
            df[col] = df[col].replace(cmap)

    # 4) Fallback replacements for text columns
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]) or pd.api.types.is_datetime64_any_dtype(df[col]):
            continue
        df[col] = df[col].replace(fallback_repl)

    # 5) Simple lab mapping for range variables
    for ds_col, lab_key in SIMPLE_DATASET_TO_KEYS.items():
        if ds_col in df.columns:
            cmap = SIMPLE_LAB_MAPS[lab_key]
            raw = df[ds_col]
            relabelled = (
                raw
                .astype(str)
                .str.strip()
                .str.replace("≥", ">=", regex=False)
                .replace(cmap)
            )
            # astype(str) turns missing cells into the text "nan"; restore the
            # original missing values so blanks stay blank in the output,
            # matching what map_gender_specific does.
            df[ds_col] = relabelled.where(raw.notna(), raw)

    # 6) Gender-specific lab mapping (if both gender and lab range exist)
    if 'gender' in df.columns:
        gender_based_cols = {
            'creatinine_range': 'creatinine',
            'red_blood_cells_range': 'red_blood_cells',
            'hemoglobin_range': 'hemoglobin',
        }
        for ds_col, lab_key in gender_based_cols.items():
            if ds_col in df.columns:
                # Pair each cell with its row's gender directly; a row-wise
                # DataFrame.apply returns a frame (not a column) when the
                # table has no rows.
                df[ds_col] = [
                    map_gender_specific(cell, gender, lab_key)
                    for cell, gender in zip(df[ds_col], df['gender'])
                ]

    # 7) Save translated version (per file). pandas 2.x no longer ships a
    #    writer for the legacy ".xls" format, so such inputs are saved as
    #    ".xlsx"; every other suffix is kept.
    out_suffix = ".xlsx" if path.suffix.lower() == ".xls" else path.suffix
    out_path = path.with_name(path.stem + "_english" + out_suffix)
    df.to_excel(out_path, index=False)
    print(f"  Saved translated file to: {out_path}")


# ============================================================
# 4. Run translation for all configured tables
# ============================================================

# Process the configured tables one by one, in the order they are listed in
# `files`; a missing workbook is reported by translate_single_file and skipped.
for name in files:
    path = files[name]
    translate_single_file(name, path)



=== Processing table: codige (codige.xlsx) ===
  Renamed 87 columns.


  df.to_excel(out_path, index=False)


  Saved translated file to: C:\Users\HP\OneDrive\Desktop\VERO_code\Phase_1\new_data\codige_english.xlsx

=== Processing table: adr (codige_adr_clean.xlsx) ===
  Renamed 11 columns.


  df.to_excel(out_path, index=False)


  Saved translated file to: C:\Users\HP\OneDrive\Desktop\VERO_code\Phase_1\new_data\codige_adr_clean_english.xlsx

=== Processing table: comorb (codige_comorbidità.xlsx) ===
  File not found. Skipping.

=== Processing table: ricoveri (codige_ricoveri.xlsx) ===
  Renamed 10 columns.


  df.to_excel(out_path, index=False)


  Saved translated file to: C:\Users\HP\OneDrive\Desktop\VERO_code\Phase_1\new_data\codige_ricoveri_english.xlsx

=== Processing table: tratt_onco (codige_trattamento_oncologico.xlsx) ===
  Renamed 10 columns.


  df.to_excel(out_path, index=False)


  Saved translated file to: C:\Users\HP\OneDrive\Desktop\VERO_code\Phase_1\new_data\codige_trattamento_oncologico_english.xlsx

=== Processing table: fi_lab (FI_lab_score.xls) ===
  File not found. Skipping.

=== Processing table: mut (codige_tabella_alterazioni_mutazioni.xlsx) ===
  Renamed 4 columns.
  Saved translated file to: C:\Users\HP\OneDrive\Desktop\VERO_code\Phase_1\new_data\codige_tabella_alterazioni_mutazioni_english.xlsx

=== Processing table: comorb_cat (codige_tabella_comorbidità_categoria.xlsx) ===


  df.to_excel(out_path, index=False)
  df.to_excel(out_path, index=False)


  Renamed 2 columns.
  Saved translated file to: C:\Users\HP\OneDrive\Desktop\VERO_code\Phase_1\new_data\codige_tabella_comorbidità_categoria_english.xlsx

=== Processing table: farmaco (codige_tabella_farmaco.xlsx) ===
  Renamed 3 columns.


  df.to_excel(out_path, index=False)


  Saved translated file to: C:\Users\HP\OneDrive\Desktop\VERO_code\Phase_1\new_data\codige_tabella_farmaco_english.xlsx

=== Processing table: tmp_comorb (codige_tmp_tabella_comorbilità.xlsx) ===
  Renamed 2 columns.


  df.to_excel(out_path, index=False)


  Saved translated file to: C:\Users\HP\OneDrive\Desktop\VERO_code\Phase_1\new_data\codige_tmp_tabella_comorbilità_english.xlsx
