In [3]:
import pandas as pd
from pathlib import Path

BASE_DIR = Path(".")

def read_with_id(path, id_col="Azonosító"):
    """Read an Excel workbook and add a normalised ``id_merge`` join key.

    Args:
        path: Location of the workbook; passed unchanged to
            ``pd.read_excel``.
        id_col: Name of the column that holds the record identifier.
            Defaults to ``"Azonosító"``.

    Returns:
        A ``(df, id_col)`` tuple: the loaded frame with an extra
        ``id_merge`` column (the identifier converted to text, stripped of
        surrounding whitespace and upper-cased) and the name of the
        identifier column that was used.

    Raises:
        KeyError: If ``id_col`` is not a column of the loaded frame. The
            message names the file so the offending input is easy to find.
    """
    df = pd.read_excel(path)
    if id_col not in df.columns:
        raise KeyError(f"Column {id_col!r} not found in {path}")
    # Normalise case and surrounding whitespace so that identifiers which
    # differ only in those respects still produce the same join key.
    df["id_merge"] = df[id_col].astype(str).str.strip().str.upper()
    return df, id_col

def drop_id_cols(df, id_col_name):
    """Return ``df`` with ``id_merge`` first and the original ID column removed.

    Args:
        df: Frame that contains an ``id_merge`` column.
        id_col_name: Name of the original identifier column to leave out.

    Returns:
        A new frame whose first column is ``id_merge``, followed by every
        other column of ``df`` in its existing order, except ``id_col_name``.
    """
    excluded = (id_col_name, "id_merge")
    ordered = ["id_merge"]
    for column in df.columns:
        if column in excluded:
            continue
        ordered.append(column)
    return df[ordered]

# Every dataset is described by five workbook names sharing one filename
# prefix: the base table, the activity / EU / topic count tables, and the
# name of the merged output.
_DATASET_PREFIXES = (
    "CERV-F3-2024_89db",
    "CERV-P-2023_93db",
    "F_2023_FK02_259db",
)

file_groups = [
    {
        "base": f"{prefix}_data_clean.xlsx",
        "activity": f"{prefix}_text_activity_counts_terciles.xlsx",
        "eu": f"{prefix}_text_EU_counts_terciles.xlsx",
        "topic": f"{prefix}_text_topic_counts_terciles.xlsx",
        "output": f"{prefix}_full_merged.xlsx",
    }
    for prefix in _DATASET_PREFIXES
]

# For every dataset: left-join the activity, EU and topic count tables onto
# the base table via the normalised ``id_merge`` key and write the result to
# the group's output workbook.
for group in file_groups:
    base_df, base_id_col = read_with_id(BASE_DIR / group["base"])

    merged = base_df
    # The three count tables are handled identically; this fixed order keeps
    # their columns appended after the base columns as activity, EU, topic.
    for part in ("activity", "eu", "topic"):
        part_df, part_id_col = read_with_id(BASE_DIR / group[part])
        part_df = drop_id_cols(part_df, part_id_col)
        # validate="many_to_one" makes pandas raise MergeError when a count
        # table repeats a key; without it the left merge would silently
        # duplicate the matching base rows in the output.
        merged = merged.merge(
            part_df, on="id_merge", how="left", validate="many_to_one"
        )

    # id_merge is only a helper join key and is not written to the output.
    merged = merged.drop(columns=["id_merge"])

    out_path = BASE_DIR / group["output"]
    merged.to_excel(out_path, index=False)
    print(f"Kész: {out_path}")

print("Minden merge sikeresen lefutott!")


Kész: CERV-F3-2024_89db_full_merged.xlsx
Kész: CERV-P-2023_93db_full_merged.xlsx
Kész: F_2023_FK02_259db_full_merged.xlsx
Minden merge sikeresen lefutott!
