# In [None]:
import os
import pandas as pd
from tqdm import tqdm
from fuzzywuzzy import process
from fuzzywuzzy import fuzz

def _merge_year_parameters(year, year_csv_path, output_year_folder, score_threshold):
    """Merge every college's parameter row into one year's CSV, in place.

    Each sub-folder of ``output_year_folder`` that contains a
    ``parameter_scores.csv`` is fuzzy-matched (by folder name) against the
    lower-cased, stripped ``Name`` column of the year CSV.  When the best
    match scores at least ``score_threshold``, the first data row of the
    parameter file is written into the matching row(s), creating columns as
    needed.  The year CSV is rewritten at the end, even if nothing matched.
    """
    year_df = pd.read_csv(year_csv_path)

    # Keep the normalised names in a separate Series instead of a temporary
    # DataFrame column, so a parameter key can never overwrite the lookup
    # column and nothing has to be dropped before saving.
    name_keys = year_df["Name"].str.lower().str.strip()
    # Missing names cannot identify a row; duplicates add nothing to the search.
    candidates = list(dict.fromkeys(name_keys.dropna()))

    college_folders = [
        entry for entry in os.listdir(output_year_folder)
        if os.path.isdir(os.path.join(output_year_folder, entry))
    ]

    for folder_name in tqdm(college_folders, desc=f"üîç Merging for {year}"):
        parameter_file = os.path.join(
            output_year_folder, folder_name, "parameter_scores.csv"
        )
        if not os.path.exists(parameter_file):
            continue

        # Best-effort batch: one unreadable or malformed parameter file is
        # reported and skipped rather than aborting the whole year.
        try:
            param_df = pd.read_csv(parameter_file)
            if len(param_df) < 1:
                continue

            # Only the first data row of the parameter file is used.
            param_dict = param_df.iloc[0].to_dict()

            if not candidates:
                continue

            best = process.extractOne(
                folder_name.lower().strip(),
                candidates,
                scorer=fuzz.token_sort_ratio,
            )
            # extractOne yields None when it has no candidate to return.
            if best is None:
                continue

            match_name, score = best
            if score < score_threshold:
                continue

            # All rows sharing the matched name receive the values.
            match_index = year_df.index[name_keys == match_name]
            if match_index.empty:
                continue

            for key, val in param_dict.items():
                year_df.loc[match_index, str(key)] = val

        except Exception as e:
            print(f"‚ö†Ô∏è Error processing {parameter_file}: {e}")

    year_df.to_csv(year_csv_path, index=False)


def merge_csvs(
    csv_data_path="csv_data",
    output_base_path="output",
    years=None,
    score_threshold=85,
):
    """Merge per-college parameter scores into the yearly CSV files.

    For every year, ``<csv_data_path>/nirf_data_<year>.csv`` is updated in
    place with the values found in
    ``<output_base_path><year>/<college folder>/parameter_scores.csv``.
    Years whose CSV or parameter folder does not exist are reported and
    skipped.

    Args:
        csv_data_path: Directory holding the ``nirf_data_<year>.csv`` files.
        output_base_path: Prefix of the per-year parameter folders.  The year
            is appended directly, so the default resolves to e.g.
            ``output2018``.
            NOTE(review): if the intended layout is ``output/2018``, pass a
            prefix ending in a path separator - confirm against the data.
        years: Iterable of years to process.  Defaults to 2018, 2019 and
            2021-2025.
        score_threshold: Minimum ``fuzz.token_sort_ratio`` score (0-100) for a
            folder name to be accepted as a match for a college name.

    Returns:
        None.  Results are written back to the year CSV files.
    """
    if years is None:
        years = [2018, 2019, 2021, 2022, 2023, 2024, 2025]

    for year in years:
        print(f"\nüîç Merging for {year}:")
        year_csv_path = os.path.join(csv_data_path, f"nirf_data_{year}.csv")
        # Plain concatenation: the original single-argument os.path.join was a
        # no-op around exactly this string.
        output_year_folder = f"{output_base_path}{year}"

        if not os.path.exists(year_csv_path):
            print(f"‚ùå Year CSV not found: {year_csv_path}")
            continue

        if not os.path.exists(output_year_folder):
            print(f"‚ùå Parameter folder not found: {output_year_folder}")
            continue

        _merge_year_parameters(
            year, year_csv_path, output_year_folder, score_threshold
        )
        print(f"‚úÖ Updated year file with parameter data: {year_csv_path}")

