In [None]:
import pandas as pd
import os
import glob

In [None]:
def find_and_merge_files(input_dir, output_dir, language, model):
    """Merge the AI-generated and human CSV files of one language/model pair.

    Every ``*.csv`` file directly inside ``input_dir`` whose name contains
    ``{language}_Ai_({model})`` or ``{language}_Human`` is loaded. The rows
    are concatenated (AI files first, then human files, each group in sorted
    path order) and written to ``{output_dir}/{language}_({model}).csv``.

    Args:
        input_dir: Directory searched (non-recursively) for input files.
        output_dir: Directory for the merged file; created if missing.
        language: Language tag used in the file names, e.g. ``"English"``.
        model: Model tag used in the AI file names, e.g. ``"gemini-pro"``.

    Raises:
        ValueError: If no input file matches either pattern.
    """
    # Escape the literal parts so glob metacharacters ("[", "]", "*", "?")
    # in the directory, language or model are matched verbatim instead of
    # being interpreted as wildcards.
    search_root = glob.escape(os.fspath(input_dir))
    ai_pattern = glob.escape(f"{language}_Ai_({model})")
    human_pattern = glob.escape(f"{language}_Human")

    # glob returns matches in arbitrary order; sort each group so the row
    # order of the merged file is reproducible across runs and platforms.
    ai_files = sorted(
        glob.glob(os.path.join(search_root, f"*{ai_pattern}*.csv"))
    )
    human_files = sorted(
        glob.glob(os.path.join(search_root, f"*{human_pattern}*.csv"))
    )
    all_files = ai_files + human_files

    # pd.concat raises an opaque "No objects to concatenate" ValueError on an
    # empty list; raise the same exception type with an actionable message.
    if not all_files:
        raise ValueError(
            f"No CSV files matching '{language}_Ai_({model})' or "
            f"'{language}_Human' found in {input_dir!r}"
        )

    merged_df = pd.concat(
        [pd.read_csv(file_path) for file_path in all_files],
        ignore_index=True,
    )

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save the merged dataframe to a new CSV file
    output_filename = f"{language}_({model}).csv"
    output_path = os.path.join(output_dir, output_filename)
    merged_df.to_csv(output_path, index=False)
    print(f"Saved merged file at {output_path}")

#--------------------------------------

# Source folder holding the per-language AI/human CSV files, and the target
# folder that receives one merged CSV per (language, model) combination.
input_directory = 'C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/Gen_Ai_Text/Combine_Datasets/Output_Files'
output_directory = 'C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset'

# Every language is merged once per model; languages vary slowest so the
# processing order is English (all models), then French, then Spanish.
languages = ['English', 'French', 'Spanish']
models = ['open-mistral-7b', 'gemini-pro', 'gpt-3.5-turbo-0125']
merge_configs = [
    {'language': language_name, 'model': model_name}
    for language_name in languages
    for model_name in models
]

# Run the merge for each language/model combination
for config in merge_configs:
    find_and_merge_files(
        input_directory,
        output_directory,
        config['language'],
        config['model'],
    )


Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset\English_(open-mistral-7b).csv
Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset\English_(gemini-pro).csv
Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset\English_(gpt-3.5-turbo-0125).csv
Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset\French_(open-mistral-7b).csv
Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset\French_(gemini-pro).csv
Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset\French_(gpt-3.5-turbo-0125).csv
Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset\Spanish_(open-mistral-7b).csv
Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Working/Prepare/Code/9_Dataset\Spanish_(gemini-pro).csv
Saved merged file at C:/Users/amirm/Desktop/MONASH/Thesis/Wo