In [1]:
import pandas as pd
import os


In [2]:
def compile_csv_files(file_paths, output_file):
    """
    Compile multiple CSV files into one and save the result to a new file.

    Every input file is read with ``pd.read_csv`` and the resulting frames are
    stacked row-wise in the order they appear in ``file_paths``. The combined
    frame gets a fresh sequential index, which is not written to the output.

    Args:
        file_paths (list): List of file paths to the CSV files.
        output_file (str): Path to the output CSV file.

    Returns:
        None
    """
    # Read every file first and concatenate once. Calling pd.concat inside
    # the loop re-copies all previously accumulated rows on each iteration
    # (quadratic work), and seeding the result with an empty DataFrame relies
    # on concat's handling of empty entries, which pandas has deprecated.
    frames = [pd.read_csv(file_path) for file_path in file_paths]

    # pd.concat raises ValueError when given no objects, so keep the previous
    # behaviour for an empty input list: write out an empty frame.
    if frames:
        combined_df = pd.concat(frames, ignore_index=True)
    else:
        combined_df = pd.DataFrame()

    combined_df.to_csv(output_file, index=False)
    print(f"Compiled file saved as {output_file}")


In [3]:
def remove_duplicates(input_file, output_file):
    """
    Write a de-duplicated copy of a CSV file.

    The input is loaded with ``pd.read_csv`` and passed through
    ``DataFrame.drop_duplicates`` with its default arguments; the remaining
    rows are written to ``output_file`` without the index column.

    Args:
        input_file (str): Path to the input CSV file.
        output_file (str): Path to the output CSV file.

    Returns:
        None
    """
    deduplicated = pd.read_csv(input_file).drop_duplicates()
    deduplicated.to_csv(output_file, index=False)
    print(f"Duplicates removed. Clean file saved as {output_file}")

# Input CSV files to merge. The paths are relative, so they are resolved
# against the notebook's current working directory. compile_csv_files
# appends rows in the order listed here, so the combined file follows this
# list order.
# NOTE(review): the names look like year ranges and are not listed
# chronologically — confirm whether row order matters downstream.
file_paths = [
    '2020-2021.csv',
    '2018-2019.csv',
    '2022-2023.csv',
    '2024.csv',
    '2013-2017_2.csv'
]

# Output paths: compiled_file is the intermediate merged CSV, and clean_file
# is the final CSV after duplicate rows have been dropped.
compiled_file = 'year_compiled.csv'
clean_file = 'year_compiled_cleaned.csv'

# Two-step pipeline: merge all inputs into compiled_file, then read that file
# back and write its de-duplicated rows to clean_file. Both files are
# overwritten each time this cell runs.
compile_csv_files(file_paths, compiled_file)
remove_duplicates(compiled_file, clean_file)

Compiled file saved as year_compiled.csv
Duplicates removed. Clean file saved as year_compiled_cleaned.csv
