In [1]:
import pandas as pd
import os
from pathlib import Path

def combine_csv_files(input_path):
    """
    Combine all CSV files in the specified directory into a single CSV file.

    Every regular file matching ``*.csv`` directly inside ``input_path`` is
    read with ``pd.read_csv`` and tagged with a ``Source_File`` column holding
    its file name. Files are processed in sorted order so the row order of the
    result is reproducible across runs and filesystems. The combined frame is
    written to ``combined_<directory name, lower-cased>.csv`` in the parent of
    ``input_path``.

    Args:
        input_path (str): Path to the directory containing CSV files

    Returns:
        tuple: ``(combined_df, output_path)`` where ``combined_df`` is the
        concatenated ``pd.DataFrame`` and ``output_path`` is the path of the
        written CSV file as a ``str``.

    Raises:
        FileNotFoundError: If ``input_path`` is not an existing directory, or
            if it contains no CSV files.
    """
    directory = Path(input_path)

    # is_dir() also rejects a path that exists but is a regular file, which
    # would otherwise be reported as "No CSV files found".
    if not directory.is_dir():
        raise FileNotFoundError(f"Directory not found: {input_path}")

    # glob() yields entries in filesystem order, which is arbitrary; sort for
    # a deterministic result. Skip directories that happen to end in ".csv".
    csv_files = sorted(
        path for path in directory.glob('*.csv') if path.is_file()
    )

    if not csv_files:
        raise FileNotFoundError(f"No CSV files found in {input_path}")

    # Read each file and record which file every row came from.
    dfs = [
        pd.read_csv(path).assign(Source_File=path.name)
        for path in csv_files
    ]

    combined_df = pd.concat(dfs, ignore_index=True)

    # The output goes next to the input directory (not inside it), so a
    # re-run does not pick up its own previous output as an input.
    output_filename = f"combined_{directory.name.lower()}.csv"
    output_path = directory.parent / output_filename

    combined_df.to_csv(output_path, index=False)

    return combined_df, str(output_path)

def main():
    """
    Combine the CSV files of each cleaned-data directory and print a summary.

    Calls ``combine_csv_files`` for a fixed set of directories. For each one
    it prints the number of combined rows, the number of distinct source
    files, and where the output was saved. Any failure is printed and the
    loop moves on to the next directory.
    """
    targets = (
        "Cleaned_Data/Accidents",
        "Cleaned_Data/Casualties",
        "Cleaned_Data/Vehicles",
    )

    for target in targets:
        try:
            print(f"\nProcessing directory: {target}")
            combined, saved_to = combine_csv_files(target)
            total_rows = len(combined)
            file_count = combined['Source_File'].nunique()
            print(f"Combined {total_rows} rows from {file_count} files")
            print(f"Output saved to: {saved_to}")
        except FileNotFoundError as missing:
            # Missing directory or no CSV files in it.
            print(f"Error: {missing}")
        except Exception as failure:
            # Batch boundary: report and keep going with the other directories.
            print(f"An error occurred while processing {target}: {failure}")

# Run the batch only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Processing directory: Cleaned_Data/Accidents
Combined 529294 rows from 4 files
Output saved to: Cleaned_Data\combined_accidents.csv

Processing directory: Cleaned_Data/Casualties
Combined 699163 rows from 4 files
Output saved to: Cleaned_Data\combined_casualties.csv

Processing directory: Cleaned_Data/Vehicles
Combined 975680 rows from 4 files
Output saved to: Cleaned_Data\combined_vehicles.csv
