In [1]:
import os
import pandas as pd
from datetime import datetime

# Path separators plus characters that are reserved in Windows filenames.
_UNSAFE_FILENAME_CHARS = frozenset('<>:"/\\|?*')


def _safe_filename_part(value):
    """Return ``str(value)`` with filename-unsafe characters replaced by ``_``."""
    return "".join("_" if ch in _UNSAFE_FILENAME_CHARS else ch for ch in str(value))


def _unique_csv_path(directory, stem):
    """Return a ``.csv`` path for ``stem`` inside ``directory`` that does not exist yet."""
    candidate = os.path.join(directory, f"{stem}.csv")
    suffix = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem}_{suffix}.csv")
        suffix += 1
    return candidate


def process_csv_files(input_dir, output_dir, category_column="name"):
    """
    Splits every ';'-separated CSV file found under ``input_dir`` into one
    CSV file per distinct value of ``category_column``.

    Each output file is written to ``output_dir`` and is named
    ``<category>[_v2]_<YYYYmmdd_HHMMSS>.csv``; ``_v2`` is included when the
    input filename contains "v2". If that name is already taken, ``_1``,
    ``_2``, ... is appended so an existing file is never overwritten.

    Parameters:
        input_dir (str): Directory that is walked recursively for ``.csv`` files.
        output_dir (str): Directory that receives the per-category files.
            It is created if missing and is skipped while walking ``input_dir``.
        category_column (str): Name of the column to group by. Defaults to "name".

    Returns:
        None
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)
    output_abs = os.path.normcase(os.path.abspath(output_dir))

    for root, dirs, files in os.walk(input_dir):
        # Prune the output directory in place so that files written by this
        # run are not picked up and split again when it lives under input_dir.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != output_abs
        ]

        for file in files:
            if not file.endswith(".csv"):
                continue

            file_path = os.path.join(root, file)
            try:
                # Read the CSV file with ';' as separator
                df = pd.read_csv(file_path, sep=";")

                if category_column not in df.columns:
                    print(f"Warning: '{category_column}' column not found in {file_path}")
                    continue

                # groupby drops rows whose key is missing; report them instead
                # of losing them silently.
                missing = int(df[category_column].isna().sum())
                if missing:
                    print(
                        f"Warning: {missing} row(s) with missing '{category_column}' "
                        f"in {file_path} were skipped"
                    )

                # Carry the "v2" marker of the input filename into the output name
                version_part = "_v2" if "v2" in file else ""

                for category, data in df.groupby(category_column):
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    # The category value comes from the data, so make it safe
                    # to use as a single filename component.
                    stem = f"{_safe_filename_part(category)}{version_part}_{timestamp}"
                    # The timestamp has one-second resolution; the same category
                    # coming from another input file would otherwise overwrite this one.
                    category_filepath = _unique_csv_path(output_dir, stem)

                    data.to_csv(category_filepath, sep=";", index=False)
                    print(f"Saved category '{category}' to {category_filepath}")
            except Exception as e:
                # Best effort: report the failure and continue with the next file.
                print(f"Error processing file {file_path}: {e}")

In [7]:
# Folder that is searched (including sub-folders) for ';'-separated CSV files.
input_directory = "MGAB_filter_fft_21"
# Folder that receives the per-category CSV files.
output_directory = "MGAB_filter_fft_21_split"
# NOTE(review): defined here but not passed to process_csv_files in the call below.
category_column = "name"

process_csv_files(input_dir=input_directory, output_dir=output_directory)

Saved category 'MGAB_filter_fft_21_1.test.out' to MGAB_filter_fft_21_split\MGAB_filter_fft_21_1.test.out_20250113_204215.csv
Saved category 'MGAB_filter_fft_21_9.test.out' to MGAB_filter_fft_21_split\MGAB_filter_fft_21_9.test.out_20250113_204215.csv
Saved category 'MGAB_filter_fft_21_10.test.out' to MGAB_filter_fft_21_split\MGAB_filter_fft_21_10.test.out_20250113_204215.csv
Saved category 'MGAB_filter_fft_21_2.test.out' to MGAB_filter_fft_21_split\MGAB_filter_fft_21_2.test.out_20250113_204215.csv
Saved category 'MGAB_filter_fft_21_3.test.out' to MGAB_filter_fft_21_split\MGAB_filter_fft_21_3.test.out_20250113_204215.csv
Saved category 'MGAB_filter_fft_21_4.test.out' to MGAB_filter_fft_21_split\MGAB_filter_fft_21_4.test.out_20250113_204215.csv
Saved category 'MGAB_filter_fft_21_5.test.out' to MGAB_filter_fft_21_split\MGAB_filter_fft_21_5.test.out_20250113_204215.csv
Saved category 'MGAB_filter_fft_21_6.test.out' to MGAB_filter_fft_21_split\MGAB_filter_fft_21_6.test.out_20250113_204215.cs

In [4]:
import os
import pandas as pd

def add_prefix_to_name_column(directory, prefix, skip_if_prefixed=False):
    """
    Adds a prefix to the 'name' column of all CSV files in the specified directory.

    Files are read with ';' as separator and rewritten in place. Every cell is
    read as text, so columns other than 'name' are written back as they were
    read instead of being re-parsed and re-formatted. Empty 'name' cells are
    left empty rather than being turned into "<prefix>nan".

    Parameters:
        directory (str): Path to the directory containing the CSV files.
            Only files directly inside it are processed (no recursion).
        prefix (str): The prefix to add to the 'name' column.
        skip_if_prefixed (bool): When True, values that already start with
            ``prefix`` are left unchanged, which makes repeated calls safe.
            Defaults to False, i.e. the prefix is always prepended.

    Returns:
        None
    """
    # Iterate through all CSV files in the directory
    for file in os.listdir(directory):
        if not file.endswith(".csv"):
            continue

        file_path = os.path.join(directory, file)
        try:
            # Keep raw text: no dtype inference and no conversion of empty
            # cells or "NA"-like strings into NaN.
            df = pd.read_csv(file_path, sep=";", dtype=str, keep_default_na=False)

            if "name" not in df.columns:
                print(f"Warning: 'name' column not found in {file_path}")
                continue

            names = df["name"]
            # Only non-empty names receive the prefix.
            to_prefix = names.notna() & (names != "")
            if skip_if_prefixed:
                to_prefix &= ~names.str.startswith(prefix, na=False)
            df.loc[to_prefix, "name"] = prefix + names[to_prefix]

            # Save the updated CSV back to the same file
            df.to_csv(file_path, sep=";", index=False)
            print(f"Added prefix to 'name' column in {file_path}")
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Error processing file {file_path}: {e}")


In [6]:

# Directory whose CSV files are rewritten in place.
processed_files_directory = "MGAB_filter_fft_21"
# Text prepended to the values of each file's 'name' column.
# Running this cell again prepends the prefix a second time.
prefix_to_add = "MGAB_filter_fft_21_"

add_prefix_to_name_column(directory=processed_files_directory, prefix=prefix_to_add)


Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_1.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_10.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_2.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_3.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_4.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_5.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_6.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_7.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_8.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_9.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_v2_1.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_21_v2_10.csv
Added prefix to 'name' column in MGAB_filter_fft_21\MGAB_filter_fft_