In [None]:
# TSG Lab, David H. Nguyen, PhD

# This script loads all CSV files in a folder. It copies all columns of the first CSV file into a new data frame.
# It then opens each of the other CSV files and copies everything EXCEPT the row-index column (named "bin_order"),
# pasting the copied columns into the new data frame. Finally, it rewrites the "bin_order" column so that it numbers
# the rows from 1 to the length of the longest column in the data frame.

# This means that this script is meant to be used on CSV files that all have the exact same format: the first column
# is a row index named "bin_order" and the remaining columns are data.

In [1]:
import os
import pandas as pd

In [5]:
# Name of the compiled file written into the input folder. It is excluded from
# the inputs so that re-running this cell does not fold a previous result back
# into the new one.
OUTPUT_FILENAME = "multiple_files_compiled.csv"

# Name of the row-index column shared by every input file.
INDEX_COLUMN = "bin_order"


def list_csv_inputs(folder, output_name=OUTPUT_FILENAME):
    """Return the input CSV file names found in ``folder``, sorted by name.

    ``os.listdir`` returns entries in arbitrary order, so the names are sorted
    to make the choice of "first file" (and therefore the column order of the
    compiled file) reproducible. The previously written output file is skipped.
    """
    return sorted(
        name
        for name in os.listdir(folder)
        if name.endswith(".csv") and name != output_name
    )


def combine_csv_folder(folder, output_name=OUTPUT_FILENAME, index_column=INDEX_COLUMN):
    """Combine every CSV in ``folder`` side by side and save the result.

    All columns of the first file (in sorted name order) are kept. For every
    other file the ``index_column`` is dropped (if present) and the remaining
    columns are appended to the right. ``index_column`` is then rewritten to
    number the rows 1..N, where N is the row count of the longest input.
    Shorter inputs are padded with NaN by pandas' index alignment.

    Parameters
    ----------
    folder : str
        Directory containing the input CSV files.
    output_name : str
        File name (inside ``folder``) the combined CSV is written to.
    index_column : str
        Name of the row-index column to drop from all but the first file.

    Returns
    -------
    pandas.DataFrame or None
        The combined frame, or ``None`` when the folder holds no input CSVs
        (in which case nothing is written).
    """
    names = list_csv_inputs(folder, output_name)
    if not names:
        return None

    frames = []
    for position, name in enumerate(names):
        frame = pd.read_csv(os.path.join(folder, name))
        if position > 0:
            # Only the first file contributes the index column.
            frame = frame.drop(columns=[index_column], errors="ignore")
        frames.append(frame)

    # Concatenate once instead of growing the frame inside the loop.
    combined = pd.concat(frames, axis=1)

    # Renumber the index column from 1 to the number of rows (inclusive).
    # If the first file had no such column, it is appended as the last column.
    combined[index_column] = range(1, len(combined) + 1)

    combined.to_csv(os.path.join(folder, output_name), index=False)
    return combined


# Folder containing the CSV files to combine (edit this path as needed)
folder_path = "/Users/davidnguyen/Documents/test2"
output_path = os.path.join(folder_path, OUTPUT_FILENAME)

# Report problems with a message instead of calling exit(), which would shut
# down the notebook kernel.
if not os.path.isdir(folder_path):
    print(f"Folder not found: {folder_path}")
else:
    combined_df = combine_csv_folder(folder_path)
    if combined_df is None:
        print("No CSV files found in the folder.")
    else:
        print(f"Combined CSV saved to: {output_path}")


Combined CSV saved to: /Users/davidnguyen/Documents/test2/multiple_files_compiled.csv
