Load Required Libraries:

In [1]:
import pandas as pd
import os


Set the Directory and List CSV Files:

In [2]:
# Directory where the CSV files are stored
csv_directory = "C:\\Users\\willi"

# Name of the combined CSV that the save cell of this notebook writes into the
# same directory; it must not be picked up as an input when the notebook is re-run
combined_output_name = "benign_all_w10_off_10.csv"

# List all input CSV files in the directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# concatenation order (and therefore the saved file) reproducible across runs.
csv_files = sorted(
    f for f in os.listdir(csv_directory)
    if f.endswith('.csv') and f != combined_output_name
)
csv_files


['ambient_dyno_drive_basic_long_benign_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_drive_basic_short_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_drive_benign_anomaly_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_drive_extended_long_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_drive_extended_short_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_drive_radio_infotainment_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_drive_winter_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_exercise_all_bits_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_idle_radio_infotainment_window_size_10.0_offset_10.0.csv',
 'benign_ambient_dyno_reverse_window_size_10.0_offset_10.0.csv']

Load All CSV Files into DataFrames:

In [3]:
# Build the full path of every listed CSV, then read each one into its own
# DataFrame (same order as csv_files)
csv_paths = [os.path.join(csv_directory, name) for name in csv_files]
dataframes = list(map(pd.read_csv, csv_paths))


In [6]:
# Row count of every loaded DataFrame, in the same order as csv_files
row_counts = [frame.shape[0] for frame in dataframes]

# Position of the DataFrame with the most rows (the first one wins on ties)
idx_max_rows = row_counts.index(max(row_counts))

# Filename that sits at that position in the file list
csv_max_rows = csv_files[idx_max_rows]

print(f"CSV with the maximum number of rows: {csv_max_rows}")
print(f"Number of rows: {dataframes[idx_max_rows].shape[0]}")


CSV with the maximum number of rows: benign_ambient_dyno_exercise_all_bits_window_size_10.0_offset_10.0.csv
Number of rows: 217


Pad Each DataFrame and Concatenate:

In [7]:
def _pad_with_zero_rows(frame, target_rows):
    """Return `frame` with `target_rows - frame.shape[0]` all-zero rows appended.

    The zero rows use the same columns as `frame`. If the difference is not
    positive, no rows are appended.
    """
    missing_rows = target_rows - frame.shape[0]
    zero_rows = pd.DataFrame(0, index=range(missing_rows), columns=frame.columns)
    return pd.concat([frame, zero_rows], axis=0)


# Target length: the row count of the DataFrame at idx_max_rows
max_rows = dataframes[idx_max_rows].shape[0]

# Fill missing values with 0 in every DataFrame before padding
dataframes = [frame.fillna(0) for frame in dataframes]

# Bring every DataFrame up to max_rows rows
padded_dfs = [_pad_with_zero_rows(frame, max_rows) for frame in dataframes]

# Stack the padded DataFrames into one, renumbering the rows from 0
final_df = pd.concat(padded_dfs, axis=0, ignore_index=True)


Display the Concatenated DataFrame and Save:

In [8]:
# Display the final DataFrame: a bare last expression uses the notebook's rich
# (HTML) rendering instead of print()'s plain text, and pandas still truncates
# long frames to their first and last rows
final_df

                                              Embedding  Label
0     [ 6.14365801e-03 -1.44333249e-02  1.34322824e-...      0
1     [ 7.83259893e-03 -8.07137362e-06  2.13869899e-...      0
2     [ 2.10441982e-02  7.73854195e-04  1.55863106e-...      0
3     [ 1.19718282e-02 -5.85970540e-03  2.04822147e-...      0
4     [ 1.55465428e-02 -7.42801368e-04  1.99866048e-...      0
...                                                 ...    ...
2165                                                  0      0
2166                                                  0      0
2167                                                  0      0
2168                                                  0      0
2169                                                  0      0

[2170 rows x 2 columns]


In [9]:
# Write the combined DataFrame into csv_directory, without the index column
output_filename = "benign_all_w10_off_10.csv"
output_path = os.path.join(csv_directory, output_filename)
final_df.to_csv(output_path, index=False)

# Report only the filename, not the full path
print(f"Final concatenated DataFrame saved to {output_filename}.")


Final concatenated DataFrame saved to benign_all_w10_off_10.csv.
