In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the whole '/kaggle/input' tree and print the full path of every file found,
# one per line. Sub-directory names (the second tuple element) are ignored.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the bare file name to get a usable path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/epileptic-four-feature/test_wpli2_debiased_features.csv
/kaggle/input/epileptic-four-feature/test_hfd_features.csv
/kaggle/input/epileptic-four-feature/train_perm_entropy_features.csv
/kaggle/input/epileptic-four-feature/train_combined_power_features.csv
/kaggle/input/epileptic-four-feature/test_combined_power_features.csv
/kaggle/input/epileptic-four-feature/test_perm_entropy_features.csv
/kaggle/input/epileptic-four-feature/train_hfd_features.csv
/kaggle/input/epileptic-four-feature/train_wpli2_debiased_features.csv


In [2]:
import pandas as pd
import os
import sys

def merge_datasets(file_list, output_name):
    """Inner-join several feature CSVs on ('subject_id', 'label') and save the result.

    The files are read in the order given. The first one seeds the merged frame
    and every following one is inner-joined onto it, so only rows whose
    (subject_id, label) pair occurs in every file are kept.

    Args:
        file_list: Sequence of CSV paths to merge, in merge order.
        output_name: Path of the CSV to write (written without the index).

    Returns:
        The merged DataFrame on success. None if `file_list` is empty or a
        listed file does not exist; in that case nothing is written.

    Raises:
        SystemExit: with code 1 (via sys.exit) when any file, including the
            first, lacks a 'subject_id' or 'label' column.
    """
    print(f"--- Merging {output_name} ---")

    key_columns = ['subject_id', 'label']

    # Guard against an empty list, which would otherwise fail with IndexError.
    if not file_list:
        print("FATAL: No input files given")
        return None

    master_df = None
    for filepath in file_list:
        if not os.path.exists(filepath):
            print(f"FATAL: File not found {filepath}")
            return None

        current_df = pd.read_csv(filepath)

        # Validate the join keys on EVERY file (the first one included) so a
        # malformed input is reported explicitly instead of surfacing later
        # as a KeyError from inside pd.merge.
        if 'subject_id' not in current_df.columns or 'label' not in current_df.columns:
            print(f"ERROR: {filepath} is missing 'subject_id' or 'label' columns.")
            sys.exit(1)

        if master_df is None:
            # The first file initializes the master frame; nothing to join yet.
            master_df = current_df
            print(f"Initialized with {filepath}: {master_df.shape}")
            continue

        # how='inner' keeps only key pairs present on both sides, so rows that
        # are missing from any file are dropped from the result.
        master_df = pd.merge(master_df, current_df, on=key_columns, how='inner')
        print(f"Merged {filepath}. New shape: {master_df.shape}")

    master_df.to_csv(output_name, index=False)
    print(f"SUCCESS: Saved {output_name} with final shape {master_df.shape}\n")
    return master_df

def main(input_dir="/kaggle/input/epileptic-four-feature",
         feature_sets=("combined_power", "hfd", "perm_entropy", "wpli2_debiased")):
    """Merge the per-feature CSVs of the train and test splits into master files.

    For each split ("train", then "test") the input paths are built as
    `<input_dir>/<split>_<feature>_features.csv`, in the order given by
    `feature_sets`, and passed to `merge_datasets`, which writes
    `master_<split>.csv` (a relative path, so it lands in the current
    working directory).

    Args:
        input_dir: Directory holding the feature CSVs. Defaults to the Kaggle
            dataset directory this notebook was written against.
        feature_sets: Feature-set names in merge order. The order determines
            the column order of the merged output.
    """
    # Build both file lists from one template so train and test cannot drift
    # out of sync. "/" is used as the separator (rather than os.path.join) so
    # the paths passed on and logged match the original literal paths exactly.
    for split in ("train", "test"):
        split_files = [
            f"{input_dir}/{split}_{feature}_features.csv"
            for feature in feature_sets
        ]
        merge_datasets(split_files, f"master_{split}.csv")

# Entry-point guard: run both merges only when this code executes as the
# top-level module (`__name__` equals "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

--- Merging master_train.csv ---
Initialized with /kaggle/input/epileptic-four-feature/train_combined_power_features.csv: (169, 212)
Merged /kaggle/input/epileptic-four-feature/train_hfd_features.csv. New shape: (169, 233)
Merged /kaggle/input/epileptic-four-feature/train_perm_entropy_features.csv. New shape: (169, 254)
Merged /kaggle/input/epileptic-four-feature/train_wpli2_debiased_features.csv. New shape: (169, 1304)
SUCCESS: Saved master_train.csv with final shape (169, 1304)

--- Merging master_test.csv ---
Initialized with /kaggle/input/epileptic-four-feature/test_combined_power_features.csv: (43, 212)
Merged /kaggle/input/epileptic-four-feature/test_hfd_features.csv. New shape: (43, 233)
Merged /kaggle/input/epileptic-four-feature/test_perm_entropy_features.csv. New shape: (43, 254)
Merged /kaggle/input/epileptic-four-feature/test_wpli2_debiased_features.csv. New shape: (43, 1304)
SUCCESS: Saved master_test.csv with final shape (43, 1304)

