# In [None]:
import pandas as pd
import tsfel
import os
from glob import glob

# The root directory of the dataset.
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\P" and "\F".
main_directory = r"D:\Parkinson's\Filtered_Data_9_to_5_updated"
# The directory where the feature extraction files will be saved
output_directory = r"D:\Parkinson's\Features_Extraction_5mins_overlap"

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Get all CSV files in the main directory that match the filtered pattern
file_paths = glob(os.path.join(main_directory, "*_filtered.csv"))

# Define a function to process each file
def process_file(file_path: str) -> None:
    """Extract TSFEL features from one filtered CSV and save them as a CSV.

    The sampling rate is estimated from the median spacing of the
    millisecond ``Timestamp`` column. A 300-second window with 50% overlap is
    then slid over ``FilteredEuclideanDistance``; each complete window yields
    one row of features. The rows are written to
    ``<output_directory>/<base_name>_TSFEL_output.csv`` (an empty frame is
    written when no complete window fits or every window fails).

    Args:
        file_path: Path to a CSV file whose name ends in ``_filtered.csv``
            and which contains ``Timestamp`` and
            ``FilteredEuclideanDistance`` columns.

    Returns:
        None. Progress and per-window errors are reported with ``print``.
    """
    window_seconds = 300
    signal_column = 'FilteredEuclideanDistance'

    def _prefix_signal_name(column):
        # Rewrite only the leading channel index; a plain str.replace would
        # also rewrite any "0_" that happens to occur later in the name.
        if column.startswith('0_'):
            return f"{signal_column}_" + column[len('0_'):]
        return column

    # Extract the base file name without extension to use in output file name
    base_name = os.path.basename(file_path).replace('_filtered.csv', '')

    # Load the data
    df = pd.read_csv(file_path)
    # Neither column is used below, so tolerate files that do not carry them.
    df.drop(['Datetime_PST', 'EuclideanDistance'], axis=1, inplace=True,
            errors='ignore')

    # Convert 'Timestamp' from milliseconds to datetimes and take the median
    # spacing between consecutive samples, in seconds.
    timestamps = pd.to_datetime(df['Timestamp'], unit='ms')
    median_interval = timestamps.diff().dt.total_seconds().median()

    # NaN (fewer than two rows), zero (duplicate timestamps) and negative
    # (descending timestamps) intervals give no usable rate. Checking the
    # interval before dividing avoids a division by zero / infinite rate.
    if not median_interval > 0:
        print(f"Sampling rate for file {file_path} is undefined or zero. Cannot extract features.")
        return
    sampling_rate = 1 / median_interval

    # Number of samples in a 300-second window, and the step for 50% overlap.
    # Both are floored at 1 so that range() never receives a zero step.
    window_size = max(int(sampling_rate * window_seconds), 1)
    step_size = max(window_size // 2, 1)

    # Feature configuration as returned by TSFEL with no domain filter.
    cfg = tsfel.get_features_by_domain()

    # Collect one feature row per window and concatenate once at the end;
    # concatenating inside the loop re-copies all previous rows every time.
    window_features = []

    # Only complete windows are visited: start + window_size <= len(df).
    for start in range(0, len(df) - window_size + 1, step_size):
        end = start + window_size
        segment = df.iloc[start:end].reset_index(drop=True)
        try:
            features = tsfel.time_series_features_extractor(
                cfg, segment[signal_column], fs=sampling_rate
            )
            features = features.rename(columns=_prefix_signal_name)
        except Exception as e:
            # Best effort: a failing window is reported and skipped.
            print(f"Error extracting features for segment {start}-{end}: {e}")
            continue
        window_features.append(features)

    if window_features:
        all_features = pd.concat(window_features, ignore_index=True)
    else:
        all_features = pd.DataFrame()

    # Save the features to a CSV file without applying variance threshold filtering
    output_file_name = f"{base_name}_TSFEL_output.csv"
    output_file_path = os.path.join(output_directory, output_file_name)
    all_features.to_csv(output_file_path, index=False)

    print(f"Features extracted and saved to {output_file_path}")

# Run the extraction over every matched file, bracketing each run with a
# start banner and a finish banner so progress is visible in the output.
for file_path in file_paths:
    started_banner = f"*** Feature extraction started for {file_path} ***"
    finished_banner = f"*** Feature extraction finished for {file_path} ***"

    print(started_banner)
    process_file(file_path)
    print(finished_banner)

