In [1]:
import pandas as pd
from scipy.signal import butter, filtfilt
import os
import re
from datetime import datetime

# Butterworth low-pass filtering helper
def butter_lowpass_filter(data, cutoff, fs, order=4):
    """Low-pass filter ``data`` with a digital Butterworth filter.

    The filter is designed with ``scipy.signal.butter`` and applied with
    ``scipy.signal.filtfilt``, which runs it forward and then backward over
    the signal.

    Args:
        data: Sequence of samples to filter.
        cutoff: Cutoff frequency, expressed in the same units as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order of the Butterworth design passed to ``butter``.

    Returns:
        The filtered samples, as returned by ``filtfilt``.
    """
    # butter() takes the cutoff as a fraction of the Nyquist frequency (fs / 2).
    nyquist = fs / 2
    numerator, denominator = butter(
        order,
        cutoff / nyquist,
        btype='low',
        analog=False,
    )
    filtered = filtfilt(numerator, denominator, data)
    return filtered

# Pattern for the date token in a file name: a standalone run of 6 to 8 digits
# (e.g. '03152019'). The look-behind/look-ahead stop it from matching a slice
# of a longer digit run such as a long ID or an epoch timestamp. Compiled once
# here instead of on every call.
_DATE_PATTERN = re.compile(r'(?<!\d)\d{6,8}(?!\d)')


def find_date(filename):
    """Return the first standalone 6-8 digit run in ``filename``.

    The digits are returned as found; they are not checked to be a valid
    calendar date.

    Args:
        filename: File name (or any string) to search.

    Returns:
        The matched digit string, or ``None`` when ``filename`` contains no
        run of exactly 6 to 8 consecutive digits.
    """
    match = _DATE_PATTERN.search(filename)
    return match.group(0) if match else None

# Batch job: low-pass filter the 'EuclideanDistance' column of every dated file
# in the input directory and write the result, with an extra
# 'FilteredEuclideanDistance' column, to the output directory. A file that
# cannot be processed is reported and skipped so it does not abort the batch.

# Directories
input_directory = r"D:\Parkinson's\Euclidean_Norm\Resampled Eucledian Distance_2019"
output_directory = r"D:\Parkinson's\Filtered Data"

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Parameters
cutoff_frequency_lp = 20  # Low-pass cutoff; compared against the sampling rate computed below (1 / seconds)
order = 4  # Filter order

# Columns every input file must provide
required_columns = ('Timestamp', 'EuclideanDistance')

# Get all regular files from the input directory; sorted so that the
# processing order does not depend on the directory listing order
all_files = sorted(
    f for f in os.listdir(input_directory)
    if os.path.isfile(os.path.join(input_directory, f))
)

for filename in all_files:
    file_path = os.path.join(input_directory, filename)

    # Only files whose name carries a date token are processed
    date_str = find_date(filename)
    if date_str is None:
        print(f"No date found in filename {filename}")
        continue

    # The output name reuses the first two '_'-separated fields of the input
    # name (this assumes the format is always like '3002_03152019_...')
    main_part_of_filename = '_'.join(filename.split('_')[:2])
    output_file_path = os.path.join(output_directory, f"{main_part_of_filename}_filtered.csv")

    # Load the data
    try:
        df = pd.read_csv(file_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        print(f"Skipping {filename}: could not read CSV ({exc})")
        continue

    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Skipping {filename}: missing column(s) {missing_columns}")
        continue

    # Calculate the sampling rate from the median spacing of the timestamps,
    # which are interpreted as milliseconds since the epoch
    timestamps = pd.to_datetime(df['Timestamp'], unit='ms')
    median_interval = timestamps.diff().dt.total_seconds().median()
    # `not (x > 0)` also rejects NaN (fewer than two rows) as well as zero or
    # negative spacing, any of which would give a meaningless sampling rate
    if not (median_interval > 0):
        print(f"Skipping {filename}: cannot determine sampling rate (median interval {median_interval})")
        continue
    sampling_rate = 1 / median_interval

    # A digital low-pass filter needs its cutoff strictly below Nyquist
    if cutoff_frequency_lp >= 0.5 * sampling_rate:
        print(
            f"Skipping {filename}: cutoff {cutoff_frequency_lp} is not below "
            f"the Nyquist frequency {0.5 * sampling_rate}"
        )
        continue

    # Low-pass filter the acceleration data
    try:
        filtered = butter_lowpass_filter(df['EuclideanDistance'], cutoff_frequency_lp, sampling_rate, order)
    except ValueError as exc:
        # Raised by scipy, e.g. when the signal is shorter than the edge
        # padding filtfilt needs
        print(f"Skipping {filename}: could not filter signal ({exc})")
        continue
    df['FilteredEuclideanDistance'] = filtered

    # Save the filtered data to a new CSV file
    df.to_csv(output_file_path, index=False)

    print(f"Processed and saved filtered data for file {filename} to {output_file_path}")



Processed and saved filtered data for file 12593_03182019_Euclidean_Norms_output.csv to D:\Parkinson's\Filtered Data\12593_03182019_filtered.csv
Processed and saved filtered data for file 13039_10012019_Euclidean_Norms_output.csv to D:\Parkinson's\Filtered Data\13039_10012019_filtered.csv
Processed and saved filtered data for file 14281_02192019_Euclidean_Norms_output.csv to D:\Parkinson's\Filtered Data\14281_02192019_filtered.csv
Processed and saved filtered data for file 14331_06052019_Euclidean_Norms_output.csv to D:\Parkinson's\Filtered Data\14331_06052019_filtered.csv
Processed and saved filtered data for file 18567_03202019_Euclidean_Norms_output.csv to D:\Parkinson's\Filtered Data\18567_03202019_filtered.csv
Processed and saved filtered data for file 3002_03152019_Euclidean_Norms_output.csv to D:\Parkinson's\Filtered Data\3002_03152019_filtered.csv
Processed and saved filtered data for file 3003_03292019_Euclidean_Norms_output.csv to D:\Parkinson's\Filtered Data\3003_03292019_fi