<a href="https://colab.research.google.com/github/jinhayoon/CEMCJ2019/blob/master/shimmer_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import pandas as pd
import numpy as np
from scipy import signal
from sklearn.preprocessing import StandardScaler

class ShimmerDataProcessor:
    """Turn a folder of Shimmer CSV recordings into per-event summary rows.

    Each CSV is loaded, resampled onto a regular time grid, low-pass
    filtered, standardized, split into two consecutive time windows, and
    reduced to one summary dict per window.  The dicts accumulate in
    ``participant_data`` and are written out by ``save_output``.
    """

    # CSV column holding the sample timestamps; it becomes the frame index.
    TIMESTAMP_COLUMN = 'Shimmer_D2B1_TimestampSync_FormattedUnix_CAL'

    def __init__(self, folder_path, output_path, *, conductance_column='GSR',
                 resistance_column='GSR', heart_rate_column='HR'):
        """Store paths and column choices and start with no results.

        Args:
            folder_path: Directory scanned for ``.csv`` files.
            output_path: Destination file for the summary CSV.
            conductance_column: Column summed into ``'Conductance'``.
            resistance_column: Column averaged into ``'Resistance'``.
            heart_rate_column: Column averaged into ``'HeartRate'``.
        """
        self.folder_path = folder_path
        self.output_path = output_path
        # NOTE(review): the defaults keep the historical behaviour, where
        # 'Resistance' is read from the same column as 'Conductance'. That
        # looks like a copy/paste slip - pass the real column names if the
        # CSV headers differ.
        self.conductance_column = conductance_column
        self.resistance_column = resistance_column
        self.heart_rate_column = heart_rate_column
        # One summary dict per (file, event window), in processing order.
        self.participant_data = []
        # Older name for the same list, kept so existing callers still work.
        self.ppt_data = self.participant_data

    def load_data(self, file_path, timestamp_unit='ms'):
        """Read one CSV and index it by timestamp.

        Args:
            file_path: Path of the CSV file to read.
            timestamp_unit: Unit used when the timestamp column is numeric.
                NOTE(review): 'ms' (Unix milliseconds) is an assumption
                about the export format - confirm against a real file.

        Returns:
            DataFrame indexed by a ``DatetimeIndex``.
        """
        data = pd.read_csv(file_path)
        data.set_index(self.TIMESTAMP_COLUMN, inplace=True)
        # resample() only accepts datetime-like indexes, so convert here.
        if not isinstance(data.index, pd.DatetimeIndex):
            if pd.api.types.is_numeric_dtype(data.index.dtype):
                data.index = pd.to_datetime(data.index, unit=timestamp_unit)
            else:
                data.index = pd.to_datetime(data.index)
        return data

    def resample_data(self, data, target_frequency='1s'):
        """Resample onto a regular grid, averaging samples in each interval.

        Args:
            data: DataFrame with a datetime-like index.
            target_frequency: pandas offset alias; default is one second.
                Lowercase ``'s'`` is used because uppercase ``'S'`` is
                deprecated in recent pandas releases.

        Returns:
            The resampled DataFrame.
        """
        return data.resample(target_frequency).mean()

    def apply_filter(self, data, cutoff_frequency=0.05, filter_order=4,
                     sampling_frequency=1.0):
        """Apply a zero-phase low-pass Butterworth filter to every column.

        Args:
            data: DataFrame of numeric columns sampled at
                ``sampling_frequency``.
            cutoff_frequency: Cut-off in Hz.
            filter_order: Order of the Butterworth filter.
            sampling_frequency: Sampling rate of ``data`` in Hz; 1.0 matches
                the default one-second resampling.

        Returns:
            DataFrame with the same columns and the same index as ``data``.
        """
        nyquist = 0.5 * sampling_frequency
        b, a = signal.butter(filter_order, cutoff_frequency / nyquist,
                             btype='low', analog=False)
        # Keep the original index: process_file slices the result by
        # timestamp label, which is impossible on a fresh integer index.
        # NaN samples (e.g. empty resampling bins) propagate through
        # filtfilt.
        return pd.DataFrame(
            {column: signal.filtfilt(b, a, data[column].to_numpy())
             for column in data.columns},
            index=data.index,
        )

    def standardize_data(self, data):
        """Scale every column with sklearn's ``StandardScaler``.

        Returns:
            DataFrame with the same columns and index as ``data``.
        """
        scaler = StandardScaler()
        return pd.DataFrame(scaler.fit_transform(data),
                            columns=data.columns, index=data.index)

    def extract_participant_id(self, file_name):
        """Return the text between ``"Session"`` and ``"_Shimmer"``.

        Args:
            file_name: Base name of the CSV file.

        Returns:
            The participant ID as a string.

        Raises:
            ValueError: If either marker is missing from ``file_name``.
        """
        print(f"Processing CSV file: {file_name}")
        session_pos = file_name.find("Session")
        if session_pos < 0:
            raise ValueError(f"'Session' marker not found in file name: {file_name!r}")
        start_index = session_pos + len("Session")
        # Search after the "Session" marker so the slice is never inverted.
        end_index = file_name.find("_Shimmer", start_index)
        if end_index < 0:
            raise ValueError(f"'_Shimmer' marker not found in file name: {file_name!r}")
        PID = file_name[start_index:end_index]
        print("extracted PID", PID)
        return PID

    def identify_event_times(self, data, baseline_seconds=105):
        """Split the recording into two consecutive time windows.

        Assumes ``data`` has one row per second, so row positions equal
        elapsed seconds.

        Args:
            data: Resampled DataFrame.
            baseline_seconds: Position of the last row of the first window;
                the default 105 is 1 minute 45 seconds.

        Returns:
            ``((start_1, end_1), (start_2, end_2))`` as index labels.  The
            second window starts on the row after ``end_1`` and runs to the
            last row.

        Raises:
            IndexError: If ``data`` is too short to contain both windows.
        """
        required_rows = baseline_seconds + 2
        if len(data.index) < required_rows:
            raise IndexError(
                f"recording has {len(data.index)} rows; at least "
                f"{required_rows} are needed for both event windows"
            )
        event_start_time_1 = data.index[0]
        event_end_time_1 = data.index[baseline_seconds]
        event_start_time_2 = data.index[baseline_seconds + 1]
        event_end_time_2 = data.index[-1]
        return (event_start_time_1, event_end_time_1), (event_start_time_2, event_end_time_2)

    def _summarize_event(self, participant_id, start_time, end_time, event_data):
        """Build the summary row for one event window."""
        return {
            'ParticipantID': participant_id,
            'EventStartTime': start_time,
            'EventEndTime': end_time,
            'Conductance': event_data[self.conductance_column].sum(),
            'Resistance': event_data[self.resistance_column].mean(),
            'HeartRate': event_data[self.heart_rate_column].mean(),
        }

    def process_file(self, file_path):
        """Run the full pipeline on one CSV and append two summary rows.

        Args:
            file_path: Path of the CSV file to process.
        """
        participant_id = self.extract_participant_id(os.path.basename(file_path))
        data = self.resample_data(self.load_data(file_path))
        filtered_data = self.apply_filter(data)
        standardized_data = self.standardize_data(filtered_data)

        # Label slicing with .loc includes both end points.
        for start_time, end_time in self.identify_event_times(data):
            event_data = standardized_data.loc[start_time:end_time]
            self.participant_data.append(
                self._summarize_event(participant_id, start_time, end_time, event_data)
            )

    def process_folder(self):
        """Process every ``.csv`` file found directly in ``folder_path``."""
        # Sorted so the row order of the output does not depend on the
        # arbitrary order returned by os.listdir.
        for file_name in sorted(os.listdir(self.folder_path)):
            if file_name.endswith('.csv'):
                self.process_file(os.path.join(self.folder_path, file_name))

    def save_output(self):
        """Write the collected summary rows to ``output_path`` as CSV."""
        output_df = pd.DataFrame(self.participant_data)
        parent_dir = os.path.dirname(self.output_path)
        if parent_dir:
            # Create the destination directory so to_csv does not fail on it.
            os.makedirs(parent_dir, exist_ok=True)
        output_df.to_csv(self.output_path, index=False)


if __name__ == '__main__':
    # Folder scanned for the Shimmer CSV exports.
    input_path = '/Users/jinhayoon/Desktop/TOCHI/physio/healthcareshim'

    # The three lists below are not read by the code that follows; they are
    # kept because other notebook cells may rely on them.
    # Presumably the event boundaries (start and 1 min 45 s) - TODO confirm.
    timestamps = ['00:00:00', '00:01:45']

    original_headers = ['Shimmer_D2B1_TimestampSync_FormattedUnix_CAL',
                        'Shimmer_4A81_GSR_Skin_Conductance_CAL',
                        'Shimmer_4A81_GSR_Skin_Resistance_CAL',
                        'Shimmer_4A81_PPG_A13_CAL']

    new_headers = ['PID', 'BASELINE_CONDUCTANCE', 'BASELINE_RESISTANCE', 'BASELINE_HEARTRATE',
                   'CON_CONDUCTANCE', 'CON_RESISTANCE', 'CON_HEARTRATE']

    # NOTE(review): this path has no '.csv' extension; the summary is
    # written to a file with exactly this name - confirm that is intended.
    output_path = '/Users/jinhayoon/Desktop/TOCHI/physio/healthcare_output'

    # Fail early with an actionable message instead of the bare error from
    # os.listdir when the folder is absent (e.g. when run on another machine).
    if not os.path.isdir(input_path):
        raise FileNotFoundError(
            f"Input folder not found: {input_path!r}. "
            "Point input_path at the folder containing the Shimmer CSV files."
        )

    # Create an instance of ShimmerDataProcessor
    shimmer_processor = ShimmerDataProcessor(input_path, output_path)

    # Process all files in the specified folder
    shimmer_processor.process_folder()

    # Save the processed data to a CSV file
    shimmer_processor.save_output()

FileNotFoundError: ignored