Authors: Léandre Dubey (creation: 07.04.2024) and Olivia Lecomte (modified on: 08.05.2024)

In [1]:
# import sys
# !{sys.executable} -m pip install statsmodels
# !{sys.executable} -m pip install pyedflib
# !{sys.executable} -m pip install mne

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter
import datetime
from scipy import signal
import mne

# todo: update root folder
# Folder that holds one "SMS_*" sub-folder per participant. The path is built
# with os.path.join so the same relative location also resolves on
# macOS/Linux, where a backslash is not a path separator.
root_folder = os.path.join('.', 'Data')

# EMFIT19

In [2]:
#Just for emfit19
def process_file_emfit19(file_path, participant_id, plot=False):
    participant_id = 'SMS_020'
    file_path = os.path.join(root_folder, participant_id, "EMFIT_001519", "SMS_020-psg-EMFIT_001519-processed_vitals.csv")
    df_emfit19 = pd.read_csv(file_path)
    df_emfit19['timestamp'] = df_emfit19['timestamp'].apply(datetime.datetime.fromtimestamp)
    df_emfit19_filtered = df_emfit19[(df_emfit19['timestamp'].dt.hour >= 22) | (df_emfit19['timestamp'].dt.hour < 7)]
    df_emfit19_filtered.set_index('timestamp', inplace=True)

    normalized_activity = (df_emfit19_filtered['act'] - df_emfit19_filtered['act'].min()) / (df_emfit19_filtered['act'].max() - df_emfit19_filtered['act'].min())

    if plot:
        # Plot 'act' against 'timestamp'
        fig, ax = plt.subplots()
        ax.plot(df_emfit19_filtered.index, normalized_activity)
        ax.set_xlabel('Time')
        ax.set_ylabel('Normalized Activity Level')
        ax.set_title(f'Normalized Activity Level Between 22:00 and 07:00 for emfit19 Participant {participant_id}')

        # Set x-axis ticks as hours
        hours = HourLocator(interval=1)
        hour_format = DateFormatter("%H:%M")
        ax.xaxis.set_major_locator(hours)
        ax.xaxis.set_major_formatter(hour_format)
        plt.xticks(rotation=45)

        plt.grid(True)
        plt.show()

    return df_emfit19_filtered.index, normalized_activity

# Polysomnography

In [3]:
#Just for psg
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter

# Function to process each file
def process_file_psg(file_path, participant_id, plot=False):
    with open(file_path, 'r') as file:
        # Skip the first line
        next(file)
        lines = file.readlines()

    data_lines = [line.strip().split(';') for line in lines if ';' in line]

    data = {'time': [], 'activity': []}
    for data_line in data_lines:
        data['time'].append(data_line[0])
        # Remove any non-numeric characters from the activity data
        activity = ''.join(filter(str.isdigit, data_line[1]))
        data['activity'].append(activity)

    df = pd.DataFrame(data)

    try:
        datetime_format = '%d.%m.%Y %H:%M:%S,%f'
        df['time'] = pd.to_datetime(df['time'], format=datetime_format)
    except ValueError:
        datetime_format = '%H:%M:%S,%f'
        df['time'] = pd.to_datetime(df['time'], format=datetime_format)
    df.set_index('time', inplace=True)

    # Filter rows where the time is between 22:00 and 07:00
    filtered_df = df.between_time('22:00', '07:00')

    # Convert 'activity' column to numeric type
    filtered_df.loc[:,'activity'] = pd.to_numeric(filtered_df.loc[:,'activity'])

    # Normalize the 'activity' data
    normalized_activity = (filtered_df['activity'] - filtered_df['activity'].min()) / (
                filtered_df['activity'].max() - filtered_df['activity'].min())

    if plot:
        # Plotting
        # todo: add subplot axes to plot side by side or plot separately
        fig, ax = plt.subplots()
        ax.plot(filtered_df.index, normalized_activity)
        ax.set_xlabel('Time')
        ax.set_ylabel('Normalized Activity Level')
        ax.set_title(f'Normalized Activity Level Between 22:00 and 07:00 for Participant {participant_id}')

        # Set x-axis ticks as hours
        hours = HourLocator(interval=1)
        hour_format = DateFormatter("%H:%M")
        ax.xaxis.set_major_locator(hours)
        ax.xaxis.set_major_formatter(hour_format)
        plt.xticks(rotation=45)

        plt.grid(True)
        plt.show()

    return filtered_df.index, normalized_activity



# Plots: Polysomnography and EMFIT19

In [None]:
# Main loop to process files
for folder in os.listdir(root_folder):
    # Check if it is a patient folder
    if (not folder.startswith("SMS")):
        continue
    participant_id = folder  # The folder name is the participant id
    if os.path.exists(os.path.join(root_folder, participant_id, "Somnomedics")):
        file_path_psg = os.path.join(root_folder, participant_id, "Somnomedics", "Act.txt")
        x_psg, y_psg = process_file_psg(file_path_psg, participant_id)
        fig, ax = plt.subplots(1, 2, figsize= (18,6))
        ax[0].plot(x_psg, y_psg)
        ax[0].set_xlabel('Time')
        ax[0].set_ylabel('Normalized Activity Level')
        ax[0].set_title(f'Normalized Activity Level (PSG) Between 22:00 and 07:00 for Participant {participant_id}')

        # Set x-axis ticks as hours
        hours = HourLocator(interval=1)
        hour_format = DateFormatter("%H:%M")
        ax[0].xaxis.set_major_locator(hours)
        ax[0].xaxis.set_major_formatter(hour_format)
        ax[0].grid(True)
        plt.xticks(rotation=45)

    if os.path.exists(os.path.join(root_folder, participant_id, "EMFIT_001519")):
        file_path_emfit19 = os.path.join(root_folder, participant_id, "EMFIT_001519", "SMS_020-psg-EMFIT_001519-processed_vitals.csv")
        x_emfit19, y_emfit_19 = process_file_emfit19(file_path_psg, participant_id)
        ax[1].plot(x_emfit19, y_emfit_19)
        ax[1].set_xlabel('Time')
        ax[1].set_ylabel('Normalized Activity Level')
        ax[1].set_title(f'Normalized Activity Level (EMFIT19) Between 22:00 and 07:00 for Participant {participant_id}')

        # Set x-axis ticks as hours
        hours = HourLocator(interval=1)
        hour_format = DateFormatter("%H:%M")
        ax[1].xaxis.set_major_locator(hours)
        ax[1].xaxis.set_major_formatter(hour_format)
        plt.xticks(rotation=45)

        plt.grid(True)
        plt.show()

# Plots: Polysomnography over EMFIT 19 and cross-correlations

In [None]:
#Plot emfit19 on top of psg
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter
import datetime
from scipy import signal

# Function to process each file
def process_file_psg_emfit19(file_path_emfit19,file_path_psg, participant_id):
    """Overlay PSG and EMFIT19 night-time activity and cross-correlate them.

    The EMFIT19 CSV (columns ``timestamp`` in epoch seconds and ``act``) is
    read from ``file_path_emfit19``; the PSG export is loaded with
    ``process_file_psg``. Both signals are limited to 22:00-07:00 and min-max
    scaled. They are cross-correlated twice with ``scipy.signal.correlate``:
    once as recorded, and once after shifting one of them by the lag at which
    the first cross-correlation peaks. Each correlation curve is divided by
    its own maximum, so the reported values are relative to that peak and are
    not Pearson coefficients.

    Side effects: prints the two zero-lag values, appends them (as floats) to
    the module-level lists ``all_corr_wo_alignment`` and
    ``all_corr_after_alignment``, and shows a three-panel figure.

    NOTE(review): the signals are correlated sample by sample; this presumably
    assumes both devices report at the same rate - confirm.

    Parameters
    ----------
    file_path_emfit19 : str
        Path of the EMFIT19 vitals CSV.
    file_path_psg : str
        Path of the PSG activity export.
    participant_id : str
        Participant label used in messages and titles.

    Returns
    -------
    None
        Also returned early when the EMFIT19 file is missing or when either
        signal has no usable sample.
    """
    try:
        df_emfit19 = pd.read_csv(file_path_emfit19)
    except FileNotFoundError:
        print(f"File doesn't exist for emfit 19, participant {participant_id}")
        return

    df_emfit19['timestamp'] = df_emfit19['timestamp'].apply(datetime.datetime.fromtimestamp)
    hour_of_day = df_emfit19['timestamp'].dt.hour
    # Non in-place set_index returns a new frame instead of writing into a slice.
    df_emfit19_filtered = df_emfit19[(hour_of_day >= 22) | (hour_of_day < 7)].set_index('timestamp')
    act = df_emfit19_filtered['act']
    normalized_activity_emfit19 = pd.to_numeric((act - act.min()) / (act.max() - act.min()))

    # The PSG parsing lives in process_file_psg; reuse it instead of keeping a
    # second copy of the parser here.
    psg_index, normalized_activity_psg = process_file_psg(file_path_psg, participant_id)
    normalized_activity_psg = pd.to_numeric(normalized_activity_psg)

    # Drop NaNs from BOTH signals: a single NaN would turn the whole
    # cross-correlation into NaN.
    norm_psg_values = normalized_activity_psg.dropna().to_numpy(dtype=float)
    norm_emfit19_values = normalized_activity_emfit19.dropna().to_numpy(dtype=float)
    if norm_psg_values.size == 0 or norm_emfit19_values.size == 0:
        print(f'No usable night-time samples for participant {participant_id}; skipping cross-correlation')
        return

    def _peak_normalized(values):
        # Divide by the maximum unless that maximum is zero (or NaN).
        peak = np.max(values)
        return values / peak if peak > 0 else values

    def _zero_lag_value(values, lag_axis):
        # Value at lag 0 as a float, or None when lag 0 is outside the window.
        at_zero = values[lag_axis == 0]
        return float(at_zero[0]) if at_zero.size else None

    # todo: realign signals
    # Determine the length of the aligned signals
    max_len = max(len(norm_psg_values), len(norm_emfit19_values))

    # Cross-correlation
    corr = _peak_normalized(signal.correlate(norm_psg_values, norm_emfit19_values, mode='same'))
    lags = signal.correlation_lags(len(norm_psg_values), len(norm_emfit19_values), mode='same')

    # Lag at which the cross-correlation is largest
    lag_for_alignment = lags[np.argmax(corr)]

    # Alignment: zero-pad the front of one signal by the lag of the peak
    # (EMFIT19 for a positive lag, PSG for a negative one).
    if lag_for_alignment > 0:
        aligned_psg_values = norm_psg_values[:max_len]
        aligned_emfit19_values = np.pad(norm_emfit19_values, (lag_for_alignment, 0), mode='constant')[:max_len]
    elif lag_for_alignment < 0:
        aligned_psg_values = np.pad(norm_psg_values, (-lag_for_alignment, 0), mode='constant')[:max_len]
        aligned_emfit19_values = norm_emfit19_values[:max_len]
    else:
        aligned_psg_values = norm_psg_values[:max_len]
        aligned_emfit19_values = norm_emfit19_values[:max_len]

    # cross-correlation of the re-aligned signals
    corr_a = _peak_normalized(signal.correlate(aligned_psg_values, aligned_emfit19_values, mode='same'))
    lags_a = signal.correlation_lags(len(aligned_psg_values), len(aligned_emfit19_values), mode='same')

    corr_at_zero = _zero_lag_value(corr, lags)
    corr_a_at_zero = _zero_lag_value(corr_a, lags_a)
    print(f'Participant {participant_id}\nCorrelation without re-alignment (correlation at lags = 0) {corr_at_zero}\nCorrelation after re-alignment (correlation at lags = 0) {corr_a_at_zero}')

    if corr_at_zero is None or corr_a_at_zero is None:
        # Keeps the summary lists free of empty entries that would break the
        # mean/std computed over them.
        print(f'Lag 0 is outside the correlation window for participant {participant_id}; values not recorded')
    else:
        all_corr_wo_alignment.append(corr_at_zero)
        all_corr_after_alignment.append(corr_a_at_zero)

    # Plotting
    fig, ax = plt.subplots(1,3, figsize = (18, 6))
    ax[0].plot(df_emfit19_filtered.index, normalized_activity_emfit19, color='orange', label='emfit19')
    ax[0].plot(psg_index, normalized_activity_psg, color='blue', label='PSG')
    ax[1].plot(lags, corr)
    ax[2].plot(lags_a, corr_a)

    # Set labels and title
    ax[0].set_xlabel('Time')
    ax[0].set_ylabel('Normalized Activity Level')
    ax[0].set_title(f'Normalized Activity Level Between 22:00 and 07:00 \nfor Participant {participant_id}')
    ax[0].legend()

    # Set x-axis ticks to every hour
    ax[0].xaxis.set_major_locator(HourLocator(interval=1))
    ax[0].xaxis.set_major_formatter(DateFormatter("%H:%M"))

    # set title and labels
    ax[1].set_title(f'Cross-Correlated Signal for Participant {participant_id}')
    ax[1].set_xlabel('Lag')
    ax[1].set_ylabel('r')

    # Add labels and title
    ax[2].set_title(f'Aligned Cross-Correlated Signal for Participant {participant_id}')
    ax[2].set_xlabel('Lag')
    ax[2].set_ylabel('r')

    # Grid on every panel (plt.grid only reaches the last one)
    for axis in ax:
        axis.grid(True)
    plt.show()
    

# Main loop to process files
emfit19_corr = []
total_participants = 0
has_psg_and_emfit19_folder = 0
all_corr_wo_alignment = []
all_corr_after_alignment = []
# Participants excluded from the PSG/EMFIT19 comparison. Defined before the
# loop so the summary below also works when the loop body never runs.
problematic_patients = ['SMS_114','SMS_113','SMS_111','SMS_098','SMS_092','SMS_043']

for folder in os.listdir(root_folder):
    # Check if it is a patient folder
    if (not folder.startswith("SMS")):
        continue
    # Count only participant folders, not every entry of root_folder
    total_participants += 1
    participant_id = folder  # The folder name is the participant id
    file_path_psg = os.path.join(root_folder, participant_id, "Somnomedics", "Act.txt")
    file_path_emfit19 = os.path.join(root_folder, participant_id, "EMFIT_001519", f"{participant_id}-psg-EMFIT_001519-processed_vitals.csv")
    if os.path.exists(file_path_emfit19) and os.path.exists(file_path_psg) and participant_id not in problematic_patients:
        process_file_psg_emfit19(file_path_emfit19, file_path_psg, participant_id)
        has_psg_and_emfit19_folder +=1
    elif participant_id in problematic_patients:
        print(f'Problematic patient encountered: {participant_id}')
    else:
        print(f"Files missing for participant {participant_id}")

# print(f'Average correlation for EMFIT19 is {np.mean(emfit19_corr)}')
print(f'Total number of participants:{total_participants}\nTotal number of participants compared for PSG and emfit19: {has_psg_and_emfit19_folder}')
print(f'Number of omitted participants: {len(problematic_patients)}')
if all_corr_wo_alignment and all_corr_after_alignment:
    print(f'Average correlation before alignment: {np.mean(all_corr_wo_alignment)}, std: {np.std(all_corr_wo_alignment)}')
    print(f'Average correlation after alignment: {np.mean(all_corr_after_alignment)}, std: {np.std(all_corr_after_alignment)}')
else:
    # np.mean/np.std of an empty list only yield nan plus a RuntimeWarning
    print('No correlation value was recorded for PSG and emfit19')

# Plots: Polysomnography over EMFIT 05 and cross-correlations

In [None]:
#Plot emfit05 on top of psg
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter
import datetime


# Function to process each file
def process_file_psg_emfit05(file_path_emfit05, file_path_psg, participant_id):
    """Overlay PSG and EMFIT05 night-time activity and cross-correlate them.

    The EMFIT05 CSV (columns ``timestamp`` in epoch seconds and ``act``) is
    read from ``file_path_emfit05``; the PSG export is loaded with
    ``process_file_psg``. Both signals are limited to 22:00-07:00 and min-max
    scaled. They are cross-correlated twice with ``scipy.signal.correlate``:
    once as recorded, and once after shifting one of them by the lag at which
    the first cross-correlation peaks. Each correlation curve is divided by
    its own maximum, so the reported values are relative to that peak and are
    not Pearson coefficients.

    Side effects: prints the two zero-lag values, appends them (as floats) to
    the module-level lists ``all_corr_wo_alignment_emfit05`` and
    ``all_corr_after_alignment_emfit05``, and shows a three-panel figure.

    NOTE(review): the signals are correlated sample by sample; this presumably
    assumes both devices report at the same rate - confirm.

    Parameters
    ----------
    file_path_emfit05 : str
        Path of the EMFIT05 vitals CSV.
    file_path_psg : str
        Path of the PSG activity export.
    participant_id : str
        Participant label used in messages and titles.

    Returns
    -------
    None
        Also returned early when the EMFIT05 file is missing or when either
        signal has no usable sample.
    """
    try:
        df_emfit05 = pd.read_csv(file_path_emfit05)
    except FileNotFoundError:
        print(f"File doesn't exist for emfit 05, participant {participant_id}")
        return

    df_emfit05['timestamp'] = df_emfit05['timestamp'].apply(datetime.datetime.fromtimestamp)
    hour_of_day = df_emfit05['timestamp'].dt.hour
    # Non in-place set_index returns a new frame instead of writing into a slice.
    df_emfit05_filtered = df_emfit05[(hour_of_day >= 22) | (hour_of_day < 7)].set_index('timestamp')
    act = df_emfit05_filtered['act']
    normalized_activity_emfit05 = pd.to_numeric((act - act.min()) / (act.max() - act.min()))

    # The PSG parsing lives in process_file_psg; reuse it instead of keeping a
    # second copy of the parser here.
    psg_index, normalized_activity_psg = process_file_psg(file_path_psg, participant_id)
    normalized_activity_psg = pd.to_numeric(normalized_activity_psg)

    # Drop NaNs from BOTH signals: a single NaN would turn the whole
    # cross-correlation into NaN.
    norm_psg_values = normalized_activity_psg.dropna().to_numpy(dtype=float)
    norm_emfit05_values = normalized_activity_emfit05.dropna().to_numpy(dtype=float)
    if norm_psg_values.size == 0 or norm_emfit05_values.size == 0:
        print(f'No usable night-time samples for participant {participant_id}; skipping cross-correlation')
        return

    def _peak_normalized(values):
        # Divide by the maximum unless that maximum is zero (or NaN).
        peak = np.max(values)
        return values / peak if peak > 0 else values

    def _zero_lag_value(values, lag_axis):
        # Value at lag 0 as a float, or None when lag 0 is outside the window.
        at_zero = values[lag_axis == 0]
        return float(at_zero[0]) if at_zero.size else None

    # todo: realign signals
    # Determine the length of the aligned signals
    max_len = max(len(norm_psg_values), len(norm_emfit05_values))

    # Cross-correlation
    corr = _peak_normalized(signal.correlate(norm_psg_values, norm_emfit05_values, mode='same'))
    lags = signal.correlation_lags(len(norm_psg_values), len(norm_emfit05_values), mode='same')

    # Lag at which the cross-correlation is largest
    lag_for_alignment = lags[np.argmax(corr)]

    # Alignment: zero-pad the front of one signal by the lag of the peak
    # (EMFIT05 for a positive lag, PSG for a negative one).
    if lag_for_alignment > 0:
        aligned_psg_values = norm_psg_values[:max_len]
        aligned_emfit05_values = np.pad(norm_emfit05_values, (lag_for_alignment, 0), mode='constant')[:max_len]
    elif lag_for_alignment < 0:
        aligned_psg_values = np.pad(norm_psg_values, (-lag_for_alignment, 0), mode='constant')[:max_len]
        aligned_emfit05_values = norm_emfit05_values[:max_len]
    else:
        aligned_psg_values = norm_psg_values[:max_len]
        aligned_emfit05_values = norm_emfit05_values[:max_len]

    # cross-correlation of the re-aligned signals
    corr_a = _peak_normalized(signal.correlate(aligned_psg_values, aligned_emfit05_values, mode='same'))
    lags_a = signal.correlation_lags(len(aligned_psg_values), len(aligned_emfit05_values), mode='same')

    corr_at_zero = _zero_lag_value(corr, lags)
    corr_a_at_zero = _zero_lag_value(corr_a, lags_a)
    print(f'Participant {participant_id}\nCorrelation without re-alignment (correlation at lags = 0) {corr_at_zero}\nCorrelation after re-alignment (correlation at lags = 0) {corr_a_at_zero}')

    if corr_at_zero is None or corr_a_at_zero is None:
        # Keeps the summary lists free of empty entries that would break the
        # mean/std computed over them.
        print(f'Lag 0 is outside the correlation window for participant {participant_id}; values not recorded')
    else:
        all_corr_wo_alignment_emfit05.append(corr_at_zero)
        all_corr_after_alignment_emfit05.append(corr_a_at_zero)

    # Plotting
    fig, ax = plt.subplots(1,3, figsize = (18, 6))
    ax[0].plot(df_emfit05_filtered.index, normalized_activity_emfit05, color='orange', label='emfit05')
    ax[0].plot(psg_index, normalized_activity_psg, color='blue', label='PSG')
    ax[1].plot(lags, corr)
    ax[2].plot(lags_a, corr_a)

    # Set labels and title
    ax[0].set_xlabel('Time')
    ax[0].set_ylabel('Normalized Activity Level')
    ax[0].set_title(f'Normalized Activity Level Between 22:00 and 07:00 \nfor Participant {participant_id}')
    ax[0].legend()

    # Set x-axis ticks to every hour
    ax[0].xaxis.set_major_locator(HourLocator(interval=1))
    ax[0].xaxis.set_major_formatter(DateFormatter("%H:%M"))

    # set title and labels
    ax[1].set_title(f'Cross-Correlated Signal for Participant {participant_id}')
    ax[1].set_xlabel('Lag')
    ax[1].set_ylabel('r')

    # Add labels and title
    ax[2].set_title(f'Aligned Cross-Correlated Signal for Participant {participant_id}')
    ax[2].set_xlabel('Lag')
    ax[2].set_ylabel('r')

    # Grid on every panel (plt.grid only reaches the last one)
    for axis in ax:
        axis.grid(True)
    plt.show()


# Main loop to process files
emfit05_corr = []
total_participants = 0
has_psg_and_emfit05_folder = 0
all_corr_wo_alignment_emfit05 = []
all_corr_after_alignment_emfit05 = []
# Participants excluded from the PSG/EMFIT05 comparison. Defined before the
# loop so the summary below also works when the loop body never runs.
problematic_patients = ['SMS_175','SMS_173','SMS_170','SMS_161', 'SMS_114', 'SMS_113', 'SMS_111', 'SMS_098', 'SMS_092', 'SMS_043']

for folder in os.listdir(root_folder):
    # Check if it is a patient folder
    if (not folder.startswith("SMS")):
        continue
    # Count only participant folders, not every entry of root_folder
    total_participants += 1
    participant_id = folder  # The folder name is the participant id
    file_path_psg = os.path.join(root_folder, participant_id, "Somnomedics", "Act.txt")
    file_path_emfit05 = os.path.join(root_folder, participant_id, "EMFIT_001505",
                                     f"{participant_id}-psg-EMFIT_001505-processed_vitals.csv")
    if os.path.exists(file_path_emfit05) and os.path.exists(
            file_path_psg) and participant_id not in problematic_patients:
        process_file_psg_emfit05(file_path_emfit05, file_path_psg, participant_id)
        has_psg_and_emfit05_folder +=1
    elif participant_id in problematic_patients:
        print(f'Problematic patient encountered: {participant_id}')
    else:
        print(f"Files missing for participant {participant_id}")

# print(f'Average correlation for EMFIT05 is {np.mean(emfit05_corr)}')
# Report the EMFIT05 counter (the EMFIT19 counter was printed here before).
print(f'Total number of participants:{total_participants}\nTotal number of participants compared for PSG and emfit05: {has_psg_and_emfit05_folder}')
print(f'Number of omitted participants: {len(problematic_patients)}')
if all_corr_wo_alignment_emfit05 and all_corr_after_alignment_emfit05:
    print(f'Average correlation before alignment: {np.mean(all_corr_wo_alignment_emfit05)}, std: {np.std(all_corr_wo_alignment_emfit05)}')
    print(f'Average correlation after alignment: {np.mean(all_corr_after_alignment_emfit05)}, std: {np.std(all_corr_after_alignment_emfit05)}')
else:
    # np.mean/np.std of an empty list only yield nan plus a RuntimeWarning
    print('No correlation value was recorded for PSG and emfit05')

# Plots: Polysomnography over EMFIT 05

In [None]:
#Plot emfit05 on top of psg
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter
import datetime

# Function to process each file
def process_file_psg_emfit05(file_path_emfit05,file_path_psg, participant_id):
    """Plot the min-max scaled night-time EMFIT05 activity of one participant.

    Reads the CSV at ``file_path_emfit05`` (columns ``timestamp`` in epoch
    seconds and ``act``), keeps the samples between 22:00 and 07:00, scales
    ``act`` to [0, 1] and shows it against time.

    NOTE(review): this definition reuses the name of the cross-correlation
    function defined in an earlier cell, so running this cell rebinds the name
    to this plot-only version.

    Parameters
    ----------
    file_path_emfit05 : str
        Path of the EMFIT05 vitals CSV.
    file_path_psg : str
        Not read by this plot-only version; kept so existing calls still work.
    participant_id : str
        Participant label used in messages and the title.

    Returns
    -------
    None
        Also returned early when the EMFIT05 file is missing.
    """
    try:
        df_emfit05 = pd.read_csv(file_path_emfit05)
    except FileNotFoundError:
        print(f"File doesn't exist for emfit 05, participant {participant_id}")
        return

    df_emfit05['timestamp'] = df_emfit05['timestamp'].apply(datetime.datetime.fromtimestamp)
    hour_of_day = df_emfit05['timestamp'].dt.hour
    # Non in-place set_index returns a new frame instead of writing into a slice.
    df_emfit05_filtered = df_emfit05[(hour_of_day >= 22) | (hour_of_day < 7)].set_index('timestamp')
    act = df_emfit05_filtered['act']
    normalized_activity_emfit05 = (act - act.min()) / (act.max() - act.min())

    # Plotting
    print(f'Participant {participant_id}')
    fig, ax = plt.subplots()
    # The label gives ax.legend() below an entry to show; without it
    # matplotlib warns that no labelled artist exists.
    ax.plot(df_emfit05_filtered.index, normalized_activity_emfit05, label='emfit05')

    # Set labels and title
    ax.set_xlabel('Time')
    ax.set_ylabel('Normalized Activity Level')
    ax.set_title(f'Normalized Activity Level (emfit05) Between 22:00 and 07:00 for Participant {participant_id}')
    ax.legend()

    # Set x-axis ticks to every hour
    ax.xaxis.set_major_locator(HourLocator(interval=1))
    ax.xaxis.set_major_formatter(DateFormatter("%H:%M"))
    ax.tick_params(axis='x', labelrotation=45)

    ax.grid(True)
    plt.show()

# Walk every participant folder and plot its EMFIT05 recording
for folder in os.listdir(root_folder):
    # Only folders named like a participant are considered
    if folder.startswith("SMS"):
        participant_id = folder  # The folder name is the participant id
        file_path_psg = os.path.join(root_folder, participant_id, "Somnomedics", "Act.txt")
        file_path_emfit05 = os.path.join(
            root_folder,
            participant_id,
            "EMFIT_001505",
            f"{participant_id}-psg-EMFIT_001505-processed_vitals.csv",
        )
        problematic_patients = ['SMS_114','SMS_111','SMS_098','SMS_092','SMS_043']
        if participant_id in problematic_patients:
            # Excluded participants are only reported
            print(f'Problematic patient encountered: {participant_id}')
        elif os.path.exists(file_path_emfit05) and os.path.exists(file_path_psg):
            process_file_psg_emfit05(file_path_emfit05, file_path_psg, participant_id)
        else:
            print(f"Files missing for participant {participant_id}")

# Plots: Empatica for each patient
1. Plot individual graphs to display motor activity data from wearable (Empatica, Fitbit) and nearable (EMFIT 05, EMFIT 19) sensors. (At least from one sensor).

In [9]:
def get_empatica_file(folder_id:str,sensor:str):
    """Read the Empatica CSV of one sensor for one participant folder.

    The file is looked up at
    ``<root_folder>/<folder_id>/Empatica/<folder_id>-psg-Empatica-<sensor>.csv``.

    Returns the parsed DataFrame, or None (after printing the error) when
    anything goes wrong while locating or reading the file.
    """
    try:
        csv_name = '%s-psg-Empatica-%s.csv' % (folder_id, sensor)
        csv_path = os.path.join(root_folder, folder_id, 'Empatica', csv_name)
        table = pd.read_csv(csv_path)
    except Exception as e:
        print('No file %s' %str(e))
        table = None
    return table

In [None]:
# Plot the per-second mean acceleration magnitude of every participant that
# has an Empatica folder.
problematic_patients = ['SMS_114','SMS_111','SMS_098','SMS_094','SMS_092','SMS_043']
EMPATICA_SENSORS = ['ACC']

# check for Empatica folder
for folder in os.listdir(root_folder):
    if not folder.startswith("SMS_"):
        continue
    participant_id = folder
    if participant_id in problematic_patients:
        continue
    if not os.path.exists(os.path.join(root_folder, participant_id, "Empatica")):
        continue

    acc = get_empatica_file(participant_id, EMPATICA_SENSORS[0])
    if acc is None:
        # get_empatica_file already printed why the file could not be read
        continue

    # Convert timestamp to datetime
    acc['timestamp'] = pd.to_datetime(acc['timestamp'], unit='s', utc=True)

    # Calculate magnitude
    acc['magnitude'] = np.sqrt(acc['x'].pow(2) + acc['y'].pow(2) + acc['z'].pow(2))
    # The median-normalisation lines that used to follow read
    # `df_empatica_filtered`, a name that is not defined at this level
    # (NameError), and their result was never plotted; they were removed.

    # Resample and plot
    fig, ax = plt.subplots(figsize=(10, 6))
    acc.set_index('timestamp')['magnitude'].resample('1s').mean().plot(ax=ax)

    ax.set_title(f'Activity Over Time (Empatica) - {participant_id}')
    ax.set_xlabel('Timestamp')
    ax.set_ylabel('Activity')

    plt.show()



In [None]:
#Plot empatica on top of psg
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter
import datetime
from scipy import signal
import mne

# Function to process each file
def process_file_psg_empatica(file_path_empatica,file_path_psg, participant_id):
    """Plot the night-time Empatica activity of one participant.

    Reads the accelerometer CSV at ``file_path_empatica`` (columns
    ``timestamp`` in epoch seconds, ``x``, ``y``, ``z``) and keeps the samples
    between 22:00 and 07:00. The acceleration magnitude is divided by its
    median, clipped to [0, 1] and subtracted from 1: samples at or above the
    median magnitude therefore map to 0, and lower magnitudes move towards 1.

    NOTE(review): the following cell defines a function with this same name
    that also loads the PSG export; running that cell rebinds the name.

    Parameters
    ----------
    file_path_empatica : str
        Path of the Empatica accelerometer CSV.
    file_path_psg : str
        Not read by this plot-only version; kept so existing calls still work.
    participant_id : str
        Participant label used in messages and the title.

    Returns
    -------
    None
        Also returned early when the Empatica file is missing.
    """
    try:
        df_empatica = pd.read_csv(file_path_empatica)
    except FileNotFoundError:
        # The message used to name "emfit 05"; this function reads Empatica data.
        print(f"File doesn't exist for Empatica, participant {participant_id}")
        return

    df_empatica['timestamp'] = df_empatica['timestamp'].apply(datetime.datetime.fromtimestamp)
    hour_of_day = df_empatica['timestamp'].dt.hour
    # Non in-place set_index returns a new frame instead of writing into a slice.
    df_empatica_filtered = df_empatica[(hour_of_day >= 22) | (hour_of_day < 7)].set_index('timestamp')

    # Acceleration magnitude, kept as a separate Series so no column is
    # assigned on the filtered frame.
    act = np.sqrt(df_empatica_filtered['x']**2 + df_empatica_filtered['y']**2 + df_empatica_filtered['z']**2)

    normalized_activity_empatica = act / np.median(act)
    normalized_activity_empatica = np.clip(normalized_activity_empatica, 0, 1)
    normalized_activity_empatica = pd.to_numeric(1 - normalized_activity_empatica)

    # Plotting
    fig, ax = plt.subplots(1,1, figsize = (10, 6))
    # The label gives ax.legend() below an entry to show.
    ax.plot(df_empatica_filtered.index, normalized_activity_empatica, label='Empatica')

    # Set labels and title
    ax.set_xlabel('Time')
    ax.set_ylabel('Normalized Activity Level')
    ax.set_title(f'Normalized Activity Level (Empatica) Between 22:00 and 07:00 for Participant {participant_id}')
    ax.legend()

    # Set x-axis ticks to every hour
    ax.xaxis.set_major_locator(HourLocator(interval=1))
    ax.xaxis.set_major_formatter(DateFormatter("%H:%M"))

    ax.set_ylim(0, 1)
    ax.grid(True)
    plt.show()

# Main loop to process files
empatica_corr = []
total_participants = 0
has_psg_and_empatica_folder = 0
all_corr_wo_alignment_empatica = []
all_corr_after_alignment_empatica = []
# Participants excluded from the Empatica plots; built once instead of on
# every iteration.
problematic_patients = ['SMS_114','SMS_111','SMS_098','SMS_092','SMS_043']
for folder in os.listdir(root_folder):
    # Check if it is a patient folder
    if (not folder.startswith("SMS")):
        continue
    # Count only participant folders, not every entry of root_folder
    total_participants += 1
    participant_id = folder  # The folder name is the participant id
    file_path_psg = os.path.join(root_folder, participant_id, "Somnomedics", "Act.txt")
    file_path_empatica = os.path.join(root_folder, participant_id, "Empatica", f"{participant_id}-psg-Empatica-ACC.csv")
    if os.path.exists(file_path_empatica) and os.path.exists(file_path_psg) and participant_id not in problematic_patients:
        process_file_psg_empatica(file_path_empatica, file_path_psg, participant_id)
        has_psg_and_empatica_folder += 1
    elif participant_id in problematic_patients:
        print(f'Problematic patient encountered: {participant_id}')
    else:
        print(f"Files missing for participant {participant_id}")

# # clean_corr = [x for x in empatica_corr if isinstance(x, (int, float)) and not np.isnan(x)]
# # print(f'Average correlation for Empatica is {np.mean(clean_corr)}')
# # print(f'Average correlation for Empatica is {np.mean(empatica_corr)}')
# print(f'Total number of participants:{total_participants}\nTotal number of participants compared for PSG and empatica: {has_psg_and_empatica_folder}')
# print(f'Number of omitted participants: {len(problematic_patients)}')
# clean_corr_wo_alignment = [x for x in all_corr_wo_alignment_empatica if isinstance(x, (int, float)) and not np.isnan(x)]
# print(f'Average correlation before alignment: {np.mean(clean_corr_wo_alignment)}')
# clean_corr_after_alignment = [x for x in all_corr_after_alignment_empatica if isinstance(x, (int, float)) and not np.isnan(x)]
# print(f'Average correlation after alignment: {np.mean(all_corr_after_alignment_empatica)}')

# Plots: Polysomnography over Empatica (E4)

In [None]:
#Plot empatica on top of psg
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter
import datetime
from scipy import signal
import mne

# Function to process each file
def _zero_lag_correlation(corr, lags):
    """Return the correlation value at lag 0 as a float (NaN if lag 0 is not in `lags`)."""
    at_zero = corr[lags == 0]
    return float(at_zero[0]) if at_zero.size else float('nan')


def process_file_psg_empatica(file_path_empatica, file_path_psg, participant_id):
    """Compare night-time Empatica accelerometer activity with PSG activity for one participant.

    Steps:
      1. Load the Empatica ACC CSV, keep samples between 22:00 and 07:00 and derive an
         inverted, median-normalised activity signal from the x/y/z vector magnitude.
      2. Parse the semicolon-separated PSG activity export, keep 22:00-07:00 and min-max
         normalise it.
      3. Cross-correlate both signals, shift one of them by the lag of maximum correlation
         and cross-correlate again.
      4. Print the zero-lag correlations, append them (as floats) to the module-level lists
         `all_corr_wo_alignment_empatica` / `all_corr_after_alignment_empatica`, and plot
         the signals together with both correlation curves.

    Parameters
    ----------
    file_path_empatica : str
        Path to the Empatica CSV; must provide 'timestamp', 'x', 'y' and 'z' columns.
    file_path_psg : str
        Path to the PSG activity text file ('time;activity' rows after one header line).
    participant_id : str
        Identifier used in printed messages and plot titles.

    Returns
    -------
    None
        Results are reported through prints, the two module-level lists and a figure.
        The function returns early (after printing a message) when the Empatica file is
        missing or when either signal has no samples in the night window.
    """
    # ---- Empatica -------------------------------------------------------------------
    try:
        df_empatica = pd.read_csv(file_path_empatica)
    except FileNotFoundError:
        print(f"File doesn't exist for Empatica, participant {participant_id}")
        return

    # Epoch values -> naive local-time datetimes (datetime.fromtimestamp without tz).
    df_empatica['timestamp'] = df_empatica['timestamp'].apply(datetime.datetime.fromtimestamp)
    empatica_hours = df_empatica['timestamp'].dt.hour
    # .copy() so the column assignment below works on an independent frame
    # (avoids pandas' SettingWithCopyWarning on a filtered slice).
    df_empatica_filtered = df_empatica[(empatica_hours >= 22) | (empatica_hours < 7)].copy()
    df_empatica_filtered = df_empatica_filtered.set_index('timestamp')
    # Activity = magnitude of the acceleration vector.
    df_empatica_filtered['act'] = np.sqrt(
        df_empatica_filtered['x']**2 + df_empatica_filtered['y']**2 + df_empatica_filtered['z']**2)

    # Scale by the median, clip to [0, 1] and invert: magnitudes at or above the median
    # map to 0, smaller magnitudes map to values in (0, 1].
    normalized_activity_empatica = df_empatica_filtered['act'] / np.median(df_empatica_filtered['act'])
    normalized_activity_empatica = np.clip(normalized_activity_empatica, 0, 1)
    normalized_activity_empatica = 1 - normalized_activity_empatica

    # ---- PSG ------------------------------------------------------------------------
    with open(file_path_psg, 'r') as file:
        # Skip the first line (default avoids StopIteration on an empty file)
        next(file, None)
        lines = file.readlines()

    data_lines = [line.strip().split(';') for line in lines if ';' in line]

    # Keep only the digits of the activity field.
    # NOTE(review): this also discards signs and decimal separators (e.g. '1,5' -> '15');
    # confirm the export only contains non-negative integers.
    data = {
        'time': [fields[0] for fields in data_lines],
        'activity': [''.join(filter(str.isdigit, fields[1])) for fields in data_lines],
    }
    df = pd.DataFrame(data)

    # The export either carries full date-times or times only; try the richer format first.
    try:
        datetime_format = '%d.%m.%Y %H:%M:%S,%f'
        df['time'] = pd.to_datetime(df['time'], format=datetime_format)
    except ValueError:
        datetime_format = '%H:%M:%S,%f'
        df['time'] = pd.to_datetime(df['time'], format=datetime_format)
    df = df.set_index('time')

    # Filter rows where the time is between 22:00 and 07:00 (copy -> safe column assignment)
    filtered_df_psg = df.between_time('22:00', '07:00').copy()

    # Convert 'activity' column to numeric type
    filtered_df_psg['activity'] = pd.to_numeric(filtered_df_psg['activity'])

    # Min-max normalise the 'activity' data
    psg_activity = filtered_df_psg['activity']
    normalized_activity_psg = (psg_activity - psg_activity.min()) / (psg_activity.max() - psg_activity.min())

    norm_psg_values = normalized_activity_psg.dropna().values
    norm_empatica_values = normalized_activity_empatica.values

    if len(norm_psg_values) == 0 or len(norm_empatica_values) == 0:
        # Nothing to correlate; skip instead of failing inside np.max / correlate.
        print(f"No usable night-time samples for participant {participant_id}; skipping")
        return

    # ---- Cross-correlation and alignment ----------------------------------------------
    # NOTE(review): the signals are correlated sample-by-sample without resampling, so
    # lags are expressed in samples — confirm both recordings share a sampling rate.
    max_len = max(len(norm_psg_values), len(norm_empatica_values))

    corr = signal.correlate(norm_psg_values, norm_empatica_values, mode='same')
    lags = signal.correlation_lags(len(norm_psg_values), len(norm_empatica_values), mode='same')
    corr /= np.max(corr)  # scale so that the peak equals 1

    # Lag at which the correlation is maximal
    lag_for_alignment = lags[np.argmax(corr)]

    # Shift the signal that leads by zero-padding its start, then cap at max_len.
    if lag_for_alignment > 0:
        aligned_psg_values = norm_psg_values[:max_len]
        aligned_empatica_values = np.pad(norm_empatica_values, (lag_for_alignment, 0), mode='constant')[:max_len]
    elif lag_for_alignment < 0:
        aligned_psg_values = np.pad(norm_psg_values, (-lag_for_alignment, 0), mode='constant')[:max_len]
        aligned_empatica_values = norm_empatica_values[:max_len]
    else:
        aligned_psg_values = norm_psg_values[:max_len]
        aligned_empatica_values = norm_empatica_values[:max_len]

    # Cross-correlation of the aligned signals
    corr_a = signal.correlate(aligned_psg_values, aligned_empatica_values, mode='same')
    lags_a = signal.correlation_lags(len(aligned_psg_values), len(aligned_empatica_values), mode='same')
    corr_a /= np.max(corr_a)

    corr_at_zero = _zero_lag_correlation(corr, lags)
    corr_a_at_zero = _zero_lag_correlation(corr_a, lags_a)

    print(f'Participant {participant_id}\nCorrelation without re-alignment (correlation at lags = 0) {corr_at_zero}\nCorrelation after re-alignment (correlation at lags = 0) {corr_a_at_zero}')

    # Store plain floats in both lists so the summary statistics treat them alike.
    all_corr_wo_alignment_empatica.append(corr_at_zero)
    all_corr_after_alignment_empatica.append(corr_a_at_zero)

    # ---- Plotting ---------------------------------------------------------------------
    fig, ax = plt.subplots(1, 3, figsize=(18, 6))
    ax[0].plot(df_empatica_filtered.index, normalized_activity_empatica, color='orange', label='EMPATICA')
    ax[0].plot(filtered_df_psg.index, normalized_activity_psg, color='blue', label='PSG')
    ax[1].plot(lags, corr)
    ax[2].plot(lags_a, corr_a)

    # Left panel: both activity signals over time
    ax[0].set_xlabel('Time')
    ax[0].set_ylabel('Normalized Activity Level')
    ax[0].set_title(f'Normalized Activity Level Between 22:00 and 07:00 for Participant {participant_id}')
    ax[0].legend()

    # Set x-axis ticks to every hour
    hours = HourLocator(interval=1)
    hour_format = DateFormatter("%H:%M")
    ax[0].xaxis.set_major_locator(hours)
    ax[0].xaxis.set_major_formatter(hour_format)

    # Middle panel: correlation before alignment
    ax[1].set_title(f'Cross-Correlated Signal for Participant {participant_id}')
    ax[1].set_xlabel('Lag')
    ax[1].set_ylabel('r')

    # Right panel: correlation after alignment
    ax[2].set_title(f'Aligned Cross-Correlated Signal for Participant {participant_id}')
    ax[2].set_xlabel('Lag')
    ax[2].set_ylabel('r')

    plt.grid(True)
    plt.show()

# Main loop to process files
# Walks every participant folder under root_folder, runs the PSG/Empatica comparison
# where both input files exist, and prints summary statistics afterwards.
empatica_corr = []
total_participants = 0
has_psg_and_empatica_folder = 0
# Filled as a side effect of process_file_psg_empatica (zero-lag correlation values).
all_corr_wo_alignment_empatica = []
all_corr_after_alignment_empatica = []
# Participants excluded from the comparison. Defined before the loop so the summary
# below also works when root_folder contains no entries.
problematic_patients = ['SMS_114','SMS_111','SMS_098','SMS_092','SMS_043']
for folder in os.listdir(root_folder):
    # Check if it is a patient folder
    if (not folder.startswith("SMS")):
        continue
    # Count only real participant folders, not every directory entry.
    total_participants += 1
    participant_id = folder  # The folder name is the participant id
    file_path_psg = os.path.join(root_folder, participant_id, "Somnomedics", "Act.txt")
    file_path_empatica = os.path.join(root_folder, participant_id, "Empatica", f"{participant_id}-psg-Empatica-ACC.csv")
    if os.path.exists(file_path_empatica) and os.path.exists(file_path_psg) and participant_id not in problematic_patients:
        process_file_psg_empatica(file_path_empatica, file_path_psg, participant_id)
        has_psg_and_empatica_folder += 1
    elif participant_id in problematic_patients:
        print(f'Problematic patient encountered: {participant_id}')
    else:
        print(f"Files missing for participant {participant_id}")


def _finite_correlations(values):
    """Flatten floats / NumPy arrays in `values` into a list of non-NaN Python floats."""
    flat = [float(v) for entry in values for v in np.ravel(entry)]
    return [v for v in flat if not np.isnan(v)]


print(f'Total number of participants:{total_participants}\nTotal number of participants compared for PSG and empatica: {has_psg_and_empatica_folder}')
# NOTE(review): this is the size of the exclusion list, not the number of excluded
# participants actually encountered in root_folder.
print(f'Number of omitted participants: {len(problematic_patients)}')
# Entries may be floats or 1-element arrays; normalise both before averaging so that
# array entries are not silently dropped.
clean_corr_wo_alignment = _finite_correlations(all_corr_wo_alignment_empatica)
mean_wo_alignment = np.mean(clean_corr_wo_alignment) if clean_corr_wo_alignment else float('nan')
print(f'Average correlation before alignment: {mean_wo_alignment}')
clean_corr_after_alignment = _finite_correlations(all_corr_after_alignment_empatica)
mean_after_alignment = np.mean(clean_corr_after_alignment) if clean_corr_after_alignment else float('nan')
std_after_alignment = np.std(clean_corr_after_alignment) if clean_corr_after_alignment else float('nan')
print(f'Average correlation after alignment: {mean_after_alignment}, std: {std_after_alignment}')

In [None]:
# Mean zero-lag correlation after alignment across all processed participants.
# Entries of all_corr_after_alignment_empatica may be plain floats or NumPy arrays;
# np.ravel accepts both (indexing a float with [0] would raise TypeError).
corr = [float(value) for entry in all_corr_after_alignment_empatica for value in np.ravel(entry)]
clean_corr = [x for x in corr if not np.isnan(x)]
# Guard against an empty list so the cell prints nan instead of emitting a RuntimeWarning.
mean_clean_corr = np.mean(clean_corr) if clean_corr else float('nan')
print(f'mean correlation for empatica after alignment: {mean_clean_corr}')

In [None]:
# Load the EDF recording of one participant into a DataFrame (df_edf).
participant_id = 'SMS_020'
somnomedics_dir = os.path.join(root_folder, participant_id, "Somnomedics")

# Pick the file whose name starts with 'UNI8'; if several match, the last one listed wins.
# Reset first so a path left over from a previous run can never be reused by accident.
file_path_edf = None
for file in os.listdir(somnomedics_dir):
    if file.startswith('UNI8'):
        file_path_edf = os.path.join(somnomedics_dir, file)
if file_path_edf is None:
    raise FileNotFoundError(f"No file starting with 'UNI8' found in {somnomedics_dir}")

edf_data = mne.io.read_raw_edf(file_path_edf, preload=True)

df_edf = edf_data.to_data_frame()
print(df_edf)

In [None]:
# Show the column names of df_edf (the DataFrame built from the EDF recording above).
print(df_edf.columns)

# Fitbit

In [None]:
# Plot the normalised night-time Fitbit 'activities/calories' signal for one participant.
participant_id = 'SMS_179'
fitbit_dir = os.path.join(root_folder, participant_id, "Fitbit")

# Use the last file listed in the participant's Fitbit folder. Start from None so a
# missing or empty folder is reported below instead of raising NameError.
file_path_fitbit = None
if os.path.exists(fitbit_dir):
    for file in os.listdir(fitbit_dir):
        file_path_fitbit = os.path.join(fitbit_dir, file)

try:
    if file_path_fitbit is None:
        raise FileNotFoundError(fitbit_dir)
    df_fitbit = pd.read_csv(file_path_fitbit, sep=';')
except FileNotFoundError:
    print(f"File doesn't exist for fitbit, participant {participant_id}")
else:
    df_fitbit['dateTime'] = pd.to_datetime(df_fitbit['dateTime'])
    # NOTE(review): a filter on the date 2023-02-12 used to be computed here but was
    # immediately overwritten by the hour filter, so every recorded night is plotted.
    # Restrict to an explicit start/end datetime window if a single night is intended.
    fitbit_hours = df_fitbit['dateTime'].dt.hour
    # .copy() gives an independent frame for the index/column operations below.
    df_fitbit_filtered = df_fitbit[(fitbit_hours >= 22) | (fitbit_hours < 7)].copy()
    df_fitbit_filtered = df_fitbit_filtered.set_index('dateTime')
    df_fitbit_filtered = df_fitbit_filtered.dropna(subset=['activities/calories'])

    # Min-max normalise the calories signal to [0, 1]
    fitbit_calories = df_fitbit_filtered['activities/calories']
    normalized_activity_fitbit = (fitbit_calories - fitbit_calories.min()) / (fitbit_calories.max() - fitbit_calories.min())

    print(f'Participant {participant_id}')
    fig, ax = plt.subplots()
    ax.plot(df_fitbit_filtered.index, normalized_activity_fitbit, color='orange', label='fitbit')

    # Set labels and title
    ax.set_xlabel('Time')
    ax.set_ylabel('Normalized Activity Level')
    ax.set_title(f'Normalized Activity Level Between 22:00 and 07:00 for Participant {participant_id}')
    ax.legend()

    # Set x-axis ticks to every hour
    hours = HourLocator(interval=1)
    hour_format = DateFormatter("%H:%M")
    ax.xaxis.set_major_locator(hours)
    ax.xaxis.set_major_formatter(hour_format)
    plt.xticks(rotation=45)
    plt.show()


# EMG 1

In [None]:
# Plot the min-max normalised EMG1 channel of df_edf over time.
# The index conversion is guarded so the cell can be re-run: after the first run 'time'
# is the index, not a column, and an unguarded second run would raise KeyError.
if 'time' in df_edf.columns:
    # NOTE(review): pd.to_datetime treats plain numbers as offsets from the Unix epoch;
    # confirm 'time' holds real timestamps rather than elapsed time since recording start.
    df_edf['time'] = pd.to_datetime(df_edf['time'])  # Convert time column to datetime if it's not already
    df_edf.set_index('time', inplace=True)

emg1 = df_edf['EMG1']
df_edf['EMG1_normalized'] = (emg1 - emg1.min()) / (emg1.max() - emg1.min())
plt.plot(df_edf.index, df_edf['EMG1_normalized'])
plt.xlabel('Time')
plt.ylabel('EMG1')
plt.title('EMG1 vs Time')
plt.show()