# Analysis of the "EarSet" dataset from the paper "EarSet: A Multi-Modal Dataset for Studying the Impact of Head and Facial Movements on In-Ear PPG Signals"

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# from scipy.signal import butter, filtfilt

In [None]:
# List everything stored in the EARBUDS directory of participant P0
path = '../data/Dataset/P0/EARBUDS/'
dir_list = os.listdir(path)

# Action tokens used when building recording file names, ordered by action index.
# NOTE(review): 'mouth-stretch-' carries a trailing dash, unlike every other
# entry - confirm that this matches the file names on disk.
action_name_for_file = [
    'still', 'nod', 'shake', 'tilt', 'eyes-ud', 'eyes-lr', 'brow-raiser', 'brow-lowerer', 'wink-r',
    'wink-l', 'lip-puller', 'chin-raiser', 'mouth-stretch-', 'chewing', 'speaking', 'walking', 'running',
]

# Human-readable action labels (as named in the paper), in the same index order
actions = [
    'Still', 'Nod', 'Shake', 'Tilt', 'Vertical Eyes Movements', 'Horizontal Eyes Movements', 'Brow Raiser',
    'Brow Lowerer', 'Right Eye Wink', 'Left Eye Wink', 'Lip Puller', 'Chin Raiser', 'Mouth Stretch', 'Chewing',
    'Speaking', 'Walking', 'Running',
]

# atoi: readable action label -> action index
# itoa: action index -> file-name token (note: NOT the inverse of atoi, which
# is keyed by the readable labels)
atoi = dict(zip(actions, range(len(actions))))
itoa = dict(enumerate(action_name_for_file))

print(atoi)

In [None]:
# Automate input reading: select patient, action, imu/ppg, left/right ear,
# sensor configuration (from 1-4)
patient = 0
action = 7
imu = False
ppg = True
right_ear = False
left_ear = True
sensor_configuration = 1

# Resolve the modality part of the file name. `imu` takes precedence over `ppg`
# when both flags are set. Selecting neither is an error: fail with a clear
# message instead of a TypeError from concatenating None into the path.
if imu:
    modality = 'imu'
elif ppg:
    modality = 'ppg'
else:
    raise ValueError("No modality selected: set either imu or ppg to True")

# Resolve the ear part of the file name. `right_ear` takes precedence over
# `left_ear` when both flags are set.
if right_ear:
    ear_side = 'right'
elif left_ear:
    ear_side = 'left'
else:
    raise ValueError("No ear selected: set either right_ear or left_ear to True")

# File layout: ../data/Dataset/P<patient>/EARBUDS/<patient>-<action>-<modality>-<ear>.csv
input_file_path = (
    f'../data/Dataset/P{patient}/EARBUDS/'
    f'{patient}-{action_name_for_file[action]}-{modality}-{ear_side}.csv'
)
input_file_path

In [None]:
# Read in the csv file, add the sensor configuration index and convert the
# millisecond timestamps to datetimes
df_data = pd.read_csv(
    input_file_path,
    sep=',',           # specify delimiter (default is ',')
    header=0,          # row number to use as column names (0 means the first row)
    na_values=['NA', ''],  # specify which values should be considered NaN
)

if ppg:
    # A row whose timestamp field starts with '#' marks a sensor configuration
    # change. Casting to str first keeps this working when the column holds
    # NaN or purely numeric values (no marker rows at all).
    is_config_change = df_data['timestamp'].astype(str).str.startswith('#')
    is_sample = ~is_config_change

    # Positions of the marker rows; positional because they are used with
    # .iloc when printing below.
    config_rows = np.flatnonzero(is_config_change.to_numpy()).tolist()

    # Configuration index per row: starts at 1 and increases by one after each
    # marker row. The marker rows themselves are labelled 'Config Change'.
    sensor_config = (is_config_change.cumsum() + 1).astype(object)
    sensor_config.loc[is_config_change] = 'Config Change'
    df_data['sensor_config'] = sensor_config

    # Convert the timestamps of the sample rows from milliseconds to datetimes;
    # marker rows keep their original text. The column stays object-typed
    # because it mixes Timestamps and strings.
    timestamps = df_data['timestamp'].astype(object)
    timestamps.loc[is_sample] = pd.to_datetime(
        pd.to_numeric(timestamps.loc[is_sample]), unit='ms'
    ).astype(object)
    df_data['timestamp'] = timestamps

    # Select the rows recorded with the requested sensor configuration
    df_data_sensor_config = df_data[df_data['sensor_config'] == sensor_configuration]

    # Convert all ppg signals to integer numpy arrays and discard the first and
    # last 100 samples (intended as the first and last second - presumably a
    # 100 Hz sampling rate; TODO confirm)
    array_green_sensor_config = np.array(df_data_sensor_config['green']).astype(int)[100:-100]
    array_ir_sensor_config = np.array(df_data_sensor_config['ir']).astype(int)[100:-100]
    array_red_sensor_config = np.array(df_data_sensor_config['red']).astype(int)[100:-100]

    # Print the marker rows, the selected rows and the extracted signals
    print("Time of sensor configuration change:", '\n', df_data.iloc[config_rows], '\n')
    #print("Added sensor_config and converted timesteps:", '\n',df_data)
    print("Selected sensor configuration:", '\n', df_data_sensor_config)
    print("The green ppg signal -", "sample size:", '\n', array_green_sensor_config, '-', len(array_green_sensor_config))
    print("The ir ppg signal -", "sample size:", '\n', array_ir_sensor_config, '-', len(array_ir_sensor_config))
    print("The red ppg signal -", "sample size:", '\n', array_red_sensor_config, '-', len(array_red_sensor_config))

In [None]:
# Draw the three PPG channels side by side, one subplot per channel
downsampling_rate = 1  # keep every n-th sample when plotting (1 = no downsampling)
fig, axes = plt.subplots(1, 3, figsize=(20, 6))  # 1 row, 3 columns

# (signal, legend label, line colour) for each subplot, left to right
ppg_channels = (
    (array_green_sensor_config, 'Green PPG', 'g'),
    (array_ir_sensor_config, 'IR PPG', 'darkviolet'),
    (array_red_sensor_config, 'Red PPG', 'r'),
)

for axis, (signal, channel_label, line_color) in zip(axes, ppg_channels):
    axis.plot(signal[::downsampling_rate], label=channel_label, color=line_color)
    axis.set_title(channel_label + " signals")
    axis.set_xlabel("samples")
    axis.set_ylabel("arb. unit")
    axis.legend()

plt.show()

In [None]:
# Large single-panel view of the green PPG signal, plotted against sample index
fig_green, ax_green = plt.subplots(figsize=(20, 12))

# Only the green channel is drawn; the other two channels are kept as
# disabled alternatives
ax_green.plot(array_green_sensor_config, label='green PPG', color='g')
#ax_green.plot(array_ir_sensor_config, label='ir PPG')
#ax_green.plot(array_red_sensor_config, label='red PPG')

# Title, axis labels and legend
ax_green.set_title("Green PPG Signal over Time")
ax_green.set_xlabel("Samples")
ax_green.set_ylabel("arb. unit")
ax_green.legend()

# Tilt the x tick labels and keep the grid switched off
plt.xticks(rotation=45)
ax_green.grid(False)

# Render the figure
plt.show()