# Preprocess subject data

In [1]:
import pandas as pd

In [13]:
import sys
import os
src_path = os.path.dirname(os.path.abspath("../../src"))
sys.path.append(src_path)
from src.preprocessing.classes import Subject
from src.utils.config import Config
import src.configs.config as configs
import src.preprocessing.utils as pre_utils 

# Load in subject IDs and paths from CFGLog
config = Config.from_json(configs.CFGLog)

CLBP_CP_subject_ids = config.data.chronic_low_back_pain.subject_ids.CP
CLBP_HC_subject_ids = config.data.chronic_low_back_pain.subject_ids.HC

# CLBP already processed
clbp_processed_path = config.data.chronic_low_back_pain.processed_path

# Define preprocessing parameters
# Combine all subject IDs into dict separated by group
sub_ids = {
    "chronic_low_back_pain": CLBP_CP_subject_ids + CLBP_HC_subject_ids
}

print(f"Total subjects: {len(sub_ids['chronic_low_back_pain'])}")
print(sub_ids)

# Initialize dataframe
df = pd.DataFrame(columns=['subject_id', 'num_remaining_epochs', 'num_dropped', 'percent_dropped'])
dropped_epochs = {}

for group in sub_ids:
    for sub_id in sub_ids[group]:
        subject = Subject(sub_id, group)
        
        # Load in Epochs data
        subject.load_epochs()
        num_epochs = len(subject.epochs)
        subject.load_epochs_info(clbp_processed_path)
        drop_log = subject.drop_log['drop_log'][0] if len(subject.drop_log['drop_log']) > 0 else []
        
        # Save number of remaining epochs, number dropped, and percent dropped to dataframe
        print(f"Subject: {sub_id}, Num Remaining Epochs: {num_epochs - len(drop_log)}, Num Dropped: {len(drop_log)}, Percent Dropped: {len(drop_log)/num_epochs}")
        df.loc[len(df.index)] = [sub_id, num_epochs - len(drop_log), len(drop_log), (len(drop_log)/num_epochs)*100]
        
        # Make a dictionary where the keys are the subject IDs and the values are the dropped epoch indices
        if len(drop_log) > 0:
            dropped_epochs.update({sub_id: drop_log})
            # subject.save(dropped_epochs=dropped_epochs)

Total subjects: 43
{'chronic_low_back_pain': ['018', '022', '024', '031', '032', '034', '036', '039', '040', '045', '046', '052', '020', '021', '023', '029', '037', '041', '042', '044', '048', '049', '050', '056', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C2.', 'C24', 'C25', 'C26', 'C27', 'C3.', 'C6.', 'C7.', 'C9.']}
Loaded epochs for subject 018
Subject: 018, Num Remaining Epochs: 73, Num Dropped: 0, Percent Dropped: 0.0
Loaded epochs for subject 022
Subject: 022, Num Remaining Epochs: 52, Num Dropped: 4, Percent Dropped: 0.07142857142857142
Loaded epochs for subject 024
Subject: 024, Num Remaining Epochs: 60, Num Dropped: 0, Percent Dropped: 0.0
Loaded epochs for subject 031
Subject: 031, Num Remaining Epochs: 60, Num Dropped: 0, Percent Dropped: 0.0
Loaded epochs for subject 032
Subject: 032, Num Remaining Epochs: 22, Num Dropped: 19, Percent Dropped: 0.4634146341463415
Loaded epochs for subject 034
Subject: 034, Num Remaining Epochs: 60, Num Dropped: 0,

In [14]:
df.set_index('subject_id', inplace=True)
df

Unnamed: 0_level_0,num_remaining_epochs,num_dropped,percent_dropped
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
018,73,0,0.0
022,52,4,7.142857
024,60,0,0.0
031,60,0,0.0
032,22,19,46.341463
034,60,0,0.0
036,55,2,3.508772
039,44,8,15.384615
040,52,4,7.142857
045,60,0,0.0


In [15]:
dropped_epochs

{'022': array([14, 18, 36, 38]),
 '032': array([ 0,  3,  9, 22, 24, 27, 28, 34, 36, 38, 42, 43, 44, 45, 47, 48, 51,
        56, 59]),
 '036': array([52, 56]),
 '039': array([12, 16, 31, 32, 33, 50, 56, 59]),
 '040': array([15, 24, 31, 42]),
 '052': array([19]),
 '021': array([ 12,  29,  34,  40,  41,  52,  53,  72, 113, 116]),
 '023': array([32, 33, 46, 55]),
 '029': array([10, 30, 31, 36, 37, 41, 49, 53, 56, 57, 58, 59]),
 '037': array([27, 28, 30, 35, 37, 38, 44, 45, 46, 47, 48, 49, 50, 52, 54]),
 '041': array([26]),
 '044': array([ 0,  1,  2,  3,  4,  5,  6,  8, 10, 11, 13, 15, 18, 20, 26, 28, 29,
        46, 53, 54]),
 '049': array([13, 17, 29, 50, 54]),
 '050': array([13, 16]),
 '056': array([33]),
 'C10': array([ 0, 18, 54]),
 'C11': array([41]),
 'C15': array([10, 45, 48]),
 'C16': array([10, 34, 49]),
 'C19': array([ 0,  6, 18, 19]),
 'C2.': array([ 6,  9, 30, 48, 60, 61]),
 'C24': array([19, 38, 54, 55, 57]),
 'C25': array([ 0,  1, 50]),
 'C27': array([ 7, 13, 15, 19, 22, 45, 

In [4]:
# save to csv in data/chronic_low_back_pain/percent_trials_dropped.csv
df.to_csv(f'../../data/{group}/percent_trials_dropped.csv')


In [7]:
# load in ratings master xlsx sheet for all subjects
df_master = pd.read_excel('../../data/chronic_low_back_pain/ratings_master.xlsx')

In [10]:
df_master = df_master.T
# make the first column the index and name it subject_id
df_master.index.name = 'subject_id'
df_master.columns = df_master.iloc[0]
df_master = df_master.iloc[1:]
df_master

pinprick,NaN,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,111.0,112.0,113.0,114.0,115.0,116.0,117.0,118.0,119.0,120.0
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6,Male,1,1,1,1,0,1,1,1,1.0,...,,,,,,,,,,
7,Male,1,1,1,2,1,3,1,1,1.0,...,,,,,,,,,,
8,Female,1,1,1,2,1,1,1,1,1.5,...,,,,,,,,,,
9,Male,4,1,1,1,1,4,3,8,9.0,...,,,,,,,,,,
10,Male,2,1,2,3,1,2,2,2,1.0,...,,,,,,,,,,
13,Female,1,1,1,2,2,1,2,1,1.0,...,,,,,,,,,,
14,Male,3,1,3,1,3,1,3,2,2.0,...,,,,,,,,,,
15,Male,1,2,3,2,1,1,4,2,2.0,...,,,,,,,,,,
16,Male,1,2,0,1,2,1,1,2,1.0,...,,,,,,,,,,
17,Female,1,1,1,1,1,1,2,1,1.0,...,,,,,,,,,,


In [19]:
# use dropped_epochs dictionary to delete the dropped epochs from the dataframe for each subject individually
for sub_id in dropped_epochs:
    # remove the 0 in front of each sub_id
    sub_id = sub_id[1:]
    df_master.loc[sub_id] = df_master.loc[sub_id].drop(dropped_epochs[sub_id])
    

KeyError: '22'

In [20]:
df_master

pinprick,NaN,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,111.0,112.0,113.0,114.0,115.0,116.0,117.0,118.0,119.0,120.0
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6,Male,1,1,1,1,0,1,1,1,1.0,...,,,,,,,,,,
7,Male,1,1,1,2,1,3,1,1,1.0,...,,,,,,,,,,
8,Female,1,1,1,2,1,1,1,1,1.5,...,,,,,,,,,,
9,Male,4,1,1,1,1,4,3,8,9.0,...,,,,,,,,,,
10,Male,2,1,2,3,1,2,2,2,1.0,...,,,,,,,,,,
13,Female,1,1,1,2,2,1,2,1,1.0,...,,,,,,,,,,
14,Male,3,1,3,1,3,1,3,2,2.0,...,,,,,,,,,,
15,Male,1,2,3,2,1,1,4,2,2.0,...,,,,,,,,,,
16,Male,1,2,0,1,2,1,1,2,1.0,...,,,,,,,,,,
17,Female,1,1,1,1,1,1,2,1,1.0,...,,,,,,,,,,
