# Prepare Data
1. load power df
2. reindex with subject and condition
3. load behavioral df
4. melt behavioral df to long format (one row per subject and session)
5. join power and behavioral
6. drop unnecessary rows and columns

In [2]:
# setup 
import pandas as pd
 
# Open the behavioral data and the spreadsheet that maps behavioral ids to BIDS ids.
bh = pd.read_csv('docs/plb_hyp_data.csv', index_col='index')
ids_map = pd.read_excel('docs/ids_map.xlsx', header=1, index_col='behavioral_id')

# Keep a single row per BIDS id and only the 'bids_id' column, stored as a
# string zero-padded to two characters. The column is rebuilt with .assign()
# rather than by assigning into the column-subset frame, which is what makes
# pandas emit SettingWithCopyWarning.
ids_map = (
    ids_map
    .drop_duplicates('bids_id')[['bids_id']]
    .assign(bids_id=lambda frame: frame['bids_id'].apply(lambda x: str(x).zfill(2)))
)

# Right join on the index (behavioral id): every row of ids_map is kept, and
# behavioral rows without a mapped BIDS id are dropped.
bh = bh.join(ids_map, how='right')

# Wide -> long: one row per (participant, hypnosis_depth_<N> column). The
# per-session procedure/description columns are carried along as id_vars so the
# matching one can be picked for each row below.
bh = bh.melt(
    id_vars=['procedure_type_1', 'procedure_type_2', 'procedure_type_3', 'procedure_type_4', 'bids_id',
             'description_type_1', 'description_type_2', 'description_type_3', 'description_type_4'],
    value_vars=['hypnosis_depth_1', 'hypnosis_depth_2', 'hypnosis_depth_3', 'hypnosis_depth_4'])

# 'variable' holds the melted column name, e.g. 'hypnosis_depth_3' -> session '3'.
bh['session'] = bh['variable'].apply(lambda x: x.split('_')[2])

# For every row, read the procedure/description column that belongs to its session.
bh['procedure'] = bh.apply(lambda r: r['procedure_type_' + r['session']], axis=1)
bh['description'] = bh.apply(lambda r: r['description_type_' + r['session']], axis=1)

# Final tidy table: selected columns, sorted by subject and session, with a
# fresh 0..n-1 index ('bids_id' stays the first column).
bh = (
    bh[['bids_id', 'value', 'procedure', 'description', 'session']]
    .sort_values(by=['bids_id', 'session'])
    .rename(columns={'value': 'hypnosis_depth'})
    .reset_index(drop=True)
)
bh.head()

Unnamed: 0,bids_id,hypnosis_depth,procedure,description,session
0,1,6.0,whitenoise,hypnosis,1
1,1,7.0,relaxation,control,2
2,1,8.0,confusion,hypnosis,3
3,1,1.0,embedded,control,4
4,2,1.0,relaxation,control,1


In [3]:
# Open the power (PSD) data; the first spreadsheet column holds the row labels.
power = pd.read_excel('docs/psds_new.xlsx', index_col='Unnamed: 0', header=1)
power.columns = power.columns.str.replace('lower_gamma', 'lowergamma')

# Row labels are split on '-' into '<bids_id>' and '<condition>'. The vectorised
# .str accessor replaces the per-row `.apply(pd.Series)`, which builds one
# Series object per row. NOTE(review): assumes every label contains exactly one
# '-'; more than one still raises on assignment (as before), but a label without
# any '-' now yields a missing condition/session instead of raising.
power[['bids_id', 'condition']] = power.index.to_series().str.split('-', expand=True)

# The session number is the last character of the condition (e.g. 'baseline1' -> '1').
power['session'] = power['condition'].str[-1]
power = power.reset_index(drop=True)

# Attach the behavioral columns to every power row. how='right' keeps all power
# rows, including those with no behavioral match (their behavioral columns are NaN).
power = pd.merge(bh, power, how='right', on=['session', 'bids_id'])
power = power.sort_values(by=['bids_id', 'session', 'condition']).reset_index(drop=True)

# Move 'condition' right after 'bids_id'.
power.insert(1, 'condition', power.pop('condition'))
# power.to_csv('data/classification_dataset.csv')
power.head()

Unnamed: 0,bids_id,condition,hypnosis_depth,procedure,description,session,LF-delta,LF-theta,LF-alpha,LF-beta,...,OZ-alpha,OZ-beta,OZ-lowergamma,OZ-broadband,all-delta,all-theta,all-alpha,all-beta,all-lowergamma,all-broadband
0,1,baseline1,6.0,whitenoise,hypnosis,1,-122.424173,-125.516319,-121.826066,-126.726071,...,-120.729724,-128.817542,-132.271175,-127.574757,-122.292748,-124.91369,-120.7541,-126.834416,-128.114867,-125.836852
1,1,experience1,6.0,whitenoise,hypnosis,1,-122.531498,-126.011196,-124.445181,-130.393538,...,-123.073175,-130.443607,-135.971941,-129.466991,-122.106007,-125.097978,-123.262546,-129.697441,-133.196375,-128.713921
2,1,induction1,6.0,whitenoise,hypnosis,1,-121.455723,-124.505864,-120.780357,-124.235305,...,-121.421566,-128.501764,-130.977499,-127.131589,-121.864839,-124.399796,-120.43978,-125.356041,-126.241664,-124.586203
3,1,baseline2,7.0,relaxation,control,2,-122.084156,-124.253605,-119.868965,-125.685411,...,-119.165812,-126.126301,-127.266913,-124.800892,-122.038104,-124.280805,-119.504229,-126.459414,-128.39612,-125.500767
4,1,experience2,7.0,relaxation,control,2,-121.172789,-124.401927,-123.769855,-131.320283,...,-122.368971,-129.709502,-135.572473,-128.835057,-121.65959,-124.411625,-122.914792,-130.862341,-136.256602,-129.857052


In [None]:
# debug: uncomment to keep only the rows whose condition contains "experience"
# power = power.query('condition.str.contains("experience")', engine='python')

In [6]:
# Read the PLV source dataset, order the rows by subject and session, then drop
# the identifier columns and the two 'Unnamed' columns.
# NOTE(review): the 'Unnamed: *' columns are presumably indexes written by
# earlier to_csv calls -- confirm.
DATA = (
    pd.read_csv('data/classification_datasets/tobedeleted/plv_source.csv')
    .sort_values(by=['bids_id', 'session'])
    .drop(columns=['bids_id', 'session', 'condition', 'Unnamed: 0.1', 'Unnamed: 0'])
)
# DATA.to_csv(f'data/classification_datasets/plv_source.csv', index=False)

[]