In [2]:
import pandas as pd
import numpy as np
import yaml
import json

Early notes: Some subjects in the repository were not ultimately used in the published form of this project - in particular, anything with 'IIC' in it (these were interictal-ictal continuum patients, which we did not want to use in training the model), as well as a few others that clinicians on our team judged to have insufficient data quality. We also recently had to re-upload files under new IDs on ieeg.org for de-identification purposes, which may have caused some of the annotation files to fail.

It looks like the annotations are labeled in seconds.
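I'm not sure what `ii_start` and `ii_stop` are for. Judging from the values shown further down (each `ii_stop` lines up with a `sz_start`, and each following `ii_start` with a `sz_stop`), they appear to bound the segments between seizures - to be confirmed.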

In [3]:
# Load the per-patient metadata table from the working directory.
pt_df = pd.read_csv("patient_table.csv")

In [4]:
# Show the loaded patient table (pandas truncates long frames in the rendered output).
pt_df

Unnamed: 0,portal_id,annotation_no,sz_presence,diagnosis,Age,Sex,Test_set,sz_num,mean_sz_len,record_len,num_artifact
0,ICUDataRedux_0060,1,1,AMS,87,F,0,2,100.500,30000,3
1,ICUDataRedux_0061,2,1,SZ,60,F,0,49,110.180,31855,266
2,ICUDataRedux_0062,3,1,SZ,64,M,0,22,22.136,23500,1
3,ICUDataRedux_0063,4,1,SZ,59,F,0,5,20.800,30170,198
4,ICUDataRedux_0064,5,1,SZ,45,F,0,39,74.179,14995,2
...,...,...,...,...,...,...,...,...,...,...,...
92,ICUDataRedux_0035,130,2,AMS,30,M,1,0,,10000,0
93,ICUDataRedux_0043,131,2,SZ,38,F,1,0,,10000,0
94,ICUDataRedux_0044,132,2,Anoxic,64,M,1,0,,10000,0
95,ICUDataRedux_0047,133,2,AMS,58,F,1,0,,10000,46


In [5]:
# Reorder the patients by annotation number, then save a sorted copy of the
# table without writing the DataFrame's row index as a column.
pt_df = pt_df.sort_values('annotation_no')
pt_df.to_csv(path_or_buf='patient_table_sorted.csv', index=False)

In [6]:
# Parse the annotation file into `annotations`.
# The context manager closes the file handle as soon as parsing finishes,
# instead of leaving an open handle for the garbage collector to clean up.
with open('Data/annotations.yaml') as annotations_file:
    annotations = yaml.safe_load(annotations_file)

In [7]:
# Re-key the patient table by annotation number, then flatten it into a
# nested dict of the form {annotation_no: {column_name: value, ...}}.
ptdf2 = pt_df.set_index('annotation_no')
ptdatadict = ptdf2.to_dict(orient='index')

In [9]:
# Only a cell's last expression is rendered, so the entry count is printed
# explicitly; otherwise the value of len() is computed and silently dropped.
print(len(ptdatadict))
ptdatadict

{1: {'portal_id': 'ICUDataRedux_0060',
  'sz_presence': 1,
  'diagnosis': 'AMS',
  'Age': 87,
  'Sex': 'F',
  'Test_set': 0,
  'sz_num': 2,
  'mean_sz_len': 100.5,
  'record_len': 30000,
  'num_artifact': 3},
 2: {'portal_id': 'ICUDataRedux_0061',
  'sz_presence': 1,
  'diagnosis': 'SZ',
  'Age': 60,
  'Sex': 'F',
  'Test_set': 0,
  'sz_num': 49,
  'mean_sz_len': 110.18,
  'record_len': 31855,
  'num_artifact': 266},
 3: {'portal_id': 'ICUDataRedux_0062',
  'sz_presence': 1,
  'diagnosis': 'SZ',
  'Age': 64,
  'Sex': 'M',
  'Test_set': 0,
  'sz_num': 22,
  'mean_sz_len': 22.136,
  'record_len': 23500,
  'num_artifact': 1},
 4: {'portal_id': 'ICUDataRedux_0063',
  'sz_presence': 1,
  'diagnosis': 'SZ',
  'Age': 59,
  'Sex': 'F',
  'Test_set': 0,
  'sz_num': 5,
  'mean_sz_len': 20.8,
  'record_len': 30170,
  'num_artifact': 198},
 5: {'portal_id': 'ICUDataRedux_0064',
  'sz_presence': 1,
  'diagnosis': 'SZ',
  'Age': 45,
  'Sex': 'F',
  'Test_set': 0,
  'sz_num': 39,
  'mean_sz_len': 74.

In [13]:
# Copy patient-level metadata from `ptdatadict` into each annotation entry.
# Entries whose annotation number has no row in the patient table are left
# untouched.
for anum, data in annotations.items():
    num = int(anum)
    if num not in ptdatadict:
        continue

    patient = ptdatadict[num]
    data['portal_id'] = patient['portal_id']
    data['sz_presence'] = patient['sz_presence']
    data['diagnosis'] = patient['diagnosis']

    # The patient table rows displayed in this notebook code sz_presence as
    # 1 or 2, both of which are truthy, so a bare `if sz_presence:` test can
    # never reach the pt_type = 2 branch. Compare against 1 explicitly.
    # NOTE(review): assumes 1 means "seizures present" - confirm the coding.
    data['pt_type'] = 1 if patient['sz_presence'] == 1 else 2

    # pop() rather than del: the cell can be re-run (or meet an entry that
    # has no 'type' key) without raising KeyError.
    data.pop('type', None)

In [14]:
# Write the enriched annotations out as both YAML and JSON.
# Each file is opened in a `with` block so it is flushed and closed before
# the cell finishes, rather than whenever the handle is garbage-collected.
with open('patient_annotations.yaml', 'w') as yaml_out:
    yaml.safe_dump(annotations, yaml_out)

# Note: JSON object keys are always strings, so the integer annotation
# numbers are written as "1", "2", ... in the JSON file.
with open('patient_annotations.json', 'w') as json_out:
    json.dump(annotations, json_out, indent=2)

In [15]:
# Inspect the annotations after the patient metadata has been merged in.
annotations

{1: {'data_start': 24000,
  'data_stop': 54000,
  'file_num': 1,
  'ii_start': [24000, 24846, 51868],
  'ii_stop': [24742, 51771, 54000],
  'patient': 'ICUDataRedux_0060',
  'sz_start': [24742, 51771],
  'sz_stop': [24846, 51868],
  'portal_id': 'ICUDataRedux_0060',
  'sz_presence': 1,
  'diagnosis': 'AMS',
  'pt_type': 1},
 2: {'data_start': 1968,
  'data_stop': 33825,
  'file_num': 2,
  'ii_start': [2055,
   6120,
   6288,
   8491,
   9455,
   10471,
   10690,
   13432,
   13612,
   13853,
   14581,
   14802,
   15572,
   15771,
   15809,
   16052,
   18846,
   19029,
   19278,
   21721,
   21899,
   22098,
   23607,
   23763,
   24958,
   25163,
   25214,
   25282,
   26738,
   26915,
   27081,
   27265,
   28477,
   28638,
   28760,
   28930,
   30003,
   30171,
   30237,
   30395,
   31266,
   31423,
   31529,
   31671,
   32483,
   32659,
   32741,
   33720],
  'ii_stop': [6057,
   6208,
   8413,
   9376,
   10373,
   10569,
   13350,
   13504,
   13721,
   14476,
   14619,
   15