- input: 6 ch EEG (F3, F4, C3, C4, O1, O2) + 2 ch reference (A1, A2)
- output: probabilistic hypnogram

step1. Load txt  
step2. Re-referencing  
step3. Create instances of mne.raw  
step4. Crop each recording to a duration of 5 h 30 min  
step5. Automatic sleep staging using YASA for all six EEG channels  
step6. Ensemble the per-channel results to obtain the final probabilistic hypnogram  

In [1]:
import os
from tqdm import tqdm
import pandas as pd
import txt_to_prob as tp

# Read the subject IDs belonging to each diagnostic class
print("Load subject's ID")
df_subject_ID = pd.read_csv('D:\\USC\\code_mine\\txt_to_prob\\subject_info\\PSG_list_by_CYS.csv')

# Columns 0..3 are read as the Healthy, OSA, Insomnia and COMISA ID lists
# (in that order); dropna() discards the missing entries of each column.
pd_healthy, pd_OSA, pd_INS, pd_COMISA = (
    df_subject_ID.iloc[:, col_idx].dropna() for col_idx in range(4)
)

# Every subject ID, concatenated class after class
pd_total = pd.concat([pd_healthy, pd_OSA, pd_INS, pd_COMISA])
print("-- Number of subjects: Healthy({}), OSA({}), Insomnia({}), COMISA({})"
      .format(pd_healthy.size, pd_OSA.size, pd_INS.size, pd_COMISA.size))
print("-- Total: {}".format(pd_total.size))

# Directory holding the txt files.
# Alternative directory kept for reference: 'E:\\samsung_original\\ensamble_test'
path_txt = 'E:\\samsung_original\\whole_data_txt\\data1'
# pd.Series containing the filename of every entry found in path_txt
txt_subjects = pd.Series(os.listdir(path_txt))

# Table passed to tp.np2raw for every subject (path is relative to the
# current working directory)
df_SOL = pd.read_csv('subject_info\\all_class.csv', index_col=0)

print('Number of test files: {}'.format(len(txt_subjects)))

# Entries of the output directory: subjects whose probabilistic hypnogram
# has already been generated
path_prob_hyp = 'E:\\probabilistic_hypnogram'
list_hyp_exist = os.listdir(path_prob_hyp)


Load subject's ID
-- Number of subjects: Healthy(1283), OSA(1933), Insomnia(727), COMISA(785)
-- Total: 4728
Number of test files: 1499


In [2]:
# Main batch loop: for every subject ID, locate its txt files, build a
# re-referenced and cropped recording, then run the automatic sleep staging.
# QNAN collects the `QNAN_dic` attribute of the txt reader, one entry per
# subject that was actually read.
QNAN = []

# Built once so the "already processed" test inside the loop is an O(1) set
# lookup instead of a linear scan of the directory listing per subject.
processed_subjects = set(list_hyp_exist)

for sub_ID in tqdm(pd_total, desc='Processing data1 (n=1499) ... '):

    # Read txt files and extract eeg data for all channels
    txt2np = tp.txt2np(path_txt, txt_subjects, sub_ID)
    txt2np.txt_filenames()

    if sub_ID in processed_subjects:
        # A probabilistic hypnogram already exists for this subject: skip it
        print('Already processed subject ... ')
        continue

    # Skip subjects with fewer than two matching txt files
    # (presumably one EEG channel plus its reference is the minimum
    # needed for re-referencing -- TODO confirm in txt_to_prob)
    if len(txt2np.temp_txt_subjects) < 2:
        continue

    np_all_eeg = txt2np.read_txt()
    QNAN.append(txt2np.QNAN_dic)

    # Create mne.raw instance
    # - data: np_all_eeg
    # - raw.info['subject_info']['his_id']: subject ID
    # - preprocessing: re_referencing + cropping
    # - raw.ch_names: ['F3-A2', 'F4-A1', 'C3-A2', 'C4-A1', 'O1-A2', 'O2-A1']
    np2raw = tp.np2raw(sub_ID, df_SOL)
    np2raw.np2raw(np_all_eeg)
    np2raw.re_ref()
    raw_re_ref_cropped = np2raw.raw_cropping()

    # Automatic Sleep Staging
    # - apply ensemble using all six eeg channels
    # - automatically saved
    #   current directory
    #   -- hypnograms
    #   -- -- subject_1
    #   -- -- subject_2
    #   -- -- ...
    #   -- -- subject_n
    #   -- -- -- predicted_hypnogram.csv
    #   -- -- -- probabilistic_hypnogram.csv
    automatic_staging = tp.automatic_staging()
    # A copy is handed over so the staging step cannot modify the cropped
    # recording kept in this scope.
    automatic_staging.get_hypnos_and_probs(raw_re_ref_cropped.copy())
    automatic_staging.ensamble_stagig()

Processing data1 (n=1499) ... :  22%|██▏       | 1042/4728 [00:03<00:12, 296.89it/s]



-- now subject ID : PE190458, number of txt files : 2
['PE190458, 김홍길_A1_.txt' 'PE190458, 김홍길_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190554, number of txt files : 2
['PE190554, 이영택_A1_.txt' 'PE190554, 이영택_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190602, number of txt files : 2
['PE190602, 박찬용_A1_.txt' 'PE190602, 박찬용_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190629, number of txt files : 2
['PE190629, 김혜정_A1_.txt' 'PE190629, 김혜정_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190639, number of txt files : 2
['PE190639, 임선하_A1_.txt' 'PE190639, 임선하_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190660, number of txt files : 2
['PE190660, 박주오_A1_.txt' 'PE190660, 박주오_C4_.txt']
Already processed subject ... 


Processing data1 (n=1499) ... :  23%|██▎       | 1101/4728 [00:03<00:13, 259.80it/s]



-- now subject ID : PE190696, number of txt files : 2
['PE190696, 박준우_A1_.txt' 'PE190696, 박준우_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190723, number of txt files : 2
['PE190723, 김민경_A1_.txt' 'PE190723, 김민경_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190731, number of txt files : 2
['PE190731, 강영모_A1_.txt' 'PE190731, 강영모_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190732, number of txt files : 2
['PE190732, 김빈나_A1_.txt' 'PE190732, 김빈나_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190736, number of txt files : 2
['PE190736, 장사현_A1_.txt' 'PE190736, 장사현_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190737, number of txt files : 2
['PE190737, 정윤득_A1_.txt' 'PE190737, 정윤득_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190745, number of txt files : 2
['PE190745, 박동석_A1_.txt' 'PE190745, 박동석_C4_.txt']
Already processed subject ... 


-- now subject ID : PE190749, number of

Processing data1 (n=1499) ... :  23%|██▎       | 1101/4728 [00:20<00:13, 259.80it/s]

Creating RawArray with float64 data, n_channels=2, n_times=4556839
    Range : 0 ... 4556838 =      0.000 ... 22784.190 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1112/4728 [00:40<26:41,  2.26it/s] 



-- now subject ID : PE190951, number of txt files : 2
['PE190951, 송은실_A1_.txt' 'PE190951, 송은실_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=5383909
    Range : 0 ... 5383908 =      0.000 ... 26919.540 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1113/4728 [01:05<52:14,  1.15it/s]



-- now subject ID : PE190956, number of txt files : 2
['PE190956, 서지영_A1_.txt' 'PE190956, 서지영_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=5313109
    Range : 0 ... 5313108 =      0.000 ... 26565.540 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1114/4728 [01:44<1:45:56,  1.76s/it]



-- now subject ID : PE190957, number of txt files : 2
['PE190957, 이재민_A1_.txt' 'PE190957, 이재민_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=5573609
    Range : 0 ... 5573608 =      0.000 ... 27868.040 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1115/4728 [02:03<2:19:52,  2.32s/it]



-- now subject ID : PE190960, number of txt files : 2
['PE190960, 김영희_A1_.txt' 'PE190960, 김영희_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=6019089
    Range : 0 ... 6019088 =      0.000 ... 30095.440 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1116/4728 [02:25<3:10:20,  3.16s/it]



-- now subject ID : PE190966, number of txt files : 2
['PE190966, 서정자_A1_.txt' 'PE190966, 서정자_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=4548869
    Range : 0 ... 4548868 =      0.000 ... 22744.340 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1117/4728 [02:50<4:31:48,  4.52s/it]



-- now subject ID : PE190970, number of txt files : 2
['PE190970, 박영자_A1_.txt' 'PE190970, 박영자_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=4705739
    Range : 0 ... 4705738 =      0.000 ... 23528.690 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1118/4728 [03:08<5:32:31,  5.53s/it]



-- now subject ID : PE190980, number of txt files : 2
['PE190980, 김성훈_A1_.txt' 'PE190980, 김성훈_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=5663932
    Range : 0 ... 5663931 =      0.000 ... 28319.655 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1119/4728 [03:28<7:01:16,  7.00s/it]



-- now subject ID : PE190986, number of txt files : 2
['PE190986, 황신영_A1_.txt' 'PE190986, 황신영_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=5011749
    Range : 0 ... 5011748 =      0.000 ... 25058.740 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1120/4728 [03:56<9:36:24,  9.59s/it]



-- now subject ID : PE190989, number of txt files : 2
['PE190989, 안석현_A1_.txt' 'PE190989, 안석현_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=4914909
    Range : 0 ... 4914908 =      0.000 ... 24574.540 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1121/4728 [04:13<10:51:50, 10.84s/it]



-- now subject ID : PE190994, number of txt files : 2
['PE190994, 황기옥_A1_.txt' 'PE190994, 황기옥_C4_.txt']
Creating RawArray with float64 data, n_channels=2, n_times=5696349
    Range : 0 ... 5696348 =      0.000 ... 28481.740 secs
Ready.
Available channels: ['C4-A1']


Processing data1 (n=1499) ... :  24%|██▎       | 1122/4728 [04:33<12:29:30, 12.47s/it]



-- now subject ID : PE191000, number of txt files : 2
['PE191000, 김귀영_A1_.txt' 'PE191000, 김귀영_C4_.txt']


In [None]:
# Scratch calculation: true division of 9113688 by 2 (evaluates to 4556844.0)
9113688/2

In [None]:
# Display the QNAN list accumulated by the processing loop
QNAN

In [None]:
import numpy as np

# Scratch cell for checking NumPy slicing behaviour.
a = np.array([])          # empty array
x1 = np.arange(1, 4)      # 1, 2, 3
x2 = np.arange(2, 11, 2)  # 2, 4, 6, 8, 10

# Idea noted but left disabled: y = np.concatenate((a, x1))
# Everything except the last two elements of x2
x2[:-2]

In [None]:
# Inspect the `raw` attribute of the np2raw object left over from the last
# loop iteration
raw = np2raw.raw
raw

In [None]:
# Inspect the `raw_re_ref` attribute of the same np2raw object
raw_re_ref = np2raw.raw_re_ref
raw_re_ref

In [None]:
# Display the value returned by np2raw.raw_cropping() in the last loop iteration
raw_re_ref_cropped

In [None]:
# Display the `raw` attribute of the last automatic_staging instance
automatic_staging.raw

In [None]:
# Evaluate .copy() on the cropped recording and display the result
# (the same call whose result is passed to get_hypnos_and_probs in the loop)
raw_re_ref_cropped.copy()