In [1]:
import os 
import pickle
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt


# Denoise
from scipy import signal, stats
from scipy.signal import resample
from scipy.signal import butter, medfilt

from sklearn.preprocessing import MinMaxScaler

# Acquisition rate passed to every filter as ``Fs`` (presumably Hz -- confirm
# against the dataset documentation).
sampling_rate = 256
ts = 1 / sampling_rate      # duration of one sample
nyq = sampling_rate / 2     # Nyquist frequency

# Filter settings. Note the naming: ``high`` is the cutoff handed to the
# HIGH-pass stage and ``low`` is the cutoff handed to the LOW-pass stage.
order = 4
high = 0.5
low = 100


In [2]:
def butter_highpass_filter(ecg, cutoff, Fs, order):
    """Apply a zero-phase Butterworth high-pass filter.

    Parameters
    ----------
    ecg : array_like
        Signal to filter.
    cutoff : float
        Cutoff frequency, in the same units as ``Fs``.
    Fs : float
        Sampling frequency of ``ecg``.
    order : int
        Order of the Butterworth design. The filter is run forward and
        backward, so the effective attenuation is that of the design applied
        twice, with no phase shift.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as ``ecg``.

    Raises
    ------
    ValueError
        Propagated from SciPy when ``cutoff`` is not strictly between 0 and
        the Nyquist frequency, or when ``ecg`` is too short to be padded.
    """
    nyq = 0.5 * Fs
    normal_cutoff = cutoff / nyq

    # Design as second-order sections instead of (b, a) polynomials: with a
    # cutoff this close to 0 the poles crowd around z = 1 and the polynomial
    # form loses precision, which SciPy's documentation warns about even for
    # orders as low as 4.
    sos = signal.butter(order, normal_cutoff, btype='high', analog=False, output='sos')
    return signal.sosfiltfilt(sos, ecg)

def butter_bandstop_filter(ecg, low, high, Fs, order):
    """Apply a zero-phase Butterworth band-stop (notch) filter.

    Parameters
    ----------
    ecg : array_like
        Signal to filter.
    low, high : float
        Lower and upper edge of the rejected band, in the same units as
        ``Fs``.
    Fs : float
        Sampling frequency of ``ecg``.
    order : int
        Order passed to the Butterworth design. A band-stop design has twice
        this many poles.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as ``ecg``.

    Raises
    ------
    ValueError
        Propagated from SciPy when the band edges are not strictly between 0
        and the Nyquist frequency, or when ``ecg`` is too short to be padded.
    """
    nyq = 0.5 * Fs
    band = [low / nyq, high / nyq]

    # Second-order sections keep the doubled-order band-stop design
    # numerically well behaved; the (b, a) polynomial form is sensitive to
    # rounding at this order.
    sos = signal.butter(order, band, btype='bandstop', output='sos')
    return signal.sosfiltfilt(sos, ecg)

def butter_lowpass_filter(ecg, cutoff, Fs, order):
    """Apply a zero-phase Butterworth low-pass filter.

    Parameters
    ----------
    ecg : array_like
        Signal to filter.
    cutoff : float
        Cutoff frequency, in the same units as ``Fs``.
    Fs : float
        Sampling frequency of ``ecg``.
    order : int
        Order of the Butterworth design. The filter is run forward and
        backward, so there is no phase shift.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as ``ecg``.

    Raises
    ------
    ValueError
        Propagated from SciPy when ``cutoff`` is not strictly between 0 and
        the Nyquist frequency, or when ``ecg`` is too short to be padded.
    """
    nyq = 0.5 * Fs
    normal_cutoff = cutoff / nyq

    # Second-order sections avoid the rounding problems of the (b, a)
    # polynomial form that SciPy's documentation warns about.
    sos = signal.butter(order, normal_cutoff, btype='low', analog=False, output='sos')
    return signal.sosfiltfilt(sos, ecg)

# Min-max normalization with scikit-learn's MinMaxScaler (default settings).
def minmax(data):
    """Rescale ``data`` with a freshly fitted ``MinMaxScaler``.

    ``data`` is first reshaped to a single column, so the result is always a
    2-D array of shape ``(data.size, 1)`` regardless of the input shape.
    """
    as_column = data.reshape(-1, 1)
    return MinMaxScaler().fit_transform(as_column)

### 자동화 (Processed)

In [62]:
# Batch preprocessing: for every recording folder under data_path, filter and
# min-max scale each signal column plus the resting ECG, then write one pickle
# per folder into save_path.
data_path = 'D:/Database/J_MAUS/MAUS/Data/Raw_data/'
save_path = "D:/Journal/MAUS_preprocessd/"

# Bands rejected by the two band-stop stages (same units as sampling_rate).
NOTCH_BANDS = ((47, 53), (57, 63))


def _denoise_signal(samples):
    """Run the high-pass -> band-stop -> band-stop -> low-pass chain on one signal."""
    filtered = butter_highpass_filter(samples, high, sampling_rate, order)
    for stop_low, stop_high in NOTCH_BANDS:
        filtered = butter_bandstop_filter(filtered, stop_low, stop_high, sampling_rate, order)
    return butter_lowpass_filter(filtered, low, sampling_rate, order)


# Create the output folder up front; previously a missing folder made every
# save fail inside a bare ``except: pass`` without any message.
os.makedirs(save_path, exist_ok=True)

# os.listdir returns entries in arbitrary order, so sort before relying on
# positions.
data_list = sorted(os.listdir(data_path))

for file in data_list:
    subject_dir = os.path.join(data_path, file)
    if not os.path.isdir(subject_dir):
        continue

    subject_files = sorted(os.listdir(subject_dir))
    if len(subject_files) < 4:
        print('Skipping', file, '- expected at least 4 files, found', len(subject_files))
        continue

    # Positional convention of this notebook: entry 0 holds the signal columns
    # and entry 3 holds the resting recording.
    # NOTE(review): selecting the files by name would be safer -- confirm the
    # file names in the dataset.
    df = pd.read_csv(os.path.join(subject_dir, subject_files[0]))
    resting = pd.read_csv(os.path.join(subject_dir, subject_files[3]))

    print(df.shape, resting.shape)

    # One entry per signal column, keyed 'process <column name>'.
    p_dict = {}
    for name in df.columns:
        raw_data = np.array(df[name])
        processed_data = minmax(_denoise_signal(raw_data))
        # minmax returns a column vector; store it flat, as a DataFrame column
        # would be.
        p_dict['process ' + name] = processed_data.ravel()

    # Recordings without usable resting data are skipped entirely (no pickle
    # is written), but now with a message instead of silently.
    if 'Resting_ECG' not in resting.columns or resting.empty:
        print('Skipping', file, '- no resting ECG samples in', subject_files[3])
        continue

    rest_ecg = np.array(resting['Resting_ECG'])
    try:
        rest_preprocessd_data = minmax(_denoise_signal(rest_ecg))
    except (ValueError, TypeError) as err:
        # e.g. a recording too short for the filter padding, or non-numeric data
        print('Skipping', file, '- resting ECG could not be filtered:', err)
        continue

    # 'Resting' keeps the (n, 1) shape returned by minmax, as before.
    p_dict['Resting'] = rest_preprocessd_data

    with open(os.path.join(save_path, str(file) + '.pkl'), 'wb') as f:
        pickle.dump(p_dict, f, protocol=pickle.HIGHEST_PROTOCOL)

(76800, 6) (74970, 3)
(76800, 6) (0, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (75460, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)


### 자동화 (Raw data)

In [63]:
# Batch export of the unfiltered signals: for every recording folder under
# data_path, write one pickle holding the raw signal columns and the raw
# resting ECG.
data_path = 'D:/Database/J_MAUS/MAUS/Data/Raw_data/'
save_path = "D:/Journal/MAUS_Raw_DB/"

# Create the output folder up front; previously a missing folder made every
# save fail inside a bare ``except: pass`` without any message.
os.makedirs(save_path, exist_ok=True)

# os.listdir returns entries in arbitrary order, so sort before relying on
# positions.
data_list = sorted(os.listdir(data_path))

for file in data_list:
    subject_dir = os.path.join(data_path, file)
    if not os.path.isdir(subject_dir):
        continue

    subject_files = sorted(os.listdir(subject_dir))
    if len(subject_files) < 4:
        print('Skipping', file, '- expected at least 4 files, found', len(subject_files))
        continue

    # Positional convention of this notebook: entry 0 holds the signal columns
    # and entry 3 holds the resting recording.
    df = pd.read_csv(os.path.join(subject_dir, subject_files[0]))
    resting = pd.read_csv(os.path.join(subject_dir, subject_files[3]))

    print(df.shape, resting.shape)

    if 'Resting_ECG' not in resting.columns:
        print('Skipping', file, "- no 'Resting_ECG' column in", subject_files[3])
        continue

    # Bug fix: this cell used to store ``processed_data`` and
    # ``rest_preprocessd_data`` left over from an earlier cell, so every
    # "raw" pickle contained stale filtered data from a different recording.
    # It now stores the values actually read from this recording.
    # The 'process ' key prefix is kept, although the values are unfiltered,
    # so the pickles keep the key layout this cell has always written.
    p_dict = {'process ' + name: np.array(df[name]) for name in df.columns}

    # Stored as a column vector, the shape 'Resting' has always had in these
    # pickles.
    p_dict['Resting'] = np.array(resting['Resting_ECG']).reshape(-1, 1)

    with open(os.path.join(save_path, str(file) + '.pkl'), 'wb') as f:
        pickle.dump(p_dict, f, protocol=pickle.HIGHEST_PROTOCOL)

(76800, 6) (74970, 3)
(76800, 6) (0, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (75460, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)
(76800, 6) (76800, 3)


### 셀 분리

In [3]:
# Filter and min-max scale every raw column of df, adding the result as a new
# 'process <column name>' column.
# Columns that already carry the 'process ' prefix are skipped, so re-running
# this cell (or running it on a frame that was processed before) refreshes the
# derived columns instead of stacking 'process process ...' columns.
raw_columns = [col for col in df.columns if not str(col).startswith('process ')]

for i, name in enumerate(raw_columns):

    raw_data = np.array(df[name])
    high_s = butter_highpass_filter(raw_data, high, sampling_rate, order)
    # Two band-stop stages: 47-53 and 57-63 (same units as sampling_rate).
    notch1_s = butter_bandstop_filter(high_s, 47, 53, sampling_rate, order)
    notch2_s = butter_bandstop_filter(notch1_s, 57, 63, sampling_rate, order)
    low_s = butter_lowpass_filter(notch2_s, low, sampling_rate, order)

    preprocessd_data = minmax(low_s)

    df['process '+name]=preprocessd_data
    
    

In [4]:
# Filter and min-max scale the resting ECG column.
# NOTE(review): `rest` is not defined in any visible cell of this notebook (the
# batch cells call their resting frame `resting`), so this cell fails on a
# fresh kernel -- confirm which file `rest` was loaded from.
rest_ecg = np.array(rest['Resting_ECG'])


# Same chain as for the signal columns: high-pass, two band-stop stages
# (47-53 and 57-63), then low-pass.
high_s = butter_highpass_filter(rest_ecg, high, sampling_rate, order)
notch1_s = butter_bandstop_filter(high_s, 47, 53, sampling_rate, order)
notch2_s = butter_bandstop_filter(notch1_s, 57, 63, sampling_rate, order)
low_s = butter_lowpass_filter(notch2_s, low, sampling_rate, order)

# minmax returns a column vector of shape (n, 1).
rest_preprocessd_data = minmax(low_s)


In [5]:
# Drop the first six columns by position, leaving the columns that follow
# them, then preview the full frame and the reduced one.
df1 = df.drop(columns=df.columns[0:6])
display(df.head(3), df1.head(3))

Unnamed: 0,Trial 1:0back,Trial 2:2back,Trial 3:3back,Trial 4:2back,Trial 5:3back,Trial 6:0back,process Trial 1:0back,process Trial 2:2back,process Trial 3:3back,process Trial 4:2back,process Trial 5:3back,process Trial 6:0back
0,0.357,-0.278,-0.035,0.313,-0.09,0.097,0.622343,0.562159,0.7015,0.658464,0.597541,0.636285
1,0.328,-0.291,-0.127,0.309,-0.085,0.135,0.615565,0.556588,0.686708,0.65024,0.597955,0.651929
2,0.281,-0.293,-0.142,0.319,-0.083,0.168,0.601116,0.553691,0.671056,0.663668,0.598847,0.668616


Unnamed: 0,process Trial 1:0back,process Trial 2:2back,process Trial 3:3back,process Trial 4:2back,process Trial 5:3back,process Trial 6:0back
0,0.622343,0.562159,0.7015,0.658464,0.597541,0.636285
1,0.615565,0.556588,0.686708,0.65024,0.597955,0.651929
2,0.601116,0.553691,0.671056,0.663668,0.598847,0.668616


In [10]:
# Collect the first six columns of df1 as arrays keyed by column name, then
# add the resting signal under 'Resting'.
p_dict = {
    str(df1.columns[k]): np.array(df1[df1.columns[k]])
    for k in range(6)
}
p_dict['Resting'] = rest_preprocessd_data

In [14]:
# Save dataset: pickle p_dict into the preprocessed-data folder.
# NOTE(review): the output name '002.pkl' is hard-coded, so this overwrites
# that file with whatever p_dict currently holds -- confirm p_dict was built
# from the matching recording before running.
# NOTE(review): pickle is already imported in the first cell; this import is
# redundant.
import pickle
save_path = "D:/Journal/MAUS_preprocessd/"
with open(save_path+'002.pkl', 'wb') as f:
    pickle.dump(p_dict, f, protocol=pickle.HIGHEST_PROTOCOL)