In [1]:
import numpy as np
from scipy import signal, misc
import matplotlib.pyplot as plt
import mne
import scipy.io
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix

import xgboost as xgb
import pickle
import os
import pandas as pd
import random
import shutil
from PIL import Image
import torch
import glob
# Ask PyTorch to release cached, unused GPU memory before the notebook starts working.
torch.cuda.empty_cache()

from pathlib import Path
from omegaconf import OmegaConf
# Project configuration; later cells read config['deep_learning_hp']['chs'] and ['chs_2'].
config = OmegaConf.load('../config/config.yaml')
# Default input/output locations. Several later cells reassign these two names,
# so their value at any point depends on which cell ran last.
data_path = r'../data/interim/dataset_1_cheb2/'
output_path = r'../data/processed/deep_learning_data/'

  from .autonotebook import tqdm as notebook_tqdm


In [32]:
def divide_into_epochs(filename, s_data, epoch_length, output_path, data_ext='.npy', fs=256):
    """Cut a recording into consecutive, non-overlapping epochs and save each one.

    The class folder is chosen from ``filename`` (case-insensitive): names
    containing 'depressed' go to ``Depressed``, names containing 'healthy' go
    to ``Healthy``. Any other name produces no output.

    Parameters
    ----------
    filename : str
        Base name used for the output files. For '.npy' it is used as-is; for
        '.jpg' its last four characters are stripped first.
    s_data : pandas.DataFrame
        Recording to split; epochs are taken along the rows.
    epoch_length : int or float
        Epoch duration. Each epoch holds ``int(epoch_length * fs)`` rows.
    output_path : str
        Output root. '.npy' files go to ``output_path + '/<Class>/'`` and
        '.jpg' files to ``output_path + 'train_img/<Class>/'``.
    data_ext : str, default '.npy'
        '.npy' or '.jpg'; any other value produces no output.
    fs : int, default 256
        Rows per unit of ``epoch_length``. Callers must make sure this matches
        the sampling rate of ``s_data``.

    Raises
    ------
    ValueError
        If ``epoch_length * fs`` yields an epoch of zero (or fewer) rows.
    """
    lowered = filename.lower()
    if 'depressed' in lowered:
        class_dir = 'Depressed'
    elif 'healthy' in lowered:
        class_dir = 'Healthy'
    else:
        # unlabeled recording: nothing to write
        return

    if data_ext == '.npy':
        target_dir = output_path + '/' + class_dir + '/'
    elif data_ext == '.jpg':
        target_dir = output_path + 'train_img/' + class_dir + '/'
    else:
        return

    samples_per_epoch = int(epoch_length * fs)
    if samples_per_epoch <= 0:
        # a zero-length window would never advance through the recording
        raise ValueError('epoch_length * fs must give at least one sample per epoch')

    # "+ 1" keeps the final window when the recording length is an exact
    # multiple of the epoch size (the window end may equal len(s_data)).
    starts = range(0, len(s_data) - samples_per_epoch + 1, samples_per_epoch)
    if len(starts) == 0:
        return

    # np.save / Image.save do not create missing folders
    os.makedirs(target_dir, exist_ok=True)

    # count advances for every window, including skipped ones, so file
    # indices always reflect the epoch's position in the recording
    for count, start in enumerate(starts):
        s_epoch = s_data.iloc[start:start + samples_per_epoch]

        if data_ext == '.npy':
            # skip epochs in which any column is constant (zero standard deviation)
            if 0 not in s_epoch.std().values:
                file_path = target_dir + filename + str(count) + data_ext
                np.save(file_path, s_epoch.values.astype(np.double))
        else:
            # NOTE(review): mode 'L' is an 8-bit grayscale mode; confirm the
            # values are already scaled/typed appropriately for it.
            file_path = target_dir + filename[:-4] + str(count) + data_ext
            im = Image.fromarray(s_epoch.values, 'L')
            im.save(file_path)

# resample a 1-D signal to a new length with linear interpolation
def resample_linear(original, targetLen):
    """Return ``original`` resampled to ``targetLen`` points.

    ``targetLen`` evenly spaced fractional positions are laid over the index
    range ``[0, len(original) - 1]``; each output value is the linear blend of
    the two input samples surrounding its position.
    """
    samples = np.array(original, dtype=np.float64) # filtered signal
    n_samples = len(samples)
    positions = np.linspace(0, n_samples-1, num=targetLen, dtype=np.float64)

    lower_idx = positions.astype(int) # round down
    frac = positions - lower_idx

    lower_vals = samples[lower_idx]
    # the upper neighbour of the last sample wraps to index 0; its weight
    # (frac) is zero there, the modulo only keeps the index in range
    upper_vals = samples[(lower_idx + 1) % n_samples]

    interp = lower_vals * (1.0-frac) + upper_vals * frac
    assert len(interp) == targetLen
    return interp

# downsample by keeping every other sample (no interpolation)
def resample_every_other(original, targetLen):
    """Return a float64 copy of ``original`` containing its samples 0, 2, 4, ...

    ``targetLen`` is accepted for signature compatibility with
    ``resample_linear`` but is not used: the output length is always
    ``ceil(len(original) / 2)``.
    """
    samples = np.array(original, dtype=np.float64) # filtered signal
    return samples[::2]

Generate data for Dataset 2

In [33]:
# Cut every Dataset 2 CSV into epochs of seg_length*256 rows and save them as .npy files.
data_path = r'../data/interim/Dataset_2/'
output_path = r'../data/processed/deep_learning_data/'
# NOTE(review): recursive=True has no effect here because the pattern contains no '**'.
all_files = glob.glob(data_path+'*.csv', recursive=True)
seg_length = 1

for file in all_files:
    # drop the first two and the last four CSV columns
    # (presumably non-signal columns — verify against the files)
    s_data = pd.read_csv(file, index_col=False).iloc[:,2:-4]
    # file name without directory or extension; divide_into_epochs picks the class folder from it
    filename = Path(file).stem
    # keep only the channels listed under deep_learning_hp.chs_2 in the config
    s_data = s_data[config['deep_learning_hp']['chs_2']]
    divide_into_epochs(filename, s_data, seg_length, output_path+str(seg_length)+'s_dataset2/resampled', '.npy')

In [75]:
# Split the Dataset 1 recordings file-wise into train/val/test (70/15/15 per
# class) and cut every recording into epochs under '<seg_length>s_ec/<split>'.
data_path = r'../data/interim/dataset_1_cheb2/'
output_path = r'../data/processed/deep_learning_data/'

# sorted() makes the starting order independent of the filesystem, so the
# seeded shuffle below yields the same split on every run and machine
all_files = sorted(glob.glob(data_path+'*.csv', recursive=True))
# drop every path containing 'eo' (presumably the eyes-open recordings — confirm naming)
all_files = [x for x in all_files if 'eo' not in x.lower()]
seg_length = 1

# Seeded, local RNG: re-running this cell must not move a recording to a
# different split, otherwise epochs already written to disk by an earlier
# run would leak between train and test.
# NOTE(review): clear outputs produced by earlier, unseeded runs before regenerating.
split_seed = 42
random.Random(split_seed).shuffle(all_files)

# stratify by class: split each class 70/15/15 separately, then merge
depressed = [x for x in all_files if 'depressed' in x.lower()]
healthy = [x for x in all_files if 'healthy' in x.lower()]
traind, vald, testd = np.split(depressed, [int(len(depressed)*0.7), int(len(depressed)*0.85)])
trainh, valh, testh = np.split(healthy, [int(len(healthy)*0.7), int(len(healthy)*0.85)])
train = np.concatenate((traind, trainh))
val = np.concatenate((vald, valh))
test = np.concatenate((testd, testh))

# one loop for all three splits; only the output sub-folder differs
for split_name, split_files in (('train', train), ('test', test), ('val', val)):
    for file in split_files:
        # drop the first two and the last four CSV columns
        s_data = pd.read_csv(file, index_col=False).iloc[:,2:-4]
        # keep only the channels listed under deep_learning_hp.chs in the config
        s_data = s_data[config['deep_learning_hp']['chs']]
        filename = Path(file).stem
        divide_into_epochs(filename, s_data, seg_length, output_path+str(seg_length)+'s_ec/'+split_name, '.npy')

In [68]:
# Cut every file in data_path into epochs and save them under 19_ch_data/.
data_path = r'../data/interim/dataset_1_cheb2/'
output_path = r'../data/processed/deep_learning_data/19_ch_data/'
# NOTE(review): target_fs and fs are assigned but not used anywhere in this cell.
target_fs = 256
fs = 500


# NOTE(review): os.listdir returns every entry in data_path (not only *.csv), and
# `filename` keeps its extension, so divide_into_epochs names the '.npy' outputs
# '<filename incl. extension><count>.npy'.
for filename in os.listdir(data_path):
    # drop the first two and the last four CSV columns
    s_data = pd.read_csv(os.path.join(data_path, filename), index_col=False).iloc[:,2:-4]

    # select 19 channels
    s_data = s_data[config['deep_learning_hp']['chs']]
    # epoch_length=1 -> 256 rows per epoch inside divide_into_epochs
    # NOTE(review): fs above is 500 — confirm 256 rows is the intended epoch size for this data.
    divide_into_epochs(filename, s_data, 1, output_path, '.npy')

Generate the downsampled Dataset 1 for the 19-channel case

In [34]:
# Downsample every file in data_path channel by channel, then cut the result into epochs.
data_path = r'../data/interim/dataset_1_cheb2/'
output_path = r'../data/processed/deep_learning_data/19_ch_data/downsampled/'
# NOTE(review): this reads 'chs_2', whereas the other Dataset 1 cells read 'chs' — confirm which list is intended.
channels_names = config['deep_learning_hp']['chs_2']

target_fs = 256
fs = 500
seg_length = 1

# NOTE(review): os.listdir returns every entry in data_path, and `filename` keeps its extension.
for filename in os.listdir(data_path):
    # drop the first two and the last four CSV columns
    s_data = pd.read_csv(os.path.join(data_path, filename), index_col=False).iloc[:,2:-4]

    # resample data
    s_data_resampled = pd.DataFrame(columns=channels_names)

    # length the recording would have at target_fs
    # NOTE(review): resample_every_other ignores this value and always keeps every
    # second sample, i.e. the result is at fs/2 (250 if fs=500), not target_fs=256.
    target_length = np.round((len(s_data)/fs) * target_fs)
    for channel in channels_names:
        interp = resample_every_other(s_data[channel], int(target_length))
        s_data_resampled[channel] = interp
    
    # save file
    # re-select the columns in channels_names order before epoching
    s_data_resampled = s_data_resampled[channels_names]
    divide_into_epochs(filename, s_data_resampled, seg_length, output_path, '.npy')

In [None]:
# Split the Dataset 1 recordings file-wise into train/val/test (70/15/15 per
# class) and cut every recording into epochs under '<seg_length>s_ec/<split>'.
# NOTE(review): an identical cell appears earlier in this notebook; keep only one of them.
data_path = r'../data/interim/dataset_1_cheb2/'
output_path = r'../data/processed/deep_learning_data/'

# sorted() makes the starting order independent of the filesystem, so the
# seeded shuffle below yields the same split on every run and machine
all_files = sorted(glob.glob(data_path+'*.csv', recursive=True))
# drop every path containing 'eo' (presumably the eyes-open recordings — confirm naming)
all_files = [x for x in all_files if 'eo' not in x.lower()]
seg_length = 1

# Seeded, local RNG: re-running this cell must not move a recording to a
# different split, otherwise epochs already written to disk by an earlier
# run would leak between train and test.
# NOTE(review): clear outputs produced by earlier, unseeded runs before regenerating.
split_seed = 42
random.Random(split_seed).shuffle(all_files)

# stratify by class: split each class 70/15/15 separately, then merge
depressed = [x for x in all_files if 'depressed' in x.lower()]
healthy = [x for x in all_files if 'healthy' in x.lower()]
traind, vald, testd = np.split(depressed, [int(len(depressed)*0.7), int(len(depressed)*0.85)])
trainh, valh, testh = np.split(healthy, [int(len(healthy)*0.7), int(len(healthy)*0.85)])
train = np.concatenate((traind, trainh))
val = np.concatenate((vald, valh))
test = np.concatenate((testd, testh))

# one loop for all three splits; only the output sub-folder differs
for split_name, split_files in (('train', train), ('test', test), ('val', val)):
    for file in split_files:
        # drop the first two and the last four CSV columns
        s_data = pd.read_csv(file, index_col=False).iloc[:,2:-4]
        # keep only the channels listed under deep_learning_hp.chs in the config
        s_data = s_data[config['deep_learning_hp']['chs']]
        filename = Path(file).stem
        divide_into_epochs(filename, s_data, seg_length, output_path+str(seg_length)+'s_ec/'+split_name, '.npy')

In [63]:
# Epoch every file in data_path with all remaining columns and record each recording's row count.
length = []
output_path = r'../data/processed/deep_learning_data/no_overlap_data/'

# NOTE(review): data_path is not assigned in this cell; it is whatever an
# earlier-run cell last set it to.
for filename in os.listdir(data_path):
    # drop the first two and the last four CSV columns; no channel selection here
    s_data = pd.read_csv(os.path.join(data_path, filename), index_col=False).iloc[:,2:-4]
    # number of rows in this recording
    length.append(len(s_data))
    divide_into_epochs(filename, s_data, 1, output_path, '.npy')

['Fp1', 'Fpz', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'FT7', 'FC3', 'FCz', 'FC4', 'FT8', 'T3', 'C3', 'Cz', 'C4', 'T4', 'TP7', 'CP3', 'CPz', 'CP4', 'TP8', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'Oz', 'O2']
['Fp1', 'Fpz', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'FT7', 'FC3', 'FCz', 'FC4', 'FT8', 'T3', 'C3', 'Cz', 'C4', 'T4', 'TP7', 'CP3', 'CPz', 'CP4', 'TP8', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'Oz', 'O2']
['Fp1', 'Fpz', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'FT7', 'FC3', 'FCz', 'FC4', 'FT8', 'T3', 'C3', 'Cz', 'C4', 'T4', 'TP7', 'CP3', 'CPz', 'CP4', 'TP8', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'Oz', 'O2']
['Fp1', 'Fpz', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'FT7', 'FC3', 'FCz', 'FC4', 'FT8', 'T3', 'C3', 'Cz', 'C4', 'T4', 'TP7', 'CP3', 'CPz', 'CP4', 'TP8', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'Oz', 'O2']
['Fp1', 'Fpz', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'FT7', 'FC3', 'FCz', 'FC4', 'FT8', 'T3', 'C3', 'Cz', 'C4', 'T4', 'TP7', 'CP3', 'CPz', 'CP4', 'TP8', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'Oz', 'O2']


In [12]:
# move N randomly chosen files of one class from the train folder to the test folder
def move_test_data(N, classname, output_path, seed=None):
    """Move ``N`` random files from ``train/<classname>`` to ``test/<classname>``.

    Parameters
    ----------
    N : int
        Number of files to move.
    classname : str
        Class sub-folder name (the same name is used under train/ and test/).
    output_path : str
        Dataset root; it is concatenated directly with 'train/' and 'test/',
        so it should end with a path separator.
    seed : int, optional
        When given, the selection is drawn from a dedicated seeded generator
        over the sorted directory listing, making it reproducible. When
        omitted, the module-level ``random`` state is used as before.

    Raises
    ------
    ValueError
        If ``N`` is negative or larger than the number of available files.
    """
    train_dir = output_path+'train/'+classname
    test_dir = output_path+'test/'+classname

    # sorted so that a seeded draw does not depend on filesystem order
    data_list = sorted(os.listdir(train_dir))
    if N < 0 or N > len(data_list):
        raise ValueError(
            'cannot move {0} files: {1} contains {2}'.format(N, train_dir, len(data_list))
        )

    rng = random if seed is None else random.Random(seed)
    test_data = rng.sample(data_list, k=N)

    # shutil.move does not create a missing destination folder
    os.makedirs(test_dir, exist_ok=True)
    for file in test_data:
        shutil.move(train_dir+'/'+file, test_dir+'/'+file)

# Move 500 randomly chosen files per class from train/ to test/ under output_path.
# NOTE(review): output_path is whatever an earlier-run cell last assigned — confirm
# it points at the intended dataset root before running (files are moved, not copied).
move_test_data(500, 'Depressed', output_path)
move_test_data(500, 'Healthy', output_path)

In [2]:
import torch
import torchvision
from torchvision import transforms
from torchvision.datasets import DatasetFolder
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
from torchvision.datasets import ImageFolder

def get_default_device():
    """Return the 'cuda' device when CUDA is available, otherwise the 'cpu' device."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)

# device used by the loaders defined later in this cell
device = get_default_device()

def to_device(data, device):
    """Move a tensor, or a (nested) list/tuple of tensors, to ``device``.

    Lists and tuples are walked recursively and always come back as lists;
    anything else is moved with ``.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list,tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Iterable wrapper that hands out the batches of ``dl`` after moving them to ``device``."""
    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches in the wrapped dataloader"""
        return len(self.dl)

    def __iter__(self):
        """Yield each batch of the wrapped dataloader, moved to the device"""
        yield from (to_device(batch, self.device) for batch in self.dl)


#train and test data directory
# NOTE(review): output_path is not assigned in this cell; it is whatever an
# earlier-run cell last set it to. Both roots are passed to ImageFolder below.
data_dir = output_path+"train_img/"
test_data_dir = output_path+"test_img/"

def npy_loader(path):
    """Read the .npy file at ``path`` and return its content as a torch tensor."""
    array = np.load(path)
    return torch.from_numpy(array)

class AddGaussianNoise(object):
    """Transform that adds Gaussian noise to a tensor.

    Calling the instance returns ``tensor + noise * std + mean``, where
    ``noise`` is standard-normal and has the same shape as ``tensor``.

    Parameters
    ----------
    mean : float, default 0.
        Offset added to the noise.
    std : float, default 1.
        Scale applied to the standard-normal noise.
    """
    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        # create the noise on the input's device; noise created on the default
        # (CPU) device cannot be added to a tensor living on another device
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

#load the train and test data
# Both datasets use the same transform chain: Grayscale(1) followed by ToTensor().
# Class labels come from the sub-folder names under each root.
dataset = ImageFolder(
    root=data_dir,transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.ToTensor()
])
)
test_dataset = ImageFolder(
    root=test_data_dir,transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.ToTensor()
])
)

# first (image, label) pair of the training dataset
img, label = dataset[0]

print("Follwing classes are there : \n",dataset.classes)


def display_img(img,label):
    """Print the class name of a sample and draw its image.

    Parameters
    ----------
    img : torch.Tensor
        Image tensor with the channel axis first, i.e. (C, H, W) — the layout
        assumed for samples of ``dataset``.
    label : int
        Index into ``dataset.classes``.
    """
    print(f"Label : {dataset.classes[label]}")
    # permute needs one entry per dimension: (1, 2, 0) moves the channel axis
    # last, as imshow expects. squeeze(-1) drops a single grayscale channel so
    # the image is drawn as a plain 2-D array; cmap only applies in that case.
    plt.imshow(img.permute(1, 2, 0).squeeze(-1), cmap='gray')

#display the first image in the dataset
#display_img(*dataset[0])

batch_size = 32
# number of samples held out for validation; the rest of `dataset` is used for training
val_size = 2000
train_size = len(dataset) - val_size 

# NOTE(review): random_split is called without a seeded generator, so the
# train/validation membership changes on every run.
train_data,val_data = random_split(dataset,[train_size,val_size])
print(f"Length of Train Data : {len(train_data)}")
print(f"Length of Validation Data : {len(val_data)}")

#output
# NOTE(review): the numbers below are from an older run; the output recorded
# for this cell reports 13905 training samples.
#Length of Train Data : 12034
#Length of Validation Data : 2000

#load the train and validation into batches.
# only the training loader shuffles
train_loader = DataLoader(train_data, batch_size, shuffle = True, num_workers = 4)
val_loader = DataLoader(val_data, batch_size, num_workers = 4)

# wrap both loaders so every batch is moved to `device` as it is yielded
train_loader = DeviceDataLoader(train_loader, device)
val_loader = DeviceDataLoader(val_loader, device)

Follwing classes are there : 
 ['Depressed', 'Healthy']
Length of Train Data : 13905
Length of Validation Data : 2000
