In [None]:
!pip install mne

In [None]:
!pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu111/torch_nightly.html -U

In [None]:
######################################################################################################
#  ________   _________   ___    ___ ___  ___  ________  ________  ___  __      _______   _______
# |\   ___  \|\___   ___\|\  \  /  /|\  \|\  \|\   __  \|\   ____\|\  \|\  \   /  ___  \ /  ___  \
# \ \  \\ \  \|___ \  \_|\ \  \/  / | \  \\\  \ \  \|\  \ \  \___|\ \  \/  /|_/__/|_/  //__/|_/  /|
#  \ \  \\ \  \   \ \  \  \ \    / / \ \   __  \ \   __  \ \  \    \ \   ___  \__|//  / /__|//  / /
#   \ \  \\ \  \   \ \  \  /     \/   \ \  \ \  \ \  \ \  \ \  \____\ \  \\ \  \  /  /_/__  /  /_/__
#    \ \__\\ \__\   \ \__\/  /\   \    \ \__\ \__\ \__\ \__\ \_______\ \__\\ \__\|\________\\________\
#     \|__| \|__|    \|__/__/ /\ __\    \|__|\|__|\|__|\|__|\|_______|\|__| \|__| \|_______|\|_______|
#                        |__|/ \|__|
######################################################################################################
#
# Data exploration and example file for submission in the NTX Hackathon challenge
#
######################################################################################################

import mne
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import random

from torch.utils.data import Dataset, DataLoader
from sklearn.pipeline import make_pipeline
from mne.decoding import Vectorizer
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge

In [None]:
# Pick the compute device: use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(DEVICE)

In [None]:
# Path to training data.
# NOTE(review): hard-coded absolute path on one specific machine -- adjust before running
# elsewhere. The trailing "/" is required because file names are appended to this string
# by plain concatenation.
train_path = "/home/deepak/learning_project/student/BrainAge/training/"

In [None]:
# Path to testing data (public test set).
# NOTE(review): hard-coded absolute path on one specific machine -- adjust before running
# elsewhere. The trailing "/" is required because file names are appended to this string
# by plain concatenation.
test_path = "/home/deepak/learning_project/student/BrainAge/testing_flat/"

In [None]:
# download data https://filesender.renater.fr/?s=download&token=e1de0ec4-09bc-4194-b85b-59830cb04af3
# download test data from https://codalab.lisn.upsaclay.fr/competitions/8336

condition_ec = "EC"  # file-name tag of the closed-eyes recordings
condition_eo = "EO"  # file-name tag of the second condition (presumably eyes open -- confirm against the dataset description)
train_subj = 1200  # number of training subjects to use (lower it, e.g. to 10, for a quick demonstration run)
test_subj = 400  # number of testing subjects to use (lower it, e.g. to 10, for a quick demonstration run)

In [None]:
class CustomDataset(Dataset):
    """Per-subject dataset returning two fixed-size recordings and a target.

    Item ``idx`` maps to subject number ``idx + start``. For that subject the files
    ``subj{NNNN}_{condition}_raw.fif.gz`` are read with MNE for the module-level
    ``condition_eo`` and ``condition_ec`` tags, and each recording is copied into a
    zero-initialised float32 tensor of fixed size so that items can be batched.

    Args:
        path: Directory prefix. The file name is appended by plain string
            concatenation, so it must end with a path separator.
        subj: Number of subjects, i.e. the dataset length.
        ages: Sequence of targets indexed by dataset position (not subject number).
        start: Subject number of item 0.
    """

    # Fixed tensor sizes used for batching. Recordings are expected to have
    # N_CHANNELS rows; a different channel count still raises, as before.
    N_CHANNELS = 129
    EO_SAMPLES = 10000
    EC_SAMPLES = 20000

    def __init__(self, path, subj, ages, start):
        self.path = path
        self.subj = subj
        self.ages = ages
        self.start = start

    def __len__(self):
        return self.subj

    @staticmethod
    def _fit_to_length(signal, n_channels, n_samples):
        """Return ``signal`` as a float32 ``(n_channels, n_samples)`` tensor.

        Shorter signals are zero-padded on the right. Longer signals are cropped
        instead of raising a shape-mismatch error on assignment.
        """
        fitted = torch.zeros(n_channels, n_samples)
        keep = min(signal.shape[-1], n_samples)
        fitted[:, :keep] = torch.as_tensor(signal[:, :keep])
        return fitted

    def _load_condition(self, subject, condition, n_samples):
        """Read one subject/condition file and fit it to ``n_samples`` columns."""
        fname = f"subj{subject:04}_{condition}_raw.fif.gz"
        raw = mne.io.read_raw(self.path + fname, preload=True, verbose='warning')
        return self._fit_to_length(raw.get_data(), self.N_CHANNELS, n_samples)

    def __getitem__(self, idx):
        """Return ``((data_eo, data_ec), age)`` for dataset position ``idx``."""
        subject = idx + self.start
        data_eo = self._load_condition(subject, condition_eo, self.EO_SAMPLES)
        data_ec = self._load_condition(subject, condition_ec, self.EC_SAMPLES)
        return (data_eo, data_ec), self.ages[idx]

In [None]:
# Read the subject metadata and keep the first `train_subj` ages as regression targets.
meta = pd.read_csv(train_path + "train_subjects.csv")
y_train = [subject_age for subject_age in meta["age"][:train_subj]]

In [None]:
# Training subjects are numbered from 1.
train_data = CustomDataset(path=train_path, subj=train_subj, ages=y_train, start=1)

In [None]:
# The test loop ignores the targets, so zeros serve as placeholder ages.
test_ages = [0] * test_subj
# Test subjects are numbered from 1201.
test_data = CustomDataset(path=test_path, subj=test_subj, ages=test_ages, start=1201)

In [None]:
# DataLoader
batch_size = 8

# Reshuffle the training subjects every epoch so the optimizer does not see the
# same fixed subject order in every pass.
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2
)

# The test order must stay fixed: predictions are matched to subject ids by
# position when the submission file is written.
test_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2
)

In [None]:
class CNN(nn.Module):
    """Fully connected regressor with a single scalar output.

    Despite its name the network contains no convolutional layers: the input is
    flattened and passed through four linear layers (16 -> 4096 -> 1024 -> 1)
    with ReLU and dropout in between.

    Args:
        in_features: Size of the flattened input. The default, 129 * 30000 =
            3870000, is the width that was previously hard-coded.
    """

    def __init__(self, in_features=129 * 30000):
        super().__init__()
        # Records the module-level DEVICE; the layers are not moved by this.
        self.device = torch.device(DEVICE)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=in_features, out_features=16, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=16, out_features=4096, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=4096, out_features=1024, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=1024, out_features=1, bias=True)
        )

    def forward(self, x):
        """Return the output of the classifier stack for ``x``, one value per sample."""
        return self.classifier(x)

In [None]:
class DNN(nn.Module):
    """Fully connected regressor with a single scalar output.

    The input is flattened and passed through five linear layers
    (128 -> 64 -> 32 -> 16 -> 1) with ReLU and dropout in between.

    Args:
        in_features: Size of the flattened input. The default, 129 * 30000 =
            3870000, is the width that was previously hard-coded.
    """

    def __init__(self, in_features=129 * 30000):
        super().__init__()
        # Records the module-level DEVICE; the layers are not moved by this.
        self.device = torch.device(DEVICE)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=in_features, out_features=128, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=128, out_features=64, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=64, out_features=32, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_features=32, out_features=16, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_features=16, out_features=1, bias=True)
        )

    def forward(self, x):
        """Return the output of the classifier stack for ``x``, one value per sample."""
        return self.classifier(x)

In [None]:
def train(model, device, train_loader, lr, epochs, checkpoint_path="best_model_state.bin"):
    """Train ``model`` with L1 loss and NAdam, keeping the best-epoch weights.

    For every epoch the sample-weighted mean training loss is computed. Whenever
    it improves on the best value so far, the model state is saved to
    ``checkpoint_path``. After the last epoch the best saved state is loaded back
    into ``model`` and the per-epoch loss curve is plotted.

    Note that "best" is judged on the training loss (measured with dropout
    active), not on a held-out validation set.

    Args:
        model: Module mapping a ``(batch, channels, samples)`` tensor to one
            value per sample.
        device: Device the batches are moved to; ``model`` must already be on it.
        train_loader: Iterable yielding ``((data_eo, data_ec), age)`` batches.
        lr: Learning rate for NAdam.
        epochs: Number of passes over ``train_loader``.
        checkpoint_path: File the best model state is written to and restored from.
    """
    loss_log = []
    # Initialised up front: it used to be read before its first assignment.
    best_epoch_loss = float("inf")
    checkpoint_saved = False
    model.train()
    lossfunc = nn.L1Loss(reduction='mean')
    optimizer = torch.optim.NAdam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        loss_sum = 0.0
        n_seen = 0
        print(f"Epoch #{1 + epoch:02}: ", end="")
        for data, age in train_loader:
            data_eo, data_ec = data
            data_eo = data_eo.to(device)           # shape = (batch_size, 129, 10000)
            data_ec = data_ec.to(device)           # shape = (batch_size, 129, 20000)
            age = age.to(device)                   # shape = (batch_size)
            batch_size = age.size(0)
            x = torch.cat((data_eo, data_ec), -1)  # shape = (batch_size, 129, 30000)
            optimizer.zero_grad()
            output = model(x)
            # reshape(-1) keeps a 1-D tensor even for a batch of one, and the
            # target is cast to the output dtype so both loss operands agree.
            loss = lossfunc(output.reshape(-1), age.to(output.dtype))
            loss.backward()
            optimizer.step()

            # The batch loss is a mean, so weight it by the batch size to get a
            # correct per-sample average over the whole epoch.
            loss_sum += loss.item() * batch_size
            n_seen += batch_size

        epoch_loss = loss_sum / n_seen if n_seen else float("nan")
        if epoch_loss < best_epoch_loss:
            torch.save(model.state_dict(), checkpoint_path)
            best_epoch_loss = epoch_loss
            checkpoint_saved = True
        loss_log.append(epoch_loss)
        print(epoch_loss)
    # Restore from the same file that was written, and only if this run wrote one.
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    plt.plot(loss_log)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('Loss')

In [None]:
# Build the network and move its parameters to the selected device in one step.
model_DNN = DNN().to(DEVICE)

In [None]:
# NOTE(review): torchsummary is a third-party package that the install cells at the
# top of this notebook do not install; this import also sits outside the main import cell.
from torchsummary import summary

# The size (129, 30000) matches the concatenated tensor that train() feeds to the model.
summary(model_DNN, (129, 30000))

In [None]:
# Train the dense model: learning rate 0.003 for 30 epochs.
train(model=model_DNN, device=DEVICE, train_loader=train_loader, lr=0.003, epochs=30)

In [None]:
!nvidia-smi


In [None]:
def test(model, device, test_loader):
    y_pred = []
    for data, age in test_loader:
        data_eo, data_ec = data
        data_eo = data_eo.to(device)           # shape = (batch_size, 129, 10000)
        data_ec = data_ec.to(device)           # shape = (batch_size, 129, 20000)
        x = torch.cat((data_eo, data_ec), -1)  # shape = (batch_size, 129, 30000)
        output = torch.squeeze(model(x))
        #print(output.shape)
        y_pred.extend(list(output.detach().cpu().numpy()))
    return y_pred

In [None]:
# Predict ages for the public test set, in loader order.
y_pred = test(model=model_DNN, device=DEVICE, test_loader=test_loader)

In [None]:
# Sanity check: print every negative predicted age together with its position.
# `i` ends up as the number of negative predictions, `j` as the number inspected.
i = 0
for j, x in enumerate(y_pred):
    if x < 0:
        print(x, j)
        i += 1
j = len(y_pred)

In [None]:
# Build the submission table: test subject ids start at 1201 and are paired with
# the predictions by position.
submission = [
    {"id": subj, "age": pred}
    for subj, pred in zip(range(1201, 1201 + test_subj), y_pred)
]
submission_frame = pd.DataFrame(submission)
submission_frame.to_csv("mysubmission1.csv", index=False)

# zip the csv file (without anything else) and submit it on the website!