In [1]:
cd drive/MyDrive/ds_project2/

/content/drive/MyDrive/ds_project2


In [2]:
ls

 autoencoder        Autoencoder_mri.ipynb       sex.npy
 autoencoder2_mri   brains.csv                 'smri gan LA5 128.ipynb'
 autoencoder3_mri  'fmri gan LA5 64.ipynb'      start_project.ipynb
 autoencoder4_mri   labels.npy                  tensors.npy
 autoencoder5_mri   preprocessed_mri_data.npy   unrestricted.csv
 autoencoder_mri    [0m[01;34mruns[0m/                       unrestricted_hcp_freesurfer.csv


### Set up configuration

In [3]:
# config parameters
PRETRAIN = True
PATH = 'autoencoder5_mri'
SAVE = False

# Import libraries

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import collections
from sklearn.decomposition import PCA
from matplotlib import pyplot as plt
from sklearn.feature_selection import SelectKBest
import nibabel
import warnings
warnings.simplefilter("ignore")

%load_ext tensorboard

In [5]:
# X, y = np.load('preprocessed_mri_data.npy'), np.load('sex.npy')
X, y = np.load('tensors.npy'), np.load('labels.npy')
X = X[:, np.newaxis, :, :, :]

print(X.shape, y.shape)

(1113, 1, 58, 70, 58) (1113,)


# Classification on flatten data

Make classification on initial data for gender

In [6]:
from torchvision import datasets, models, transforms
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

def accuracy(X_train, y_train, X_val, y_val):
  clf = LogisticRegression(random_state=0).fit(X_train.reshape(X_train.shape[0], np.prod(X_train.shape[1:])), y_train)
  return clf.predict(X_val.reshape(X_val.shape[0], np.prod(X_val.shape[1:])))

print('Accuracy score on initial data', accuracy_score(y_val, accuracy(X_train, y_train, X_val, y_val)))

Accuracy score on initial data 0.9327354260089686


## Autoencoder

In [8]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [9]:
class MriData(Dataset):
    def __init__(self, X, y):
        super(MriData, self).__init__()
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y).long()

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [10]:

train_dataset = MriData(X_train, y_train)
test_dataset = MriData(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=45, shuffle=True) 
val_loader = DataLoader(test_dataset, batch_size=28, shuffle=False)

In [11]:
X_train.shape

(890, 1, 58, 70, 58)

In [12]:
n_features = 16
n_outputs = 1000

In [13]:
# both discriminator and standard classifier (with different number of outputs)
class Encoder(nn.Module):
    def __init__(self, n_features=n_features, n_outputs=n_outputs):
        super(Encoder, self).__init__()
        self.main = nn.ModuleList([
            nn.Conv3d(1, n_features, kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv3d(n_features, n_features * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm3d(n_features * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv3d(n_features * 2, n_features * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm3d(n_features * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv3d(n_features * 4, n_features * 8, kernel_size=4, stride=2, padding=1, bias=False)])
        self.main2 = nn.ModuleList([
            nn.BatchNorm3d(n_features * 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Flatten(),
            nn.Linear(4608, n_outputs), # outputs logits
        ])

    def forward(self, x):
        for i in range(len(self.main)):
            # print()
            x = self.main[i](x)
        shape = x.shape
        for i in range(len(self.main2)):
            # print()
            x = self.main2[i](x)
        return x, shape

In [14]:
class Decoder(nn.Module):
    def __init__(self, n_outputs=n_outputs, n_features=n_features):
        super(Decoder, self).__init__()
        self.latent_size = n_outputs
        self.main = nn.ModuleList([                      
            nn.ConvTranspose3d(n_features * 8, n_features * 4, kernel_size=4, stride=2, padding=1,output_padding=(1, 0, 1), bias=False),
            nn.BatchNorm3d(n_features * 4),
            nn.ReLU(True),
            nn.ConvTranspose3d(n_features * 4, n_features * 2, kernel_size=4, stride=2, padding=1, output_padding=(0, 1, 0),bias=False),
            nn.BatchNorm3d(n_features * 2),
            nn.ReLU(True),
            nn.ConvTranspose3d(n_features * 2, n_features, kernel_size=4, stride=2, padding=1, output_padding=(1, 1, 1), bias=False),
            nn.BatchNorm3d(n_features),
            nn.ReLU(True),
            nn.ConvTranspose3d(n_features, 1, kernel_size=4, stride=2, padding=1, bias=False),
        ])

    def forward(self, x):
        for i in range(len(self.main)):
            x = self.main[i](x)
        return x

In [15]:
encoder = Encoder(n_features, n_outputs)
decoder = Decoder(n_outputs, n_features)

In [16]:
class MyFirstAE(nn.Module):
    def __init__(self, encoder, decoder, n_outputs):
        super(MyFirstAE, self).__init__()
        self.encoder = encoder
        self.n_outputs = n_outputs
        self.fc = nn.Linear(n_outputs, 4608)
        self.decoder = decoder
    
    def forward(self, x):
        """
        Take a mini-batch as an input, encode it to the latent space and decode back to the original space
        x_out = decoder(encoder(x))
        :param x: torch.tensor, (MB, x_dim)
        :return: torch.tensor, (MB, x_dim)
        """
        enc, shape = self.encoder(x)
        x = self.fc(enc)
        x = x.reshape(shape)
        x = self.decoder(x)
        return x

In [17]:
device =  torch.device("cuda" if torch.cuda.is_available() else "cpu")

net = MyFirstAE(encoder, decoder, n_outputs)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)
scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 0.95) 
if PRETRAIN:
  net.load_state_dict(torch.load(PATH))

# Set up tensorboard

In [18]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()


In [22]:
%tensorboard --logdir runs

Reusing TensorBoard on port 6006 (pid 231), started 3:40:36 ago. (Use '!kill 231' to kill it.)

<IPython.core.display.Javascript object>

# Train model and the save

In [22]:
def train(epochs, net, criterion, optimizer, train_loader, val_loader,scheduler=None, verbose=True, save_dir=None):
    
    freq = 3
    net.to(device)
    
    for epoch in range(1, epochs+1):
        net.train()

        losses_train = []
        for X, _ in train_loader:
            # Perform one step of minibatch stochastic gradient descent
            X = X.to(device)
            predicts = net.forward(X)
            loss = criterion(predicts, X)
            losses_train.append(loss)   
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()  
            print(loss) 
            writer.add_scalar("Loss/train", loss, epoch)
        # define NN evaluation, i.e. turn off dropouts, batchnorms, etc.
        net.eval()
        for X, _ in val_loader:
            X = X.to(device)
            losses_val = []
            # Compute the validation loss
            predicts = net.forward(X)
            loss = criterion(predicts, X)
            losses_val.append(loss)
            writer.add_scalar("Loss/test", loss, epoch)
        if scheduler is not None:
            scheduler.step()
        
        if verbose and epoch%freq==0:
            mean_val = sum(losses_val)/len(losses_val)
            mean_train = sum(losses_train)/len(losses_train)

            print('Epoch {}/{} || Loss:  Train {:.4f} | Validation {:.4f}'\
                  .format(epoch, epochs, mean_train, mean_val))
    writer.close()

In [23]:
train(20, net, criterion, optimizer, train_loader, val_loader, None) 

tensor(2641.5764, grad_fn=<MseLossBackward>)
tensor(5700.8159, grad_fn=<MseLossBackward>)
tensor(4048.4753, grad_fn=<MseLossBackward>)
tensor(3807.1555, grad_fn=<MseLossBackward>)
tensor(3299.9104, grad_fn=<MseLossBackward>)
tensor(3370.7563, grad_fn=<MseLossBackward>)
tensor(3181.8535, grad_fn=<MseLossBackward>)
tensor(2930.3218, grad_fn=<MseLossBackward>)
tensor(3099.5964, grad_fn=<MseLossBackward>)
tensor(2872.1882, grad_fn=<MseLossBackward>)
tensor(2936.2539, grad_fn=<MseLossBackward>)
tensor(3241.7273, grad_fn=<MseLossBackward>)
tensor(2799.4377, grad_fn=<MseLossBackward>)
tensor(2727.9268, grad_fn=<MseLossBackward>)
tensor(2727.0269, grad_fn=<MseLossBackward>)
tensor(2852.3494, grad_fn=<MseLossBackward>)
tensor(2643.2708, grad_fn=<MseLossBackward>)
tensor(2847.2158, grad_fn=<MseLossBackward>)
tensor(2597.9810, grad_fn=<MseLossBackward>)
tensor(2755.1333, grad_fn=<MseLossBackward>)
tensor(2775.5554, grad_fn=<MseLossBackward>)
tensor(2762.9380, grad_fn=<MseLossBackward>)
tensor(258

In [24]:
# if SAVE:  
torch.save(net.state_dict(), 'autoencoder5_mri')

# Model inference

In [18]:
net.eval()
latent_train, shape_train = net.encoder(torch.tensor(X_train, dtype=torch.float))
latent_test, shape_test = net.encoder(torch.tensor(X_val, dtype=torch.float))

# Classification on latent data

Make classification on latent data for gender and compare obtained results with classification score on initial data



In [19]:
print('Accuracy score on latent data', accuracy_score(y_val, accuracy(latent_train.detach().numpy(), y_train, latent_test.detach().numpy(), y_val)))

Accuracy score on latent data 0.9327354260089686


# Create vectors of average women and men brains

In [46]:
preds = accuracy(latent_train.detach().numpy(), y_train, latent_test.detach().numpy(), y_val)
# index of man and women
idx1 = np.where(preds==1)[0]
idx0 = np.where(preds==0)[0]
# mean latent vector of men and women brain
mean_m = latent_test[idx0, :].mean(axis=0)
mean_w = latent_test[idx1, :].mean(axis=0)
# decode values
decoded_m = net.decoder(torch.tensor(net.fc(mean_m).reshape(shape_test[1:]).unsqueeze(0), dtype=torch.float)).detach().numpy()
decoded_w = net.decoder(torch.tensor(net.fc(mean_w).reshape(shape_test[1:]).unsqueeze(0), dtype=torch.float)).detach().numpy()

In [None]:
mean_brains = []
mean_brains.append(list(decoded_m[0]))
mean_brains.append(list(decoded_w[0]))

brain = pd.DataFrame(mean_brains, columns=list(X_tr.columns), index=['man', 'women'])
brain.to_csv("brain_mri.csv", index=False)

# Classification for age on initial and latent data

In [53]:
age_labels = pd.read_csv('age.csv')

In [54]:
le = LabelEncoder()
le.fit(age_labels)
y_age = le.transform(age_labels)

In [63]:
np.unique(y_age, return_counts=True)

(array([0, 1, 2, 3]), array([231, 486, 383,  13]))

In [66]:
age_labels.value_counts()

Age  
26-30    486
31-35    383
22-25    231
36+       13
dtype: int64

In [None]:
# from sklearn.model_selection import cross_validate
# from sklearn.multiclass import OneVsRestClassifier
# from sklearn.multiclass import OneVsOneClassifier


In [55]:
# X_train_age, X_val_age, y_train_age, y_val_age = train_test_split(X, y_age, test_size=0.2, stratify=y_age, random_state=42)