# 0 Imports

In [1]:
import copy
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io
import seaborn as sns
from ipywidgets import widgets, interact

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import normalize
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
import torch.nn as nn
from torch.nn import BatchNorm2d
from torch.nn import Dropout2d
from torch.nn import Sequential
from torch.nn import Linear
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import Softmax
from torch.nn import Module
from torch.nn import CrossEntropyLoss
from torch.optim import SGD, Adam
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_

import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torchvision.transforms import Compose
from torchvision.transforms import ToTensor
from torchvision.transforms import Normalize
from torchinfo import summary

from livelossplot import PlotLosses

# Seed Python's, NumPy's and PyTorch's random number generators with the
# same fixed value so that repeated runs of the notebook start from the
# same random state.
import random

random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

In [2]:
# Constants

# path where the dataset is stored
PATH = './'
PATH_TRAIN_CSV = './data/train.csv'
PATH_TRAIN_IMG = './data/train_data.mat'
PATH_TEST_CSV = './data/test.csv'
PATH_TEST_IMG = './data/test_data.mat'

BATCH_SIZE = 32

# Use the GPU when one is available and fall back to the CPU otherwise, so
# the notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1 Exploração e Preparação dos Dados

### Visualização dos dados

In [3]:
def get_data_from_mat(train_file,test_file):
    """Load the train and test image stacks from two MATLAB ``.mat`` files.

    The arrays are read from the ``'train_data'`` and ``'test_data'``
    variables respectively, and their axes are reordered with
    ``transpose(2, 0, 1)`` so that the last axis of the stored array becomes
    the first one (presumably the per-subject axis; confirm against the
    data files).

    Parameters
    ----------
    train_file : path of the ``.mat`` file holding ``'train_data'``.
    test_file : path of the ``.mat`` file holding ``'test_data'``.

    Returns
    -------
    (train_np, test_np) : tuple of numpy arrays with the reordered axes.
    """
    # Read both files before touching their contents.
    loaded = [scipy.io.loadmat(mat_path) for mat_path in (train_file, test_file)]
    variable_names = ('train_data', 'test_data')
    train_np, test_np = [
        np.array(contents[variable]).transpose(2, 0, 1)
        for contents, variable in zip(loaded, variable_names)
    ]
    return train_np, test_np


def load_data(path_train_csv, path_train_img, path_test_csv, path_test_img):
    """Read the train/test CSV tables and the train/test image stacks.

    Parameters
    ----------
    path_train_csv, path_test_csv : paths of the CSV files; the first row
        of each file is used as the header.
    path_train_img, path_test_img : paths of the ``.mat`` files passed on
        to ``get_data_from_mat``.

    Returns
    -------
    (train_csv, test_csv, train_img, test_img) : the two DataFrames
    followed by the two image arrays.
    """
    train_csv, test_csv = [
        pd.read_csv(csv_path, header=0)
        for csv_path in (path_train_csv, path_test_csv)
    ]
    train_img, test_img = get_data_from_mat(path_train_img, path_test_img)
    return train_csv, test_csv, train_img, test_img


def visualize(image):
    """Draw ``image`` with a gray colormap and show the resulting figure."""
    plt.imshow(image, cmap="gray")
    plt.show()

def show_ds(ds):
    """Print summary statistics of ``ds`` and open an interactive browser.

    Prints the shape, maximum, minimum and average of ``ds``, then creates
    an ``interact`` widget whose ``scan_index`` slider ranges from 0 to
    ``len(ds) - 1`` and displays ``ds[scan_index, :, :]`` via ``visualize``.
    """
    summaries = (
        ("ds shape:", lambda arr: arr.shape),
        ("ds max:", np.max),
        ("ds min:", np.min),
        ("ds average:", np.average),
    )
    for label, compute in summaries:
        print(label, compute(ds))

    last_index = len(ds) - 1

    def visualize_set(scan_index=(0, last_index)):
        visualize(ds[scan_index, :, :])

    # Equivalent to decorating visualize_set with @interact.
    interact(visualize_set)

In [4]:
# Load the CSV tables and image stacks, then print each table and open the
# interactive browser for its images (train first, test second).
train_csv, test_csv, train_img, test_img = load_data(PATH_TRAIN_CSV, PATH_TRAIN_IMG, PATH_TEST_CSV, PATH_TEST_IMG)

for heading, table, images in (
    ("Data de treino:", train_csv, train_img),
    ("Data de Teste:", test_csv, test_img),
):
    print(heading)
    print(table)
    show_ds(images)

Data de treino:
      id  age  sex  education
0      1   13    1          7
1      2   14    0          8
2      3   15    1          9
3      4   15    1          9
4      5   15    1          9
..   ...  ...  ...        ...
107  108   77    1          4
108  109   67    0          4
109  110   55    0          4
110  111   76    1          3
111  112   69    0          4

[112 rows x 4 columns]
ds shape: (112, 90, 90)
ds max: 1.0
ds min: 0.0
ds average: 0.024116160549305543


interactive(children=(IntSlider(value=55, description='scan_index', max=111), Output()), _dom_classes=('widget…

Data de Teste:
    id  sex  education
0    1    0         13
1    2    0         11
2    3    1          9
3    4    1         13
4    5    0         12
5    6    0         17
6    7    0          9
7    8    0          4
8    9    1          9
9   10    1          4
10  11    1         14
11  12    0          9
12  13    1          2
13  14    1          5
14  15    1          9
15  16    1         11
16  17    1          9
17  18    0         17
18  19    1         15
19  20    1         14
20  21    1          4
21  22    0          4
22  23    1          4
23  24    0          0
24  25    1          2
25  26    1          4
26  27    1          3
27  28    1          4
ds shape: (28, 90, 90)
ds max: 1.0
ds min: 0.0
ds average: 0.024994026906171023


interactive(children=(IntSlider(value=13, description='scan_index', max=27), Output()), _dom_classes=('widget-…

### Preparação dos dados

In [8]:
def fix_sex_id_columns(train_csv, test_csv):
    """Replace the ``sex`` column by ``female``/``male`` flags and drop ``id``.

    For each frame the values of ``sex`` are kept unchanged under the name
    ``female`` and a complementary ``male`` column is added that is 0 where
    ``sex == 1`` and 1 everywhere else. The ``sex`` and ``id`` columns are
    removed; the new columns are appended after the remaining ones, in the
    order ``female``, ``male``.

    The input frames are NOT modified: new DataFrames are returned, so the
    function can be called repeatedly on the same inputs.

    Parameters
    ----------
    train_csv, test_csv : DataFrames containing at least ``sex`` and ``id``.

    Returns
    -------
    (train_fixed, test_fixed) : the two transformed DataFrames.

    Raises
    ------
    KeyError : if ``sex`` or ``id`` is missing from one of the frames.
    """
    def _fix(frame):
        sex = frame['sex']
        # Anything that is not exactly 1 counts as male, as in the original loop.
        male = (sex != 1).astype('int64')
        return frame.drop(columns=['sex', 'id']).assign(female=sex, male=male)

    return _fix(train_csv), _fix(test_csv)
    
def img_to_list(img):
    """Flatten the strictly lower triangle of a square matrix into a list.

    Elements are collected row by row, taking for row ``r`` the columns
    ``0 .. r-1`` (the diagonal is excluded).
    """
    return [
        img[row][col]
        for row in range(len(img))
        for col in range(row)
    ]

def imgs_to_matrix(imgs):
    """Apply ``img_to_list`` to every image and return the list of results."""
    return [img_to_list(single_img) for single_img in imgs]

def join_data(train_csv, test_csv, train_img, test_img):
    """Append the flattened image values to the CSV tables as columns.

    Each image is flattened with ``imgs_to_matrix`` and the resulting
    columns are named ``rel-0``, ``rel-1``, ... and placed after the
    columns of the corresponding CSV table.

    Returns
    -------
    (train_data, test_data) : the two widened DataFrames.
    """
    def _attach(table, matrix):
        widened = pd.concat([table, pd.DataFrame(matrix)], axis=1)
        relation_names = [f'rel-{i}' for i in range(len(matrix[0]))]
        widened.columns = list(table.columns) + relation_names
        return widened

    train_matrix = imgs_to_matrix(train_img)
    test_matrix = imgs_to_matrix(test_img)
    train_data = _attach(train_csv, train_matrix)
    test_data = _attach(test_csv, test_matrix)
    return train_data , test_data

def remove_non_relations(train_data, test_data):
    """Return copies of both frames without ``education``, ``female``, ``male``.

    The inputs are left untouched (the previous implementation dropped the
    columns in place), which matches ``remove_null_columns`` and makes the
    function safe to call without copying the frames first.

    Returns
    -------
    (train_data, test_data) : new DataFrames without the three columns.

    Raises
    ------
    KeyError : if one of the three columns is missing from a frame.
    """
    non_relation_columns = ['education', 'female', 'male']
    return (
        train_data.drop(columns=non_relation_columns),
        test_data.drop(columns=non_relation_columns),
    )

def remove_education(train_data, test_data):
    """Return copies of both frames without the ``education`` column.

    The inputs are left untouched (the previous implementation dropped the
    column in place), which matches ``remove_null_columns`` and makes the
    function safe to call without copying the frames first.

    Returns
    -------
    (train_data, test_data) : new DataFrames without ``education``.

    Raises
    ------
    KeyError : if ``education`` is missing from a frame.
    """
    return (
        train_data.drop(columns=['education']),
        test_data.drop(columns=['education']),
    )

def remove_null_columns(train_data, test_data):
    """Drop every column that is entirely zero in both frames.

    Only the columns of ``test_data`` are examined, and each of them must
    also exist in ``train_data``. The check is not restricted to the
    ``rel-*`` columns: any examined column that is all zeros in both frames
    is removed.

    Returns
    -------
    (train_data, test_data, columns_to_drop) : the two reduced DataFrames
    (new objects) and the list of removed column names, in ``test_data``
    column order.
    """
    columns_to_drop = [
        name
        for name in test_data.columns
        if (train_data[name] == 0).all() and (test_data[name] == 0).all()
    ]
    reduced_train = train_data.drop(columns=columns_to_drop)
    reduced_test = test_data.drop(columns=columns_to_drop)
    return reduced_train, reduced_test, columns_to_drop

In [10]:
# Reload the raw tables and images so this cell starts from unmodified data.
train_csv, test_csv, train_img, test_img = load_data(PATH_TRAIN_CSV, PATH_TRAIN_IMG, PATH_TEST_CSV, PATH_TEST_IMG)
train_csv_fix, test_csv_fix = fix_sex_id_columns(train_csv, test_csv)

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before any file is written.
PROCESSED_DIR = './processed-data'
os.makedirs(PROCESSED_DIR, exist_ok=True)


def report_and_save(title, name, train_data, test_data):
    """Print the first row of both frames and write them to PROCESSED_DIR.

    The files are named ``train_<name>.csv`` and ``test_<name>.csv`` and are
    written without the index column.
    """
    print(title)
    print(train_data.head(1))
    print(test_data.head(1))
    train_data.to_csv(f'{PROCESSED_DIR}/train_{name}.csv', index=False)
    test_data.to_csv(f'{PROCESSED_DIR}/test_{name}.csv', index=False)


# everything (sex, education, relations)
train_complete_data, test_complete_data = join_data(train_csv_fix, test_csv_fix, train_img, test_img)
report_and_save('\nComplete:', 'complete_data', train_complete_data, test_complete_data)

# without null relations (sex, education, not-null relations)
train_complete_not_null_data, test_complete_not_null_data, unrelated_brain_zones = remove_null_columns(copy.copy(train_complete_data), copy.copy(test_complete_data))
report_and_save('\nComplete without null relations:', 'complete_not_null_data', train_complete_not_null_data, test_complete_not_null_data)

print('\nUnrelated brain zones:')
print(unrelated_brain_zones[:50])

# only sex and not-null relations
# (copies are passed so the source frames stay intact whatever the helper does)
train_sex_not_null_data, test_sex_not_null_data = remove_education(copy.copy(train_complete_not_null_data), copy.copy(test_complete_not_null_data))
report_and_save('\nOnly sex and without null relations:', 'sex_not_null_data', train_sex_not_null_data, test_sex_not_null_data)

# only not-null relations
train_relations_not_null_data, test_relations_not_null_data = remove_non_relations(copy.copy(train_complete_not_null_data), copy.copy(test_complete_not_null_data))
report_and_save('\nOnly not null relations:', 'relations_not_null_data', train_relations_not_null_data, test_relations_not_null_data)

# only relations (null and not-null relations)
train_relations_data, test_relations_data = remove_non_relations(copy.copy(train_complete_data), copy.copy(test_complete_data))
report_and_save('\nOnly relations:', 'relations_data', train_relations_data, test_relations_data)


Complete:
   age  education  female  male  rel-0     rel-1  rel-2  rel-3    rel-4   
0   13          7       1     0    0.0  0.131525    0.0    0.0  0.13695  \

   rel-5  ...  rel-3995  rel-3996  rel-3997  rel-3998  rel-3999  rel-4000   
0    0.0  ...       0.0       0.0       0.0       0.0       0.0       0.0  \

   rel-4001  rel-4002  rel-4003  rel-4004  
0  0.585006       0.0  0.280606       0.0  

[1 rows x 4009 columns]
   education  female  male  rel-0     rel-1  rel-2  rel-3     rel-4     rel-5   
0         13       0     1    0.0  0.140891    0.0    0.0  0.158141  0.034232  \

   rel-6  ...  rel-3995  rel-3996  rel-3997  rel-3998  rel-3999  rel-4000   
0    0.0  ...       0.0       0.0       0.0       0.0       0.0       0.0  \

   rel-4001  rel-4002  rel-4003  rel-4004  
0  0.612109       0.0  0.193069       0.0  

[1 rows x 4008 columns]

Complete without null relations:
   age  education  female  male     rel-1  rel-3    rel-4  rel-5     rel-8   
0   13          7       1  

# 2 Definir o Modelo

# 3 Treinar o Modelo

# 4 Avaliar o Modelo

# 5 Testar o Modelo