# 0 Imports

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import widgets, interact

import seaborn as sns
import time

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import normalize
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report

import scipy.io
import matplotlib.pyplot as plt
from ipywidgets import widgets, interact

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
import torch.nn as nn
from torch.nn import BatchNorm2d
from torch.nn import Dropout2d
from torch.nn import Sequential
from torch.nn import Linear
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import Softmax
from torch.nn import Module
from torch.nn import CrossEntropyLoss
from torch.optim import SGD, Adam
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
 
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torchvision.transforms import Compose
from torchvision.transforms import ToTensor
from torchvision.transforms import Normalize
from torchinfo import summary

from livelossplot import PlotLosses

# Seed NumPy's and PyTorch's global random number generators with a fixed value
# so that repeated runs of the notebook draw the same random numbers.
np.random.seed(0) 
torch.manual_seed(0)
# Seed Python's built-in `random` module with the same fixed value.
import random
random.seed(0)

In [8]:
# Constants

# Paths where the dataset is stored
PATH = './'
PATH_TRAIN_CSV = './data/train.csv'
PATH_TRAIN_IMG = './data/train_data.mat'
PATH_TEST_CSV = './data/test.csv'
PATH_TEST_IMG = './data/test_data.mat'

BATCH_SIZE = 32

# Use the GPU when one is available; otherwise fall back to the CPU so that
# the notebook still runs on machines without CUDA instead of failing the
# first time a tensor or model is moved to `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1 Exploração e Preparação dos Dados

### Visualização dos dados

In [13]:
def get_data_from_mat(train_file,test_file):
    """Load the train and test image stacks from two MATLAB ``.mat`` files.

    The variable ``'train_data'`` is read from ``train_file`` and the variable
    ``'test_data'`` from ``test_file``. Each array has its axes reordered with
    ``transpose(2, 0, 1)``, i.e. the last axis is moved to the front.

    Args:
        train_file: Path of the ``.mat`` file holding ``'train_data'``.
        test_file: Path of the ``.mat`` file holding ``'test_data'``.

    Returns:
        A ``(train_np, test_np)`` tuple of NumPy arrays.
    """
    # Read both files before touching their contents.
    loaded = [scipy.io.loadmat(mat_path) for mat_path in (train_file, test_file)]
    train_np, test_np = (
        np.array(contents[key]).transpose(2, 0, 1)
        for contents, key in zip(loaded, ('train_data', 'test_data'))
    )
    return train_np, test_np


def load_data(path_train_csv, path_train_img, path_test_csv, path_test_img):
    """Read the train/test CSV tables and the train/test image stacks.

    Args:
        path_train_csv: Path of the training CSV (first row is the header).
        path_train_img: Path of the training ``.mat`` image file.
        path_test_csv: Path of the test CSV (first row is the header).
        path_test_img: Path of the test ``.mat`` image file.

    Returns:
        A ``(train_csv, test_csv, train_img, test_img)`` tuple: the two
        DataFrames followed by the two arrays from ``get_data_from_mat``.
    """
    train_csv, test_csv = (
        pd.read_csv(csv_path, header=0)
        for csv_path in (path_train_csv, path_test_csv)
    )
    train_img, test_img = get_data_from_mat(path_train_img, path_test_img)
    return train_csv, test_csv, train_img, test_img


def visualize(image):
    """Draw ``image`` with a gray colormap and show the resulting figure."""
    plt.imshow(image, cmap="gray")
    plt.show()

def show_ds(ds):
    """Print summary statistics of ``ds`` and open an interactive image browser.

    Prints the shape, maximum, minimum and average of ``ds``, then creates an
    ``interact`` control whose ``scan_index`` ranges from 0 to ``len(ds) - 1``;
    the selected slice ``ds[scan_index, :, :]`` is drawn with ``visualize``.
    """
    print("ds shape:", ds.shape)
    # Each statistic is computed right before it is printed.
    for label, statistic in (
        ("ds max:", np.max),
        ("ds min:", np.min),
        ("ds average:", np.average),
    ):
        print(label, statistic(ds))

    last_index = len(ds) - 1

    @interact
    def visualize_set(scan_index=(0, last_index)):
        visualize(ds[scan_index,:,:])

In [14]:
# Load the CSV metadata and the image stacks for both splits.
train_csv, test_csv, train_img, test_img = load_data(
    path_train_csv=PATH_TRAIN_CSV,
    path_train_img=PATH_TRAIN_IMG,
    path_test_csv=PATH_TEST_CSV,
    path_test_img=PATH_TEST_IMG,
)

# For each split: print its heading and table, then show the image browser.
for _heading, _table, _images in (
    ("Data de treino:", train_csv, train_img),
    ("Data de Teste:", test_csv, test_img),
):
    print(_heading)
    print(_table)
    show_ds(_images)

Data de treino:
      id  age  sex  education
0      1   13    1          7
1      2   14    0          8
2      3   15    1          9
3      4   15    1          9
4      5   15    1          9
..   ...  ...  ...        ...
107  108   77    1          4
108  109   67    0          4
109  110   55    0          4
110  111   76    1          3
111  112   69    0          4

[112 rows x 4 columns]
ds shape: (112, 90, 90)
ds max: 1.0
ds min: 0.0
ds average: 0.024116160549305543


interactive(children=(IntSlider(value=55, description='scan_index', max=111), Output()), _dom_classes=('widget…

Data de Teste:
    id  sex  education
0    1    0         13
1    2    0         11
2    3    1          9
3    4    1         13
4    5    0         12
5    6    0         17
6    7    0          9
7    8    0          4
8    9    1          9
9   10    1          4
10  11    1         14
11  12    0          9
12  13    1          2
13  14    1          5
14  15    1          9
15  16    1         11
16  17    1          9
17  18    0         17
18  19    1         15
19  20    1         14
20  21    1          4
21  22    0          4
22  23    1          4
23  24    0          0
24  25    1          2
25  26    1          4
26  27    1          3
27  28    1          4
ds shape: (28, 90, 90)
ds max: 1.0
ds min: 0.0
ds average: 0.024994026906171023


interactive(children=(IntSlider(value=13, description='scan_index', max=27), Output()), _dom_classes=('widget-…

### Preparação dos dados

In [15]:
def img_to_list(img):
    """Flatten the strict lower triangle of a square image into a list.

    Elements are taken row by row, and within each row from column 0 up to
    (but excluding) the diagonal, so the diagonal itself is left out.

    Args:
        img: Indexable 2-D structure addressed as ``img[row][col]``.

    Returns:
        A list with the elements below the diagonal, in row-major order.
    """
    return [
        img[row][col]
        for row in range(len(img))
        for col in range(row)
    ]

def imgs_to_matrix(imgs):
    """Convert each image in ``imgs`` to a row using ``img_to_list``.

    Returns:
        A list with one flattened row per image, in the input order.
    """
    return [img_to_list(single_img) for single_img in imgs]

def remove_null_columns(matrix1, matrix2):
    """Drop the columns that are zero in every row of both matrices.

    A column is removed only when all of its entries in ``matrix1`` and in
    ``matrix2`` equal 0. The rows are modified in place (as lists) and the
    same matrix objects are returned.

    Args:
        matrix1: List of row lists (e.g. the training rows).
        matrix2: List of row lists with the same column layout as ``matrix1``.

    Returns:
        A ``(matrix1, matrix2, brain_activity_index)`` tuple, where
        ``brain_activity_index`` lists the original indices of the columns
        that were kept, in ascending order.
    """
    # Take the width from whichever matrix has a row, so an empty
    # ``matrix1`` no longer raises IndexError.
    if len(matrix1) > 0:
        column_len = len(matrix1[0])
    elif len(matrix2) > 0:
        column_len = len(matrix2[0])
    else:
        return matrix1, matrix2, []

    # ``any`` stops scanning a column at its first non-zero entry.
    brain_activity_index = [
        column
        for column in range(column_len)
        if any(row[column] != 0 for row in matrix1)
        or any(row[column] != 0 for row in matrix2)
    ]

    if len(brain_activity_index) != column_len:
        # Rebuild each row once instead of popping column by column. The
        # slice assignment keeps the row objects (callers holding a
        # reference still see the change) and leaves untouched anything
        # stored beyond the first ``column_len`` entries.
        for row in matrix1:
            row[:column_len] = [row[column] for column in brain_activity_index]
        for row in matrix2:
            row[:column_len] = [row[column] for column in brain_activity_index]

    return matrix1, matrix2, brain_activity_index

def join_data(train_csv, test_csv, train_img, test_img):
    """Append the flattened image features to the train and test tables.

    Every image is flattened with ``imgs_to_matrix``, columns that are zero
    across both sets are dropped with ``remove_null_columns``, and the
    remaining columns are concatenated to the right of the CSV columns under
    the names ``rel-<original column index>``.

    Returns:
        A ``(train_data, test_data, brain_activity_index)`` tuple: the two
        combined DataFrames and the list of kept column indices.
    """
    train_clean_matrix, test_clean_matrix, brain_activity_index = remove_null_columns(
        imgs_to_matrix(train_img),
        imgs_to_matrix(test_img),
    )
    feature_names = [f'rel-{kept_index}' for kept_index in brain_activity_index]

    def _attach_features(table, feature_rows):
        # Place the features next to the table, then label every column.
        combined = pd.concat([table, pd.DataFrame(feature_rows)], axis=1)
        combined.columns = list(table.columns) + feature_names
        return combined

    train_data = _attach_features(train_csv, train_clean_matrix)
    test_data = _attach_features(test_csv, test_clean_matrix)
    return train_data, test_data, brain_activity_index

In [21]:
# Merge the CSV tables with the flattened image features, then print both.
train_data, test_data, brain_activity_index = join_data(
    train_csv, test_csv, train_img, test_img
)
print(train_data, test_data, sep="\n")

      id  age  sex  education     rel-1  rel-3     rel-4     rel-5     rel-8   
0      1   13    1          7  0.131525    0.0  0.136950  0.000000  0.146309  \
1      2   14    0          8  0.117704    0.0  0.132998  0.037714  0.113425   
2      3   15    1          9  0.155806    0.0  0.165429  0.022278  0.096080   
3      4   15    1          9  0.145214    0.0  0.162493  0.000000  0.124115   
4      5   15    1          9  0.161360    0.0  0.172043  0.017686  0.099488   
..   ...  ...  ...        ...       ...    ...       ...       ...       ...   
107  108   77    1          4  0.215767    0.0  0.184942  0.019446  0.109972   
108  109   67    0          4  0.143005    0.0  0.124350  0.000000  0.090189   
109  110   55    0          4  0.116085    0.0  0.156406  0.020947  0.122589   
110  111   76    1          3  0.156654    0.0  0.166613  0.046464  0.091244   
111  112   69    0          4  0.114586    0.0  0.128877  0.045583  0.124734   

       rel-13  ...  rel-3975  rel-3977 

# 2 Definir o Modelo

# 3 Treinar o Modelo

# 4 Avaliar o Modelo

# 5 Testar o Modelo