## Importando Bibliotecas

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset

import numpy as np
import pandas as pd
import time

import matplotlib.pyplot as plt
%matplotlib inline

## Definindo Hiperparâmetros

In [None]:
# Training hyperparameters; the 'device' entry is added right below.
args = dict(
    batch_size=100,
    num_workers=16,
    epoch_num=300,
    lr=0.006,
    weight_decay=3e-05,
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
args['device'] = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Bare expression: shows the resulting dict as the cell output.
args

## Lendo arquivo com os dados

In [None]:
# Alternative input files, currently disabled. To switch datasets, uncomment
# one of these and comment out the active read below.
# df = pd.read_csv('dados/sp_completo.csv')
# df = pd.read_csv('dados/carros_sem_outliers.csv')
# df = pd.read_csv('dados/ka_sp.csv')
# df = pd.read_csv('dados/hb20_sp.csv')
# df = pd.read_csv('dados/sp_ka.csv')

# Dataset actually loaded by this run.
df = pd.read_csv('teste.csv')

# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Summary statistics from DataFrame.describe(), rounded to two decimals.
df.describe().round(2)

In [None]:
# df.shape is a (rows, columns) tuple.
print(f'Quantidade de registros e de colunas: {df.shape}')

## Separando em conjuntos de treino e de teste

In [None]:
# Shuffle the row positions; the first 80% go to training and the rest
# become the test partition.
train_size = int(0.8 * len(df))
indices = torch.randperm(len(df)).tolist()

df_train = df.iloc[indices[:train_size]]
df_test = df.iloc[indices[train_size:]]

# Write both partitions to disk without the index column.
for particao, destino in ((df_train, 'car_train.csv'), (df_test, 'car_test.csv')):
    particao.to_csv(destino, index=False)

print(df_train.shape, df_test.shape)

## Classe Carro

In [None]:
class Car(Dataset):
    """Dataset backed by a CSV file that is loaded fully into memory.

    Each item is a ``(sample, label)`` pair of float32 tensors: the sample
    holds the first ``columns`` values of a row, the label holds the row's
    last value as a one-element tensor.
    """

    def __init__(self, csv_path, columns):
        # Number of leading values of each row that form the sample.
        self.columns = columns
        # Whole CSV converted to a NumPy array, one entry per CSV row.
        self.dados = pd.read_csv(csv_path).to_numpy()

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.dados)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` tensors for row ``idx``."""
        row = self.dados[idx]
        features = row[:self.columns].astype(np.float32)
        # Slice (not index) so the label keeps a length-1 dimension.
        target = row[-1:].astype(np.float32)
        return torch.from_numpy(features), torch.from_numpy(target)


## Carregando dados de treino e de teste

In [None]:
# Every column except the last one is passed to Car as the sample width.
n_features = df.shape[1] - 1

train_set = Car('car_train.csv', n_features)
test_set = Car('car_test.csv', n_features)

# Options shared by both loaders; only the training loader shuffles.
loader_opts = dict(batch_size=args['batch_size'],
                   num_workers=args['num_workers'])

train_loader = DataLoader(train_set, shuffle=True, **loader_opts)
test_loader = DataLoader(test_set, shuffle=False, **loader_opts)

## Criando MLP

In [None]:
class MLP(nn.Module):
    """Fully connected network with three hidden layers.

    ``features`` stacks three Linear+ReLU pairs (input -> hidden -> hidden
    -> hidden); ``classifier`` is a final Linear followed by ReLU, so the
    network output is never negative.

    NOTE(review): with a ReLU on the output, samples whose pre-activation is
    negative contribute zero gradient -- confirm this clamp is intended.
    """

    def __init__(self, input_size, hidden_size, out_size):
        super().__init__()

        # Build the hidden stack in forward order so the layers are created
        # (and therefore randomly initialised) in the same sequence they run.
        widths = [input_size, hidden_size, hidden_size, hidden_size]
        hidden_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            hidden_layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        self.features = nn.Sequential(*hidden_layers)

        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, out_size),
            nn.ReLU(),
        )

    def forward(self, X):
        """Apply the hidden stack and then the output head to ``X``."""
        return self.classifier(self.features(X))

In [None]:
# Layer widths come from the data: the input width is the length of one
# sample; the hidden width is (input width + 1) / 2, truncated to an int.
input_size = train_set[0][0].shape[0]
hidden_size = int((input_size + 1) / 2)
out_size = 1

net = MLP(input_size, hidden_size, out_size)
net = net.to(args['device'])

# Bare expression: shows the module structure as the cell output.
net

## Definindo loss e otimizador

In [None]:
# Loss that is backpropagated during training: mean absolute error (L1).
criterion = nn.L1Loss().to(args['device'])
# Separate L1 instance whose per-batch value is logged in dif_train/dif_test.
diferenca = nn.L1Loss().to(args['device'])

# Disabled alternative: Adam configured from args['lr'] / args['weight_decay'].
#optimizer = optim.Adam(net.parameters(),
#                       lr=args['lr'],
#                       weight_decay=args['weight_decay'])

# Active optimizer: Adadelta with its default settings, so args['lr'] and
# args['weight_decay'] are not passed to it.
optimizer = optim.Adadelta(net.parameters())

# Per-batch `diferenca` values collected by train() and validate().
dif_train, dif_test = [], []

## Treino e Validação

In [None]:
def train(train_loader, net, epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``args``, ``criterion``, ``diferenca`` and
    ``optimizer`` objects, and appends one value per batch to the
    module-level ``dif_train`` list.

    Args:
        train_loader: iterable yielding ``(dado, rotulo)`` batches.
        net: model to optimise; it is switched to training mode.
        epoch: epoch index. Not used in the body; kept so existing calls
            keep working.

    Returns:
        The unweighted mean of the per-batch ``criterion`` values.
    """
    net.train()
    epoch_loss = []

    for dado, rotulo in train_loader:
        # Move the batch to the configured device.
        dado = dado.to(args['device'])
        rotulo = rotulo.to(args['device'])

        optimizer.zero_grad()

        # Forward
        ypred = net(dado)
        loss = criterion(ypred, rotulo)

        # The logged difference is diagnostic only, so compute it without
        # recording a second autograd graph.
        with torch.no_grad():
            dif = diferenca(ypred, rotulo)

        # Store plain Python floats (.item()) instead of 0-d tensors
        # obtained through the legacy `.data` attribute.
        dif_train.append(dif.item())
        epoch_loss.append(loss.item())

        # Backpropagation
        loss.backward()
        optimizer.step()

    return np.asarray(epoch_loss).mean()

In [None]:
def validate(test_loader, net, epoch):
    """Evaluate ``net`` on ``test_loader`` without updating its weights.

    Uses the module-level ``args``, ``criterion`` and ``diferenca`` objects,
    and appends one value per batch to the module-level ``dif_test`` list.

    Args:
        test_loader: iterable yielding ``(dado, rotulo)`` batches.
        net: model to evaluate; it is switched to evaluation mode.
        epoch: epoch index. Not used in the body; kept so existing calls
            keep working.

    Returns:
        The unweighted mean of the per-batch ``criterion`` values.
    """
    net.eval()
    epoch_loss = []

    # No gradients are produced under no_grad, so the optimizer is not
    # touched here (there is nothing to zero).
    with torch.no_grad():
        for dado, rotulo in test_loader:
            # Move the batch to the configured device.
            dado = dado.to(args['device'])
            rotulo = rotulo.to(args['device'])

            # Forward
            ypred = net(dado)
            loss = criterion(ypred, rotulo)
            dif = diferenca(ypred, rotulo)

            # Store plain Python floats (.item()) instead of 0-d tensors
            # obtained through the legacy `.data` attribute.
            dif_test.append(dif.item())
            epoch_loss.append(loss.item())

    return np.asarray(epoch_loss).mean()

## Treinamento

In [None]:
# One entry per epoch: the mean loss returned by train() and validate().
train_losses = []
test_losses = []

start = time.time()

for epoch in range(args['epoch_num']):
    # Optimisation pass first, then evaluation with the updated weights.
    epoch_train_loss = train(train_loader, net, epoch)
    train_losses.append(epoch_train_loss)

    epoch_test_loss = validate(test_loader, net, epoch)
    test_losses.append(epoch_test_loss)

end = time.time()

In [None]:
# Difference between the time.time() stamps `end` and `start`, in seconds.
print(f'Duração do Treinamento: {end-start} s')

## Gráfico de Convergência

In [None]:
# Per-epoch losses of both passes drawn on a single wide axes.
fig, ax = plt.subplots(figsize=(30, 10))
ax.plot(train_losses, label='Train')
ax.plot(test_losses, label='Test', linewidth=3, alpha=0.5)
ax.set_xlabel('Epochs', fontsize=16)
ax.set_ylabel('Loss', fontsize=16)
ax.set_title('Convergence', fontsize=16)
ax.legend()
plt.show()

In [None]:
# Every value logged in dif_train, in the order it was appended.
fig, ax = plt.subplots(figsize=(30, 10))
ax.plot(dif_train, label='Train')
ax.set_xlabel('Testes', fontsize=16)
ax.set_ylabel('Diferença', fontsize=16)
ax.set_title('Convergence Treino', fontsize=16)
ax.legend()
plt.show()

In [None]:
# Every value logged in dif_test, in the order it was appended.
fig, ax = plt.subplots(figsize=(30, 10))
ax.plot(dif_test, label='Test', linewidth=3, alpha=0.5)
ax.set_xlabel('Testes', fontsize=16)
ax.set_ylabel('Diferença', fontsize=16)
ax.set_title('Convergence Teste', fontsize=16)
ax.legend()
plt.show()

## Estatísticas de Loss (mínimo, máximo e média)

In [None]:
# Convert the collected values to arrays so the reductions below run in NumPy.
train_losses = np.asarray(train_losses)
test_losses = np.asarray(test_losses)
dif_train = np.asarray(dif_train)
dif_test = np.asarray(dif_test)


def _print_loss_summary(descricao, valores):
    """Print the minimum, maximum and mean of the array ``valores``.

    ``descricao`` completes the "Loss por ..." label of each printed line.
    """
    print(f'Menor Valor de Loss por {descricao}: {valores.min()}')
    print(f'Maior Valor de Loss por {descricao}: {valores.max()}')
    print(f'Valor Médio de Loss por {descricao}: {valores.mean()}')


_print_loss_summary('Época de Treino', train_losses)

print()

_print_loss_summary('Época de Teste', test_losses)

print()

# dif_train / dif_test receive one value per batch (not per record), so the
# labels say "Batch".
_print_loss_summary('Batch de Treino', dif_train)

print()

_print_loss_summary('Batch de Teste', dif_test)

In [None]:
# Build the state dict a single time and walk its (name, value) entries.
print("Optimizer's state_dict:")
for var_name, valor in optimizer.state_dict().items():
    print(var_name, "\t", valor)

In [None]:
# torch.save(net, 'modelo_classificao_t')

In [None]:
# testeModelo = torch.load('modelo_classificao-3')
# testeModelo.eval()

In [None]:
#lista = [1425,45,4,2,2,0,2019, 61000, 1.6,0]
#tensor = torch.FloatTensor(lista)
#valor = testeModelo.forward(tensor).item()
#valor

In [None]:
# vw = pd.read_csv('ka.csv')
#vw.drop(['combustivel', 'blindado', 'cor', 'potenciamotor'], axis=1, inplace=True)
# vw.head()

In [None]:
# dif = []
# real = []
# previsto = []
# for i in range(vw.shape[0]):
#     tensor = torch.FloatTensor(np.asarray(vw.iloc[i, :-1]))    
#     valor = testeModelo.forward(tensor).item()
#     dif.append(float(round(valor-np.asarray(vw.iloc[i,-1]), 2)))
#     print(f"{np.asarray(vw.iloc[i,-1])} -> {round(valor, 2)} => {round(valor-np.asarray(vw.iloc[i,-1]), 2)}")    
#     real.append(vw.iloc[i,-1])
#     previsto.append(valor)

In [None]:
# dif = np.asarray(dif)

In [None]:
# dif

In [None]:
# print(min(dif), max(dif), dif.mean())

In [None]:
# print(min(dif[dif > 0]), max(dif[dif > 0]), dif[dif > 0].mean())

In [None]:
# dif[dif > 0].std()

In [None]:
# plt.figure(figsize=(20, 9))
# plt.plot(real, label='Real', linewidth=3, alpha=0.5)
# plt.plot(previsto, label='Previsto')
# plt.xlabel('Testes', fontsize=16)
# plt.ylabel('Diferença', fontsize=16)
# plt.title('Convergence Teste', fontsize=16)
# plt.legend()
# plt.show()

In [None]:
# plt.figure(figsize=(20, 9))
# plt.plot(real[100:200], label='Real', linewidth=3, alpha=0.5)
# plt.plot(previsto[100:200], label='Previsto')
# plt.xlabel('Testes', fontsize=16)
# plt.ylabel('Diferença', fontsize=16)
# plt.title('Convergence Teste', fontsize=16)
# plt.legend()
# plt.show()