# Full NN Build

## Setup general del experimento

In [18]:
# Experiment-wide settings and the mutable training state shared by the cells below.
learning_rate = 0.003  # Step size handed to the optimizer.
acc = 0.  # Latest validation accuracy; overwritten by the training loop.
epoca = 0  # Epoch counter; incremented by the training loop.
input_file = '/data/concentlite.csv'  # Appended to the project root to locate the raw data.
EXP_NAME = 'EXP006'  # Names the experiment folder and prefixes its output files.
MIN_ACC = 1.0       # Target accuracy; the training loop stops once it is reached.
MIN_ERROR = 1E6     # Lowest validation error seen so far (overwritten during training).
MAX_EPOCAS = 1000    # Maximum number of training epochs.
MAX_COUNTER = 50   # Epochs allowed without improving the validation error before training stops.
BATCH_SIZE = 10     # Number of patterns in each batch.

In [19]:
import numpy as np
import os
import math
import time
import pandas as pd
# NN
import torch
import torch.nn as nn
import torch.nn.functional as F
#from torch.utils.data import random_split
from torch.utils.data import DataLoader, Dataset
from torch import optim
from copy import deepcopy
from sklearn.model_selection import train_test_split
## Ploting
import matplotlib.pyplot as plt
%matplotlib inline 
from IPython.display import set_matplotlib_formats
from matplotlib.colors import to_rgba
## Progress bar
from tqdm.notebook import tqdm
# Path
import sys
sys.path.append('/home/sebacastillo/neuralnets/')
from src.utils import get_project_root
root = get_project_root()
## Check torch version
print(f'Using {torch.__version__}')


Using 2.0.0+cu117


In [20]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Seed the PyTorch and NumPy generators; NumPy is seeded as well because the
# dataset class in this notebook shuffles its rows with np.random.
torch.manual_seed(42)
np.random.seed(42)
# GPU operations have a separate seed we also want to set
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)
# Additionally, some operations on a GPU are implemented stochastic for efficiency
# We want to ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [21]:
def load_split_save_data(input_filename, output_name='EXP', split_type='train_test', train_ratio=0.75, validate_ratio=None, test_ratio=None):
    """Load a CSV file, split it into subsets and save every subset as CSV.

    The input is read with the pandas defaults, so its first line is used as
    the column names.  The subsets are written to ``exp/<output_name>/``
    (relative to the current working directory) as
    ``<output_name>_<subset>_data.csv``, without the index column.

    Args:
        input_filename: Path of the CSV file to split.
        output_name: Name of the sub-folder of ``exp`` and prefix of the
            output files.
        split_type: ``'train_test'`` or ``'train_validate_test'``.
        train_ratio: Fraction of the rows assigned to the training subset.
        validate_ratio: Relative weight of the validation subset; required
            for ``'train_validate_test'``.
        test_ratio: Relative weight of the test subset; required for
            ``'train_validate_test'``.

    Returns:
        ``(train, test)`` for ``'train_test'`` or ``(train, validate, test)``
        for ``'train_validate_test'``, as DataFrames.

    Raises:
        ValueError: If ``split_type`` is unknown, or if ``validate_ratio`` or
            ``test_ratio`` is missing (or zero) for ``'train_validate_test'``.
    """
    # Validate the request before touching the filesystem, so that an invalid
    # call does not leave empty experiment folders behind.
    if split_type not in ('train_validate_test', 'train_test'):
        raise ValueError("Invalid split_type. Use either 'train_validate_test' or 'train_test'.")
    if split_type == 'train_validate_test' and (not validate_ratio or not test_ratio):
        raise ValueError("Please provide validate_ratio and test_ratio for 'train_validate_test' split type.")

    data = pd.read_csv(input_filename)

    # exist_ok avoids the check-then-create race and also creates 'exp' itself.
    output_path = os.path.join('exp', output_name)
    os.makedirs(output_path, exist_ok=True)

    if split_type == 'train_validate_test':
        train_data, temp_data = train_test_split(data, train_size=train_ratio, random_state=42)
        # The remainder is divided according to the relative weight of the
        # validation subset versus the test subset.
        validate_data, test_data = train_test_split(
            temp_data,
            train_size=validate_ratio / (validate_ratio + test_ratio),
            random_state=42,
        )
        subsets = {'train': train_data, 'validate': validate_data, 'test': test_data}
    else:
        train_data, test_data = train_test_split(data, train_size=train_ratio, random_state=42)
        subsets = {'train': train_data, 'test': test_data}

    # Save every subset as a CSV file in the output folder.
    for subset_name, frame in subsets.items():
        frame.to_csv(os.path.join(output_path, f'{output_name}_{subset_name}_data.csv'), index=False)

    return tuple(subsets.values())



In [22]:
class DATASET(Dataset):
    """In-memory dataset read from a CSV file.

    Every row of the file is one pattern: all columns except the last one are
    the input features and the last column is the class.  The rows are
    shuffled once, when the dataset is built.  No bias column is added.
    """

    def __init__(self, filename):
        """Read ``filename``, shuffle its rows and split features from classes."""
        # header=None: the first line of the file is treated as data.
        table = pd.read_csv(filename, header=None).to_numpy()

        # Shuffle the patterns (rows) through a permuted vector of indices.
        order = np.arange(len(table))
        np.random.shuffle(order)
        table = table[order, :]

        # Split inputs from targets; the class is stored in the last column.
        features, targets = table[:, :-1], table[:, -1]
        self.x = features.astype(np.float32)
        self.y = targets.astype(np.float32)

    def __len__(self):
        """Return the number of patterns in the dataset."""
        return self.x.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, class)`` pair(s) selected by ``idx``."""
        pattern = self.x[idx, :]
        target = self.y[idx]
        return pattern, target

In [23]:
def plot_scatter_with_labels(data):
    """Scatter-plot the first two columns of ``data``, one series per label.

    ``data`` is a 2-D array whose last column holds the label.  Rows labelled
    1 and rows labelled -1 are drawn as separate series; rows with any other
    label are not drawn.  The figure is shown immediately.
    """
    labels = data[:, -1]

    # One scatter series per label value, drawn in a fixed order (1, then -1).
    for value, legend_name in ((1, '1'), (-1, '-1')):
        points = data[labels == value][:, 0:2]
        plt.scatter(points[:, 0], points[:, 1], label=legend_name, alpha=0.5)

    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.show()

In [24]:
class SimpleNN(nn.Module):
    """Perceptron with one hidden layer of three units and tanh activations.

    Both the hidden layer and the output layer are followed by ``tanh``, so
    every output lies in the interval (-1, 1).
    """

    def __init__(self, num_inputs, num_outputs):
        """Create the layers for ``num_inputs`` features and ``num_outputs`` outputs."""
        super().__init__()
        hidden_units = 3
        # Hidden layer followed by the output layer, each with its own tanh.
        self.linear1 = nn.Linear(num_inputs, hidden_units, bias=True)
        self.act_fc1 = nn.Tanh()
        self.linear2 = nn.Linear(hidden_units, num_outputs, bias=True)
        self.act_fc2 = nn.Tanh()

    def forward(self, x):
        """Return the network prediction for the batch ``x``."""
        hidden = self.act_fc1(self.linear1(x))
        return self.act_fc2(self.linear2(hidden))

In [25]:
class L3NN(nn.Module):
    """Fully connected network with three tanh hidden layers and a linear output.

    The layer sizes are chained as
    ``input_size -> neurons_l1 -> neurons_l2 -> neurons_l3 -> output_size``,
    so the three hidden sizes may differ from each other.  The output layer
    has no activation: ``forward`` returns raw scores.
    """

    def __init__(self, input_size, neurons_l1, neurons_l2, neurons_l3, output_size):
        """Create the layers.

        Args:
            input_size: Number of input features.
            neurons_l1: Units in the first hidden layer.
            neurons_l2: Units in the second hidden layer.
            neurons_l3: Units in the third hidden layer.
            output_size: Number of outputs.
        """
        super().__init__()
        # Each layer takes as many inputs as the previous layer produces.
        self.layer1 = nn.Linear(input_size, neurons_l1)
        self.layer2 = nn.Linear(neurons_l1, neurons_l2)
        self.layer3 = nn.Linear(neurons_l2, neurons_l3)
        self.output = nn.Linear(neurons_l3, output_size)

    def forward(self, x):
        """Return the raw (non-activated) outputs for the batch ``x``."""
        x = torch.tanh(self.layer1(x))
        x = torch.tanh(self.layer2(x))
        x = torch.tanh(self.layer3(x))
        return self.output(x)

In [26]:
def train_step(model, data, loss_function, optimizer, device):
    """Train ``model`` for one epoch over the batches yielded by ``data``.

    Args:
        model: Network to optimize; it is switched to training mode.
        data: Iterable of ``(X, y)`` batches that supports ``len()``
            (for example a ``DataLoader``).
        loss_function: Callable ``loss_function(prediction, target)`` that
            returns a scalar tensor.
        optimizer: Optimizer used to update the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(error, model)``: the mean of the per-batch losses of the epoch and
        the same ``model`` object that was passed in.
    """
    model.train()  # Training mode

    n_batches = len(data)  # Number of batches = N_patterns / N_patterns_per_batch

    error = 0.0

    for X, y in data:
        # Move the batch to the device used by the model
        X = X.to(device)
        y = y.to(device)

        optimizer.zero_grad()  # Clear the gradients of the previous step

        # Forward pass
        y_pred = model(X)

        # Drop only the trailing singleton dimension of the prediction, and
        # only when the target lacks it.  A bare squeeze() also removes the
        # batch dimension of a one-pattern batch (e.g. the last, incomplete
        # batch), which makes the loss compare mismatched shapes.
        if y_pred.dim() == y.dim() + 1 and y_pred.shape[-1] == 1:
            y_pred = y_pred.squeeze(-1)

        # Compute loss
        loss = loss_function(y_pred, y)
        error += loss.item()

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

    error /= n_batches

    return error, model

In [27]:
def predict_step(model, data, loss_function, device):
    """Evaluate ``model`` on every batch of ``data`` without computing gradients.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        data: Iterable of ``(X, y)`` batches that supports ``len()``
            (for example a ``DataLoader``).
        loss_function: Callable ``loss_function(prediction, target)`` that
            returns a scalar tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(error, Y, Yp)``: the mean of the per-batch losses, the flattened
        targets and the flattened predictions (both as CPU tensors, in the
        order in which the batches were yielded).
    """
    model.eval()  # Turn off Dropouts Layers, BatchNorm Layers etc

    n_batches = len(data)  # Number of batches = N_patterns / N_patterns_per_batch

    error = 0

    # Per-batch chunks are collected in lists and joined once at the end;
    # stacking onto the growing result on every batch copies all previous
    # data again.  Each list starts with an empty float tensor so the joined
    # result keeps the dtype promotion of the incremental version.
    target_chunks = [torch.tensor([])]
    prediction_chunks = [torch.tensor([])]

    with torch.no_grad():  # Turn off gradients computation
        for X, y in data:
            target_chunks.append(y.flatten().cpu())

            # Move the batch to the device used by the model
            X = X.to(device)
            y = y.to(device)

            # Forward pass
            y_pred = model(X)
            prediction_chunks.append(y_pred.flatten().cpu())

            # Compute loss
            loss = loss_function(y_pred.squeeze(), y.squeeze())
            error += loss.item()

    error /= n_batches

    Y = torch.hstack(target_chunks)
    Yp = torch.hstack(prediction_chunks)

    return error, Y, Yp

In [28]:
# Initialize the experiment: split the raw CSV and write the subsets to disk.
datafile = str(root) + input_file
train_data, test_data = load_split_save_data(datafile , output_name= EXP_NAME)
# Paths of the CSV files written by the split above.
# NOTE(review): load_split_save_data writes under a relative 'exp' folder while
# these paths are anchored at the project root; they only match when the
# working directory is the project root -- confirm.
filename_train_data = str(root) + '/exp/' + EXP_NAME + '/' + EXP_NAME  + '_train_data.csv'
filename_test_data = str(root) + '/exp/' + EXP_NAME + '/' + EXP_NAME  + '_test_data.csv'

# Build the datasets for training and validation.
# NOTE(review): the dataset used for validation is read from the *test* split.
trn = DATASET(filename_train_data)
test = DATASET(filename_test_data)

# Build the data loaders for training and validation.
# `train_data` is rebound here: from the DataFrame returned by the split to a DataLoader.
train_data = DataLoader(trn, batch_size=BATCH_SIZE, shuffle=True)
validation_data = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False)

# Initialize the model and move it to the selected device
modelo = SimpleNN(num_inputs=2, num_outputs=1)
#modelo = L3NN(2, 64, 64, 64, 2)
modelo.to(device)

# Loss function to use (alternatives kept commented out)
loss_function = nn.MSELoss(reduction='mean').to(device)
#loss_function = nn.BCELoss().to(device)
#loss_function = nn.BCEWithLogitsLoss().to(device)
#loss_function = nn.CrossEntropyLoss().to(device)

# Optimizer to use (alternative kept commented out)
optimizer = optim.SGD(modelo.parameters(), lr=learning_rate, momentum=0.9)  # 0.9)
#optimizer = optim.Adam(modelo.parameters(), lr=learning_rate)

In [29]:
#train_step(modelo, train_data, loss_function, optimizer, device)

In [30]:
# Reset every piece of loop state here so that re-running this cell starts a
# fresh run.  Otherwise a second run inherits `epoca`, `acc` and `MIN_ERROR`
# from the previous one: the loop may be skipped, or no epoch may ever beat the
# old minimum, and the cell then fails while saving a `None` model.
epoca = 0
acc = 0.
MIN_ERROR = float('inf')
error = []      # Validation error of every epoch
accuracy = []   # Validation accuracy of every epoch
STOP = False    # Set when the early-stopping patience is exhausted
counter = 0     # Consecutive epochs without improving the validation error
best_model = None
best_model_weights = None

while (epoca < MAX_EPOCAS) and (acc < MIN_ACC) and (not STOP):

    epoca += 1

    # TRAINING
    _, modelo = train_step(modelo, train_data, loss_function, optimizer, device)

    # VALIDATION
    e, Y, Yp = predict_step(modelo, validation_data, loss_function, device)
    # A prediction counts as correct when its sign matches the sign of the target.
    acc = torch.sum(Yp.sign() == Y.sign())/ len(Y)

    # STORE THE MEASURES
    error.append(e)
    accuracy.append(acc)

    # STOP CRITERION AND MODEL CHECKPOINT
    if (e < MIN_ERROR):
        MIN_ERROR = e
        counter = 0

        # Keep an independent copy of the best model seen so far
        best_model = deepcopy(modelo)
        best_model_weights = best_model.state_dict()

    else:
        counter += 1
        if counter > MAX_COUNTER:
            STOP = True

    # PER-EPOCH REPORT (every 10 epochs); the accuracy is measured on the validation loader
    if (epoca % 10) == 0:
        print('Epoca: {} -- Error: {:.4}\t--\tTasa acierto [val]: {}\n'.format(epoca, e, acc))

# Nothing to report or save if the loop never produced a model.
if best_model is None:
    raise RuntimeError('No model was trained: the training loop did not run or the validation error never improved.')

# FINAL REPORT
print('='*79)
print('FINAL -- Epoca: {} -- Error: {:.4}\t--\tTasa acierto [val]: {}'.format(epoca, e, acc))
print('='*79)

# SAVE THE BEST MODEL TO DISK
path_best_m = str(root) + '/exp/' + EXP_NAME + '/' + EXP_NAME  + 'best_model.pt'
torch.save(best_model,
           path_best_m,
           _use_new_zipfile_serialization=True)

# SAVE THE WEIGHTS OF THE BEST MODEL TO DISK
path_best_m_state_dict = str(root) + '/exp/' + EXP_NAME + '/' + EXP_NAME  + 'best_model_state_dict.pt'
torch.save(best_model.state_dict(),
           path_best_m_state_dict,
           _use_new_zipfile_serialization=True)

# Show the parameters of the first layer of the best model
B = best_model.linear1.bias.detach().cpu().numpy()
W = best_model.linear1.weight.flatten().detach().cpu().numpy()
print(f'Bias: {B} -- W: {W}')

Epoca: 10 -- Error: 0.9731	--	Tasa acierto [train]: 0.6028708219528198

Epoca: 20 -- Error: 0.9738	--	Tasa acierto [train]: 0.6028708219528198

Epoca: 30 -- Error: 0.9605	--	Tasa acierto [train]: 0.6028708219528198

Epoca: 40 -- Error: 1.012	--	Tasa acierto [train]: 0.6028708219528198

Epoca: 50 -- Error: 0.9297	--	Tasa acierto [train]: 0.6028708219528198

Epoca: 60 -- Error: 0.8934	--	Tasa acierto [train]: 0.6028708219528198

Epoca: 70 -- Error: 0.8214	--	Tasa acierto [train]: 0.5693780183792114

Epoca: 80 -- Error: 0.7564	--	Tasa acierto [train]: 0.679425835609436

Epoca: 90 -- Error: 0.7106	--	Tasa acierto [train]: 0.6985645890235901

Epoca: 100 -- Error: 0.7485	--	Tasa acierto [train]: 0.6746411323547363

Epoca: 110 -- Error: 0.6985	--	Tasa acierto [train]: 0.7129186391830444

Epoca: 120 -- Error: 0.7124	--	Tasa acierto [train]: 0.6842105388641357

Epoca: 130 -- Error: 0.697	--	Tasa acierto [train]: 0.6889952421188354

Epoca: 140 -- Error: 0.6778	--	Tasa acierto [train]: 0.71291863

In [31]:
# Import tensorboard logger from PyTorch
from torch.utils.tensorboard import SummaryWriter
# Load tensorboard extension for Jupyter Notebook, only need to start TB in the notebook
%load_ext tensorboard
# Folder of the current experiment; used below as the default TensorBoard logging directory
loggingdir = str(root) + '/exp/' + EXP_NAME 

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [32]:
def train_model_with_logger(model, optimizer, data_loader, loss_module, val_dataset, num_epochs=MAX_EPOCAS, logging_dir=loggingdir):
    """Train ``model`` while logging the progress to TensorBoard.

    The mean training loss of every epoch is logged as ``training_loss``.
    Every 10th epoch a figure produced by ``visualize_classification`` is
    logged as ``predictions``.  The model graph is logged once, from the
    first batch.  Batches are moved to the module-level ``device``.

    Args:
        model: Network to train; it is switched to training mode.
        optimizer: Optimizer used to update the parameters of ``model``.
        data_loader: Iterable of ``(X, y)`` training batches that supports
            ``len()``.
        loss_module: Callable ``loss_module(prediction, target)`` that
            returns a scalar tensor.
        val_dataset: Object exposing ``x`` and ``y``, used for the figures.
        num_epochs: Number of epochs to train.
        logging_dir: Directory where the TensorBoard files are written.
    """
    # Create TensorBoard logger
    writer = SummaryWriter(logging_dir)
    model_plotted = False

    # Set model to train mode
    model.train()

    # try/finally guarantees that the writer is flushed and closed even when
    # training is interrupted or raises.
    try:
        for epoch in tqdm(range(num_epochs)):
            epoch_loss = 0.0
            for X, y in data_loader:

                ## Step 1: Move input data to device (only strictly necessary if we use GPU)
                data_inputs = X.to(device)
                data_labels = y.to(device)

                # For the very first batch, we visualize the computation graph in TensorBoard
                if not model_plotted:
                    writer.add_graph(model, data_inputs)
                    model_plotted = True

                ## Step 2: Run the model on the input data
                preds = model(data_inputs)
                preds = preds.squeeze(dim=1)  # Output is [Batch size, 1], but we want [Batch size]

                ## Step 3: Calculate the loss
                loss = loss_module(preds, data_labels.float())

                ## Step 4: Perform backpropagation
                # Gradients are accumulated, not overwritten, so they must be
                # reset to zero before every backward pass.
                optimizer.zero_grad()
                loss.backward()

                ## Step 5: Update the parameters
                optimizer.step()

                ## Step 6: Accumulate the loss of the epoch
                epoch_loss += loss.item()

            # Add average loss to TensorBoard
            epoch_loss /= len(data_loader)
            writer.add_scalar('training_loss',
                              epoch_loss,
                              global_step=epoch + 1)

            # Visualize prediction and add figure to TensorBoard
            # Since matplotlib figures can be slow in rendering, we only do it every 10th epoch
            if (epoch + 1) % 10 == 0:
                fig = visualize_classification(model, val_dataset.x, val_dataset.y)
                writer.add_figure('predictions',
                                  fig,
                                  global_step=epoch + 1)
    finally:
        writer.close()

In [33]:
@torch.no_grad()  # Decorator, same effect as "with torch.no_grad(): ..." over the whole function.
def visualize_classification(model, data, label):
    """Plot the samples over a colour map of the model predictions.

    The samples are drawn as two scatter series.  The negative class may be
    encoded either as 0 or as -1, so both 0/1 and -1/1 labellings are drawn
    completely.  The background shows the model output on a regular grid
    over [-0.5, 1.5) x [-0.5, 1.5), passed through a sigmoid and used to
    blend the two class colours.

    Args:
        model: Network to evaluate; it is moved to the module-level ``device``.
        data: Samples, as a tensor or array whose first two columns are plotted.
        label: Class of every sample, as a tensor or array.

    Returns:
        The matplotlib figure that was created.
    """
    if isinstance(data, torch.Tensor):
        data = data.cpu().numpy()
    if isinstance(label, torch.Tensor):
        label = label.cpu().numpy()
    # Accept both encodings of the negative class (0 or -1).
    data_0 = data[(label == 0) | (label == -1)]
    data_1 = data[label == 1]

    fig = plt.figure(figsize=(4,4), dpi=500)
    plt.scatter(data_0[:,0], data_0[:,1], edgecolor="#333", label="Class 0")
    plt.scatter(data_1[:,0], data_1[:,1], edgecolor="#333", label="Class 1")
    plt.title("Dataset samples")
    plt.ylabel(r"$x_2$")
    plt.xlabel(r"$x_1$")
    plt.legend()

    # Evaluate the model on a regular grid covering the plotted area
    model.to(device)
    c0 = torch.Tensor(to_rgba("C0")).to(device)
    c1 = torch.Tensor(to_rgba("C1")).to(device)
    x1 = torch.arange(-0.5, 1.5, step=0.01, device=device)
    x2 = torch.arange(-0.5, 1.5, step=0.01, device=device)
    xx1, xx2 = torch.meshgrid(x1, x2, indexing='ij')  # Meshgrid function as in numpy
    model_inputs = torch.stack([xx1, xx2], dim=-1)
    preds = model(model_inputs)
    # NOTE(review): the sigmoid treats the model output as a logit; for a model
    # that already ends in tanh this only compresses the colour range -- confirm
    # that this is the intended mapping.
    preds = torch.sigmoid(preds)
    # Blend the two class colours; indexing with None adds the broadcast dimensions.
    # Assumes the model returns a single output per grid point -- TODO confirm.
    output_image = (1 - preds) * c0[None,None] + preds * c1[None,None]
    output_image = output_image.cpu().numpy()  # Converting to numpy requires a CPU tensor
    plt.imshow(output_image, origin='lower', extent=(-0.5, 1.5, -0.5, 1.5))
    plt.grid(False)
    return fig


In [34]:
# Train `modelo` with TensorBoard logging, using the `test` dataset for the prediction figures.
train_model_with_logger(modelo, optimizer, train_data, loss_function, val_dataset=test)

  0%|          | 0/1000 [00:00<?, ?it/s]