<a href="https://colab.research.google.com/github/alessiomongoli/Sentiment_Lexicon/blob/main/Neural_model/Train_predict_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from Neural_model.Neural import RegressionModel
from Neural_model.EarlyStopping import EarlyStopping

In [None]:
import torch
import torch.optim as optim
from torch.nn import MSELoss
import numpy as np
import time
# Garbage Collector
import gc

# Checkpoint file on the mounted Google Drive. train() hands this path to
# EarlyStopping (as its `path` argument) and reads it back with torch.load
# once the epoch loop has finished.
checkpoint_path= '/content/drive/MyDrive/Github/Colab Notebooks/project/checkpoint/checkpoint.pt'

def train(dataset, category, negation_type, batch_size=32, n_workers=2, lr=1e-3, n_epochs=100):
    """
    Trains the regression network given the seed dataset as input.

    Args:
        dataset: seed dataset. It is wrapped in a shuffling DataLoader whose
            batches are unpacked as (word vectors, scores), and it must
            provide get_min_score() / get_max_score(), which bound the model.
        category: label used only in the timing-log entry.
        negation_type: label used only in the timing-log entry.
        batch_size: number of samples per training batch.
        n_workers: number of DataLoader worker processes.
        lr: learning rate handed to the Adam optimizer.
        n_epochs: maximum number of epochs; early stopping may end sooner.

    Returns:
        The RegressionModel after reloading the state dict stored at
        checkpoint_path (presumably the best epoch seen by EarlyStopping --
        confirm against the EarlyStopping implementation).
    """
    start = time.time()
    # Fixed seed so weight initialisation and batch shuffling are repeatable.
    torch.manual_seed(11)
    # Use the GPU when one is present, otherwise fall back to the CPU instead
    # of crashing on .cuda().
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Mean Squared Error Loss
    loss = MSELoss()
    # TODO: change the checkpoint path
    early_stopping = EarlyStopping(verbose=True, path=checkpoint_path)
    train_dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=int(n_workers))
    # We use a Regression Model
    model = RegressionModel(low=dataset.get_min_score(), high=dataset.get_max_score())
    # Adam (Adaptive Moment Estimation) optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    for epoch in range(n_epochs):
        losses = []
        for wvs, scores in train_dataloader:
            # PyTorch accumulates gradients across backward passes, so they
            # have to be cleared before every new batch.
            optimizer.zero_grad()
            wvs = wvs.to(device)
            # Column vector of targets, one row per sample in the batch.
            scores = scores.reshape(-1, 1).to(device)
            # The model already lives on `device`, so its output does too.
            prediction = model(wvs)
            batch_loss = loss(prediction, scores)
            losses.append(batch_loss.item())
            # Back-propagation followed by the parameter update
            batch_loss.backward()
            optimizer.step()
        epoch_loss = np.mean(np.array(losses))
        early_stopping(epoch_loss, model, epoch)
        if early_stopping.early_stop:
            print("Early stopping")
            break
        # Release Python garbage and cached GPU blocks between epochs.
        gc.collect()
        torch.cuda.empty_cache()
    # TODO: change the path passed to torch.load
    # map_location keeps the reload working when running on the CPU fallback.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    end = time.time()

    # The timing log is auxiliary: failing to write it must not throw away
    # the trained model, so the error is reported instead of propagated.
    try:
        with open('/content/drive/MyDrive/Github/Colab Notebooks/project/Results/time.txt', 'a') as f:
            f.write(f'\n{category}_{negation_type} Training phase: {end - start} seconds')
    except OSError as err:
        print(f'Could not write the timing log: {err}')
    return model

def predict(model, test_dataset, category, negation_type):
    """
    Expands labels over to the test dataset of non-seed words.

    Args:
        model: trained network; it is moved to the available device and
            switched to eval mode (both changes persist after the call).
        test_dataset: dataset whose DataLoader batches are unpacked as
            (word vectors, words).
        category: label used only in the timing-log entry.
        negation_type: label used only in the timing-log entry.

    Returns:
        dict mapping each word to the score predicted for it.
    """
    start = time.time()
    # Use the GPU when one is present, otherwise fall back to the CPU instead
    # of crashing on .cuda().
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()
    results = {}
    # Shuffling serves no purpose at inference time; keeping dataset order
    # makes the insertion order of `results` deterministic.
    test_dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=64,
        shuffle=False,
        num_workers=2)
    with torch.no_grad():
        for wv, w in test_dataloader:
            pred = model(wv.to(device))
            # Keep the batch dimension explicitly. A bare squeeze() collapses
            # a final batch of size 1 into a 0-d tensor whose tolist() is a
            # plain float, which zip() cannot iterate.
            scores = pred.cpu().reshape(len(w), -1).squeeze(1).tolist()
            for word, score in zip(w, scores):
                results[word] = score

    end = time.time()
    # The timing log is auxiliary: failing to write it must not throw away
    # the computed predictions, so the error is reported instead of propagated.
    try:
        with open('/content/drive/MyDrive/Github/Colab Notebooks/project/Results/time.txt', 'a') as f:
            f.write(f'\n{category}_{negation_type} Predict phase: {end - start} seconds')
    except OSError as err:
        print(f'Could not write the timing log: {err}')
    return results
