In [1]:
import json
from data_loader import load_split_data
import torch
from torchtext.legacy import data, vocab

In [2]:
import torch
from torchtext.legacy import data
from spacy.lang.es.stop_words import STOP_WORDS
from string import whitespace, punctuation

# Read the dataset description from disk. The encoding is pinned to UTF-8
# instead of inheriting the platform default, which is locale-dependent and
# can garble non-ASCII tokens in the vocabulary on some systems.
with open('./datainfo.json', 'r', encoding='utf-8') as f:
    df = json.load(f)

# NOTE(review): `df` is the decoded JSON object (indexed by "vocab" below), not
# a DataFrame. `vocab` also rebinds the `torchtext.legacy.vocab` module imported
# in the first cell; the name is kept because later cells read it.
vocab = df["vocab"]

# load_split_data comes from the project's data_loader module; it is unpacked
# here into a training iterator and a validation iterator.
train_iterator, valid_iterator = load_split_data(df)

# Cell output: vocabulary size.
len(vocab)

8670

In [3]:

# Hyperparameters

# --- Sizes ---
INPUT_DIM = len(vocab)   # vocabulary size
EMBEDDING_DIM = 200      # passed to ModelMix when the model is built
HIDDEN_DIM = 256
OUTPUT_DIM = 1

# --- Layer options ---
# NOTE(review): HIDDEN_DIM, OUTPUT_DIM and the three values below are not
# referenced by the visible cells; presumably left over from a recurrent
# model (cf. the LSTMTagger import) — TODO confirm or remove.
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.2

# --- Training schedule ---
num_epochs = 25          # epoch count for the final training loop
# NOTE(review): the optimizer in the model-setup cell hard-codes lr=0.1, so
# this value is currently not applied anywhere visible — TODO confirm intent.
learning_rate = 0.001




In [4]:
from model import LSTMTagger, ModelMix
import torch.nn as nn
from torch import optim

# Pick the device first so the model already lives on it when the optimizer is
# constructed (torch.optim advises moving the model before building optimizers).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The third argument is presumably the number of target classes: the printed
# module summary ends in a Linear layer with out_features=3 — TODO confirm
# against model.py.
model = ModelMix(EMBEDDING_DIM, len(vocab), 3, vocab).to(device)

# CrossEntropyLoss takes raw class scores and integer class labels.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Last expression of the cell: show the module structure as the cell output.
model

ModelMix(
  (ModelD): TextSentimentLinear(
    (embedding): EmbeddingBag(8670, 200, mode=max)
    (fc): Linear(in_features=200, out_features=256, bias=True)
  )
  (ModelE): CNN(
    (conv1): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1))
    (conv2): Conv2d(10, 20, kernel_size=(3, 3), stride=(1, 1))
    (conv2_drop): Dropout2d(p=0.5, inplace=False)
    (fc1): Linear(in_features=720, out_features=1024, bias=True)
    (fc2): Linear(in_features=1024, out_features=256, bias=True)
  )
  (lineal1): Linear(in_features=512, out_features=3, bias=True)
)

In [5]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8

    Three input layouts are handled:

    * ``preds`` has the same number of dimensions as ``y``: every entry is
      treated as a single logit, passed through a sigmoid and rounded to 0/1
      before comparison (the original behaviour).
    * ``preds`` has one extra trailing dimension of size 1 (e.g. ``(B, 1)``
      against ``(B,)``): that dimension is squeezed away first, so the
      comparison is element-wise instead of broadcasting to ``(B, B)``.
    * ``preds`` has one extra trailing dimension larger than 1 (e.g. ``(B, C)``
      class scores against ``(B,)`` class indices): the predicted class is the
      argmax over that dimension.

    Args:
        preds: tensor of raw model outputs (logits / class scores).
        y: tensor of targets.

    Returns:
        A 0-dim float tensor: number of matches divided by ``len`` of the
        comparison result (i.e. its first dimension).
    """
    has_score_axis = y.dim() >= 1 and preds.dim() == y.dim() + 1

    if has_score_axis and preds.size(-1) > 1:
        # One score per class: sigmoid + round would be meaningless here.
        predicted = preds.argmax(dim=-1)
    elif has_score_axis:
        # Single logit kept in a trailing axis of size 1.
        predicted = torch.round(torch.sigmoid(preds.squeeze(-1)))
    else:
        # round predictions to the closest integer
        predicted = torch.round(torch.sigmoid(preds))

    correct = (predicted == y).float()  # convert into float for division
    acc = correct.sum() / len(correct)
    return acc



In [6]:
def train(model, iterator):
    """Run one optimisation pass over ``iterator``.

    Relies on the module-level ``optimizer``, ``criterion`` and ``device``
    defined in the model-setup cell.

    Each batch is unpacked as ``(image, text, labels)``. Both inputs are moved
    to ``device`` and passed to the model as ``model(image, text)``, matching
    how the model is invoked in the ``uwu`` loop of this notebook. Accuracy is
    the fraction of samples whose highest-scoring class equals the label, which
    is the metric that fits the class-index targets used by ``criterion``.

    Args:
        model: the network to optimise (switched to training mode here).
        iterator: iterable of ``(image, text, labels)`` batches supporting ``len``.

    Returns:
        ``(mean_loss, mean_accuracy)`` averaged over batches; both are NaN when
        ``iterator`` yields no batches.
    """
    model.train()

    loss_total = 0.0
    acc_total = 0.0

    for image, text, labels in iterator:
        image = image.to(device)
        text = text.type(torch.int64).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        predictions = model(image, text)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest score along dim 1.
        batch_acc = (predictions.argmax(dim=1) == labels).float().mean()

        loss_total += loss.item()
        acc_total += batch_acc.item()

    n_batches = len(iterator)
    if n_batches == 0:
        # Nothing was processed; avoid a ZeroDivisionError.
        return float("nan"), float("nan")
    return loss_total / n_batches, acc_total / n_batches

In [7]:

import PIL
import os  


def _validation_metrics(model, iterator):
    """Return ``(mean batch loss, accuracy)`` of ``model`` over ``iterator`` with gradients disabled."""
    loss_sum = 0.0
    hits = 0
    seen = 0

    model.eval()
    with torch.no_grad():
        for image, text, labels in iterator:
            image = image.to(device)
            text = text.type(torch.int64).to(device)
            labels = labels.to(device)

            predictions = model(image, text)
            loss_sum += criterion(predictions, labels).item()

            # Index of the largest score along dim 1 is the predicted class.
            _, predicted = predictions.max(1)
            hits += (predicted == labels).sum().item()
            seen += len(labels)

    n_batches = len(iterator)
    mean_loss = loss_sum / n_batches if n_batches else float("nan")
    accuracy = hits / seen if seen else float("nan")
    return mean_loss, accuracy


def _corrupt_image_path(error):
    """Return the single-quoted path embedded in the error message, or None if there is none."""
    parts = str(error).split("'")
    return parts[1] if len(parts) >= 3 else None


def uwu(epochs=5, print_every=1):
    """Train ``model`` on ``train_iterator``, validating every ``print_every`` steps.

    Relies on the module-level ``model``, ``optimizer``, ``criterion``,
    ``device``, ``train_iterator`` and ``valid_iterator``.

    When PIL cannot decode an image, the offending file is deleted and the
    interrupted epoch is retried. The retry is a loop rather than a recursive
    call: the recursive version restarted the whole schedule and then still
    resumed the outer loop, so the model was trained for more than ``epochs``
    epochs. The model is also put back into training mode before every
    attempt, because the error may have been raised during validation while
    the model was in eval mode.

    Args:
        epochs: number of passes over ``train_iterator``.
        print_every: number of training steps between validation reports.

    Returns:
        ``(train_losses, test_losses)``: one entry per validation report. The
        training entry is the mean loss over the last ``print_every`` steps
        (the same value that is printed); the test entry is the mean batch
        loss over ``valid_iterator``.

    Raises:
        ValueError: if ``print_every`` is smaller than 1.
        PIL.UnidentifiedImageError: re-raised when the message contains no
            quoted path, since retrying could not make progress.
    """
    if print_every < 1:
        raise ValueError("print_every must be >= 1")

    steps = 0
    running_loss = 0.0
    train_losses, test_losses = [], []

    epoch = 0
    while epoch < epochs:
        # A previous attempt may have been interrupted while in eval mode.
        model.train()
        try:
            for image, text, labels in train_iterator:
                steps += 1

                image = image.to(device)
                labels = labels.to(device)
                text = text.type(torch.int64).to(device)

                optimizer.zero_grad()
                predictions = model(image, text)
                loss = criterion(predictions, labels)

                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                if steps % print_every == 0:
                    test_loss, accuracy = _validation_metrics(model, valid_iterator)
                    train_loss = running_loss / print_every

                    train_losses.append(train_loss)
                    test_losses.append(test_loss)
                    print(f"Epoch {epoch+1}/{epochs}.. "
                          f"Train loss: {train_loss:.3f}.. "
                          f"Test loss: {test_loss:.3f}.. "
                          f"Test accuracy: {accuracy:.3f}")

                    running_loss = 0.0
                    model.train()
        except PIL.UnidentifiedImageError as error:
            print(error)
            bad_path = _corrupt_image_path(error)
            if bad_path is None:
                # No file to remove, so a retry would fail the same way.
                raise
            # NOTE(review): destructive — the file named in the error message
            # is permanently deleted from disk before the epoch is retried.
            os.remove(bad_path)
        else:
            epoch += 1

    return train_losses, test_losses


train_losses, test_losses = uwu()

Epoch 1/5.. Train loss: 1.068.. Test loss: 0.484.. Test accuracy: 0.685




Epoch 1/5.. Train loss: 0.492.. Test loss: 0.919.. Test accuracy: 0.662
Epoch 1/5.. Train loss: 0.840.. Test loss: 2.091.. Test accuracy: 0.670
Epoch 1/5.. Train loss: 2.174.. Test loss: 1.204.. Test accuracy: 0.667
Epoch 1/5.. Train loss: 1.039.. Test loss: 0.526.. Test accuracy: 0.733
Epoch 1/5.. Train loss: 0.555.. Test loss: 0.449.. Test accuracy: 0.789
Epoch 1/5.. Train loss: 0.534.. Test loss: 0.419.. Test accuracy: 0.833
Epoch 1/5.. Train loss: 0.480.. Test loss: 0.406.. Test accuracy: 0.842
Epoch 1/5.. Train loss: 0.385.. Test loss: 0.397.. Test accuracy: 0.825
Epoch 1/5.. Train loss: 0.481.. Test loss: 0.412.. Test accuracy: 0.822
Epoch 1/5.. Train loss: 0.426.. Test loss: 0.550.. Test accuracy: 0.718
Epoch 1/5.. Train loss: 0.402.. Test loss: 0.388.. Test accuracy: 0.833
Epoch 1/5.. Train loss: 0.446.. Test loss: 0.374.. Test accuracy: 0.846
Epoch 1/5.. Train loss: 0.341.. Test loss: 0.397.. Test accuracy: 0.814
Epoch 1/5.. Train loss: 0.396.. Test loss: 0.362.. Test accuracy

In [None]:
import time


def evaluate(model, iterator):
    """Return the fraction of correctly classified samples in ``iterator``.

    Defined here because the loop below calls ``evaluate`` and no other cell
    or import in this notebook provides it. Relies on the module-level
    ``device``. Batches are unpacked as ``(image, text, labels)`` and the
    predicted class is the argmax of the model output along dim 1, matching
    the validation code in ``uwu``.

    Returns NaN when ``iterator`` yields no samples.
    """
    hits = 0
    seen = 0

    model.eval()
    with torch.no_grad():
        for image, text, labels in iterator:
            image = image.to(device)
            text = text.type(torch.int64).to(device)
            labels = labels.to(device)

            predictions = model(image, text)
            hits += (predictions.argmax(dim=1) == labels).sum().item()
            seen += len(labels)

    return hits / seen if seen else float("nan")


# Per-epoch history, kept at module level so later cells can plot it.
t = time.time()
loss = []
acc = []
val_acc = []

for epoch in range(num_epochs):

    train_loss, train_acc = train(model, train_iterator)
    valid_acc = evaluate(model, valid_iterator)

    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Acc: {valid_acc*100:.2f}%')

    loss.append(train_loss)
    acc.append(train_acc)
    val_acc.append(valid_acc)

# Wall-clock duration of the whole loop, in seconds.
print(f'time:{time.time()-t:.3f}')

