# Simple Stance Detection in Spanish with Linear Model in [Pytorch 1.1](https://pytorch.org/) with [Ignite](https://pytorch.org/ignite/) 

---

* Process Data through pytorch Dataset  
  * Tokenize tweets  
  * Vectorize tweets with FastText word vector representation
* Make batches through pytorch Dataloader  
* Linear Models: Linear Regression and 3-ĺayer Linear Regression
* [Ignite](https://pytorch.org/ignite/) training callbacks
  * Define ignite training loops
  * Add callback for epoch loss and accuracy
  * Add callback for ModelCheckpoint
  * Add callback for EarlyStopping

## Imports

In [2]:
%matplotlib inline
import sys
import re
import os
import pathlib
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import spacy
import spacy
spacy.load('es_core_news_sm')
from spacy.lang.es import Spanish
from tqdm.auto import tqdm, trange
tqdm.pandas(desc='Progress')

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

import ignite
from ignite.engine import Engine, Events
from ignite.metrics import Accuracy, Loss, RunningAverage
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.contrib.handlers import ProgressBar

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
from gensim.models import fasttext
from gensim.models.keyedvectors import KeyedVectors
from gensim.models.wrappers import FastText

import warnings
warnings.filterwarnings('ignore')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')

  from pandas import Panel
2020-01-22 20:51:32,055 : INFO : 'pattern' package not found; tag filters are not available for English


In [3]:
print('Python version:',sys.version)
print('Pandas version:',pd.__version__)
print('Pytorch version:', torch.__version__)
print('Spacy version:', spacy.__version__)
print('Ignite version:', ignite.__version__)

Python version: 3.7.5 (default, Oct 25 2019, 15:51:11) 
[GCC 7.3.0]
Pandas version: 0.25.3
Pytorch version: 1.3.1
Spacy version: 2.0.16
Ignite version: 0.2.1


## 1. Process Data 

In [4]:
model_fasttext = FastText.load_fasttext_format('/home/elena/Desktop/fasttext/cc.es.300.bin', encoding='utf-8')
#w2v_vectors = model_fasttext.wv.vectors 
#w2v_indices = {word: model.wv.vocab[word].index for word in model.wv.vocab} 
#num_features = model.vector_size
#vocab = model.wv.vocab.keys()

2020-01-22 20:51:36,519 : INFO : loading 2000000 words for fastText model from /home/elena/Desktop/fasttext/cc.es.300.bin
2020-01-22 20:52:21,803 : INFO : loading weights for 2000000 words for fastText model from /home/elena/Desktop/fasttext/cc.es.300.bin
2020-01-22 20:53:03,459 : INFO : loaded (2000000, 300) weight matrix for fastText model from /home/elena/Desktop/fasttext/cc.es.300.bin


In [5]:
data_root = pathlib.Path('/home/elena/Desktop/Catalonia Independence Corpus/Dataset_Español/DATASET')

In [6]:
# load csv in pandas dataframe
train_df = pd.read_csv(os.path.join(data_root, 'train_hSVM_castellano_dataset.csv'), sep='\t', error_bad_lines=False)
val_df =  pd.read_csv(os.path.join(data_root, 'val_hSVM_castellano_dataset.csv'), sep='\t', error_bad_lines=False)
test_df =  pd.read_csv(os.path.join(data_root, 'test_hSVM_castellano_dataset.csv'), sep='\t', error_bad_lines=False)

train_df.shape, val_df.shape, test_df.shape

((6046, 7), (2015, 7), (2016, 7))

### Calculate the average over the word vectors

In [7]:
def featureVecMethod(words, w2v_model, num_features=300):
        # Pre-initialising empty numpy array for speed
    featureVec = np.zeros(num_features,dtype="float32")
    nwords = 0
    #append a vector to each word
    for word in words:
        try: 
            nwords = nwords + 1
            v = w2v_model[word]
            #print(word)
            #print(v)
            #if np.isnan(v).any():
                #print(word, v)
        except KeyError:
            continue
        #featureVec = featureVec + w2v_model[word]
        featureVec = featureVec + v
        #print(word, featureVec)    
    # dividing the result by number of words to get average
    if nwords != 0:
        featureVec = featureVec/nwords
    print(featureVec)
    return featureVec

def getAvgFeatureVecsOOV(tweet, w2v_model, num_features=300):
    tweetFeatureVec = np.zeros(num_features,dtype="float32")
    #    all_tweets = len(tweets)
    tweetFeatureVec = featureVecMethod(tweet, w2v_model, num_features)
    print(tweetFeatureVec)

    return tweetFeatureVec

tweet = ['arrimadas', 'se', 'presenta', 'a', 'las', 'generales', 'sacar', 'a', 'sánchez', 'de', 'moncloa', '#efeurgente']
vec = featureVecMethod(tweet, model_fasttext, num_features=300)

[ 6.77645160e-03 -1.36414589e-02  2.44615078e-02  2.20699459e-02
  4.97638108e-03  3.50623466e-02 -5.11929253e-03 -1.99633017e-02
 -5.84235275e-03 -8.87753665e-02 -4.62326072e-02  3.13383457e-03
  4.49324213e-03  1.50109613e-02  1.32593149e-02  5.76253096e-03
  8.75507668e-03 -2.38710851e-03 -6.78000972e-03 -1.77039485e-02
 -4.88323756e-02  4.21276651e-02 -2.91033890e-02 -1.19686285e-02
 -7.60351494e-02 -7.22085238e-02  3.53940465e-02  1.36187607e-02
  4.06890959e-02  6.96457876e-03  1.27535907e-03  1.57527160e-02
 -1.18059712e-03 -1.80911534e-02  3.89196463e-02  2.46073660e-02
 -4.82164603e-03 -3.01716700e-02 -1.79141443e-02 -2.14875787e-02
 -8.11079368e-02  4.31352668e-02  7.64925554e-02 -2.97837555e-02
 -1.14156129e-02  2.30263565e-02  3.38370465e-02 -2.06694026e-02
 -1.34634385e-02 -4.39084182e-03  2.97033731e-02  4.73945588e-02
 -1.11154197e-02  1.75611544e-02  2.06049811e-02 -7.17070773e-02
  1.67260841e-02 -1.64976325e-02  2.63322424e-02 -2.33879834e-02
 -4.75477008e-03  9.36703

In [8]:
class StanceDataset(Dataset):
    """Define the pytorch Dataset to process the tweets
       This class can be used for both training and validation dataset
       Run it for training data and pass the word2idx and idx2word when running
       for validation data
    """
    def __init__(self, df, w2v_model=None):
        print('Processing Data')
        self.df = df
        print('Removing white space...')
        self.df.CLEAN_FULL = self.df.CLEAN_FULL.progress_apply(lambda x: x.strip())
        self.w2v_model = w2v_model
        #self.nlp = Spanish(path=None)
        self.nlp = spacy.load('es_core_news_sm',disable=['parser', 'tagger', 'ner'])
        self.label2idx = {'FAVOR':0, 'AGAINST':1, 'NEUTRAL':2}
        print('*'*100)
        print('Dataset info:')
        print(f'Number of Tweets: {self.df.shape[0]}')
        #print(f'Vocab Size: {len(self.word2idx)}')
        print('*'*100)
    
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
        sent = self.df.CLEAN_FULL[idx]
        tokens = [w.text.lower() for w in self.nlp(self.tweet_clean(sent))]
        v = self.vector_average(tokens, self.w2v_model, num_features=300)
        vec = torch.from_numpy(v).float()
        return vec, self.label2idx[self.df.LABEL[idx]]
    
    def tweet_clean(self, text):
        """Very basic text cleaning. This function can be built upon for
           better preprocessing
        """
        text = re.sub(r'[\s]+', ' ', text) # replace multiple white spaces with single space
        return text.strip()
    
    def vector_average(self, words, w2v_model, num_features):
        featureVec = np.zeros(num_features, dtype="float32")
        nwords = 0
        for word in words:
            try: 
                nwords = nwords + 1
                v = w2v_model[word]
            except KeyError:
                continue
            featureVec = featureVec + v
            #print(word, featureVec)    
        if nwords != 0:
            featureVec = featureVec/nwords
        #print(featureVec)
        return featureVec

### Training dataset

In [9]:
train_ds = StanceDataset(train_df, w2v_model=model_fasttext)
print(type(train_ds))
print(train_ds)
#vocab_size = len(train_ds.word2idx)

Processing Data
Removing white space...


HBox(children=(FloatProgress(value=0.0, description='Progress', max=6046.0, style=ProgressStyle(description_wi…


****************************************************************************************************
Dataset info:
Number of Tweets: 6046
****************************************************************************************************
<class '__main__.StanceDataset'>
<__main__.StanceDataset object at 0x7f90d87dc790>


### Validation dataset

In [10]:
val_ds = StanceDataset(val_df, w2v_model=model_fasttext)

Processing Data
Removing white space...


HBox(children=(FloatProgress(value=0.0, description='Progress', max=2015.0, style=ProgressStyle(description_wi…


****************************************************************************************************
Dataset info:
Number of Tweets: 2015
****************************************************************************************************


### Test dataset

In [11]:
test_ds = StanceDataset(test_df, w2v_model=model_fasttext)

Processing Data
Removing white space...


HBox(children=(FloatProgress(value=0.0, description='Progress', max=2016.0, style=ProgressStyle(description_wi…


****************************************************************************************************
Dataset info:
Number of Tweets: 2016
****************************************************************************************************


## 2. Make batches through pytorch Dataloader

In [12]:
batch_size = 1024

### Train dataloader

In [13]:
train_dl = DataLoader(train_ds, batch_size=1024, shuffle=True)
#for batch in train_dl:
 #   x, y = batch
  #  print(x)
   # print(y)

### Validation dataloader

In [14]:
val_dl = DataLoader(val_ds, batch_size=batch_size)
print(val_dl)

<torch.utils.data.dataloader.DataLoader object at 0x7f90d8809610>


### Test dataloader

In [15]:
test_dl = DataLoader(test_ds, batch_size=batch_size)

## Linear model

In [16]:
from torch.autograd import Variable

class linearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize):
        super(linearRegression, self).__init__()
        self.emb_drop = nn.Dropout(0.3)
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        #out = self.linear(x)
        out = self.linear(self.emb_drop(x))
        return out

In [17]:
class MultiLinearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize, n_hidden):
        super(MultiLinearRegression, self).__init__()
        self.dropout = nn.Dropout(0.5)
        self.linear1 = torch.nn.Linear(inputSize, n_hidden[0])
        self.linear2 = torch.nn.Linear(n_hidden[0], n_hidden[1])
        self.linear3 = torch.nn.Linear(n_hidden[1], outputSize)

    def forward(self, x):
        #out = self.linear(x)
        #out = self.linear(self.emb_drop(x))
        out1 = self.dropout(F.relu(self.linear1(x)))
        out2 = self.linear2(F.relu(out1))
        out = self.linear3(out2)
        return out

In [18]:
embedding_dim = 300
n_hidden = [256, 1024]
n_out = 3
learning_rate = 0.001

model = MultiLinearRegression(embedding_dim, n_out, n_hidden).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = F.cross_entropy

## 4. Ignite training callbacks

*Ignite is all about callbacks.  
Training and evaluation is defined separately.  
You can define your single custom training and evaluator loop and add them to Engine.  
Add loss and accuracy to the trainer and evaluator.
Finally define early stopping and modelcheckpoint*

### Define single training and validation loop

In [19]:
def process_function(engine, batch):
    """Single training loop to be attached to trainer Engine
    """
    model.train()
    optimizer.zero_grad()
    x, y = batch
    x, y = x.to(device), y.to(device)
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
    y_pred_lab = torch.argmax(y_pred, dim=1)
    y_pred_one_hot = torch.zeros(y_pred.size()[0], 3)
    y_one_hot = torch.zeros(y_pred.size()[0], 3)
    y_pred_one_hot[torch.arange(y_pred.size()[0]), y_pred_lab] = 1
    y_one_hot[torch.arange(y_pred.size()[0]), y] = 1
    return loss.item(), y_pred_one_hot, y_one_hot


def eval_function(engine, batch):
    """Single evaluator loop to be attached to trainer and evaluator Engine
    """
    model.eval()
    with torch.no_grad():
        x, y = batch
        x, y = x.to(device), y.to(device)
        y_pred = model(x)
        return y_pred, y
    
trainer = Engine(process_function)
train_evaluator = Engine(eval_function)
validation_evaluator = Engine(eval_function)

### Add metrics (Loss and Accuracy) to the trainer and evaluator

In [20]:
def max_output_transform(output):
    """It convers the predicted ouput probabilties to indexes for accuracy calculation
    """
    y_pred, y = output
    #return torch.max(y_pred, dim=1)[1], y
    y_pred_lab = torch.argmax(y_pred, dim=1)
    y_pred_one_hot = torch.zeros(y_pred.size()[0], 3)
    y_one_hot = torch.zeros(y_pred.size()[0], 3)
    y_pred_one_hot[torch.arange(y_pred.size()[0]), y_pred_lab] = 1
    y_one_hot[torch.arange(y_pred.size()[0]), y] = 1
    return y_pred_one_hot, y_one_hot




# attach running loss (will be displayed in progess bar)
RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'loss')

# attach running accuracy (will be displayed in progess bar)
#RunningAverage(Accuracy(output_transform=lambda x: [x[1], x[2]])).attach(trainer, 'acc')
RunningAverage(Accuracy(output_transform=lambda x: [x[1], x[2]], is_multilabel=True)).attach(trainer, 'acc')

# attach accuracy and loss to train_evaluator
Accuracy(output_transform=max_output_transform, is_multilabel=True).attach(train_evaluator, 'accuracy')
Loss(loss_fn).attach(train_evaluator, 'ce')

# attach accuracy and loss to validation_evaluator
Accuracy(output_transform=max_output_transform, is_multilabel=True).attach(validation_evaluator, 'accuracy')
Loss(loss_fn).attach(validation_evaluator, 'ce')

### Report progress through tqdm progress bar

In [21]:
pbar = ProgressBar(persist=True, bar_format="")
pbar.attach(trainer, ['loss', 'acc'])

### Log results after each epoch

In [22]:
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    """This function will run after each epoch and 
       report the training loss and accuracy (defined above)
    """
    train_evaluator.run(train_dl)
    metrics = train_evaluator.state.metrics
    avg_accuracy = metrics['accuracy']
    avg_bce = metrics['ce']
    pbar.log_message(
        f'Training Results - Epoch: {engine.state.epoch}  Avg accuracy: {avg_accuracy:.4f} Avg loss: {avg_bce:.4f}')
    
@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    """This function will run after each epoch and 
       report the validation loss and accuracy (defined above)
    """
    validation_evaluator.run(val_dl)
    metrics = validation_evaluator.state.metrics
    avg_accuracy = metrics['accuracy']
    avg_bce = metrics['ce']
    pbar.log_message(
        f'Validation Results - Epoch: {engine.state.epoch}  Avg accuracy: {avg_accuracy:.4f} Avg loss: {avg_bce:.4f}')
    pbar.n = pbar.last_print_n = 0

### Callback for Early stopping and ModelCheckpoint

In [23]:
def score_function(engine):
    """EarlyStopping will call this function to check if score improved
    """
    val_loss = engine.state.metrics['ce']
    return -val_loss

early_stopping = EarlyStopping(patience=3, score_function=score_function, trainer=trainer)
validation_evaluator.add_event_handler(Events.COMPLETED, early_stopping)

checkpointer = ModelCheckpoint(
    './models', 
    'text_gru_concat', 
    save_interval=1, 
    n_saved=1, 
    create_dir=True, 
    save_as_state_dict=True,
    require_empty=False
)

trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'stance': model})

<ignite.engine.engine.RemovableEventHandle at 0x7f8f2b10c4d0>

<ignite.engine.engine.RemovableEventHandle at 0x7f8f2b520350>

## Train the model

In [24]:
trainer.run(train_dl, max_epochs=100)

2020-01-22 20:54:24,256 : INFO : Engine run starting with max_epochs=100.


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:54:27,082 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:54:27,085 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:54:29,588 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:54:29,589 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:54:29,589 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 1  Avg accuracy: 0.4160 Avg loss: 1.0131


2020-01-22 20:54:30,532 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:54:30,533 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 1  Avg accuracy: 0.3881 Avg loss: 1.0252


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:54:33,088 : INFO : Epoch[2] Complete. Time taken: 00:00:02
2020-01-22 20:54:33,091 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:54:35,540 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:54:35,541 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:54:35,541 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 2  Avg accuracy: 0.5003 Avg loss: 0.9480


2020-01-22 20:54:36,388 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:54:36,389 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 2  Avg accuracy: 0.5072 Avg loss: 0.9648


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:54:39,099 : INFO : Epoch[3] Complete. Time taken: 00:00:02
2020-01-22 20:54:39,104 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:54:41,672 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:54:41,673 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:54:41,673 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 3  Avg accuracy: 0.5251 Avg loss: 0.8902


2020-01-22 20:54:42,588 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:54:42,589 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 3  Avg accuracy: 0.4864 Avg loss: 0.9152


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:54:45,271 : INFO : Epoch[4] Complete. Time taken: 00:00:02
2020-01-22 20:54:45,272 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:54:47,780 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:54:47,781 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:54:47,781 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 4  Avg accuracy: 0.5407 Avg loss: 0.8710


2020-01-22 20:54:48,604 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:54:48,604 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 4  Avg accuracy: 0.5186 Avg loss: 0.9010


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:54:51,200 : INFO : Epoch[5] Complete. Time taken: 00:00:02
2020-01-22 20:54:51,208 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:54:53,804 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:54:53,805 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:54:53,805 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 5  Avg accuracy: 0.5422 Avg loss: 0.8576


2020-01-22 20:54:54,741 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:54:54,741 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 5  Avg accuracy: 0.5261 Avg loss: 0.8893


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:54:57,589 : INFO : Epoch[6] Complete. Time taken: 00:00:02
2020-01-22 20:54:57,591 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:00,437 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:00,438 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:00,439 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 6  Avg accuracy: 0.5582 Avg loss: 0.8446


2020-01-22 20:55:01,321 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:01,322 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 6  Avg accuracy: 0.5419 Avg loss: 0.8759


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:04,092 : INFO : Epoch[7] Complete. Time taken: 00:00:02
2020-01-22 20:55:04,096 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:06,524 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:06,525 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:06,525 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 7  Avg accuracy: 0.5445 Avg loss: 0.8380


2020-01-22 20:55:07,313 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:07,313 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 7  Avg accuracy: 0.5007 Avg loss: 0.8758


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:09,922 : INFO : Epoch[8] Complete. Time taken: 00:00:02
2020-01-22 20:55:09,924 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:12,464 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:12,464 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:12,465 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 8  Avg accuracy: 0.5572 Avg loss: 0.8290


2020-01-22 20:55:13,258 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:13,259 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 8  Avg accuracy: 0.5404 Avg loss: 0.8638


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:15,922 : INFO : Epoch[9] Complete. Time taken: 00:00:02
2020-01-22 20:55:15,926 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:18,507 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:18,508 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:18,509 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 9  Avg accuracy: 0.5665 Avg loss: 0.8170


2020-01-22 20:55:19,342 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:19,343 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 9  Avg accuracy: 0.5479 Avg loss: 0.8596


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:22,140 : INFO : Epoch[10] Complete. Time taken: 00:00:02
2020-01-22 20:55:22,142 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:24,548 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:24,549 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:24,549 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 10  Avg accuracy: 0.5721 Avg loss: 0.8092


2020-01-22 20:55:25,332 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:25,332 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 10  Avg accuracy: 0.5330 Avg loss: 0.8533


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:27,927 : INFO : Epoch[11] Complete. Time taken: 00:00:02
2020-01-22 20:55:27,932 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:30,446 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:30,447 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:30,447 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 11  Avg accuracy: 0.5887 Avg loss: 0.7996


2020-01-22 20:55:31,239 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:31,239 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 11  Avg accuracy: 0.5598 Avg loss: 0.8426


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:33,773 : INFO : Epoch[12] Complete. Time taken: 00:00:02
2020-01-22 20:55:33,774 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:36,177 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:36,178 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:36,178 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 12  Avg accuracy: 0.5911 Avg loss: 0.7922


2020-01-22 20:55:36,985 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:36,986 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 12  Avg accuracy: 0.5558 Avg loss: 0.8438


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:39,583 : INFO : Epoch[13] Complete. Time taken: 00:00:02
2020-01-22 20:55:39,584 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:42,366 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:42,367 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:42,367 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 13  Avg accuracy: 0.6019 Avg loss: 0.7810


2020-01-22 20:55:43,288 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:43,289 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 13  Avg accuracy: 0.5583 Avg loss: 0.8319


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:45,982 : INFO : Epoch[14] Complete. Time taken: 00:00:02
2020-01-22 20:55:45,992 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:48,381 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:48,382 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:48,382 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 14  Avg accuracy: 0.6059 Avg loss: 0.7732


2020-01-22 20:55:49,229 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:49,230 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 14  Avg accuracy: 0.5623 Avg loss: 0.8300


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:51,862 : INFO : Epoch[15] Complete. Time taken: 00:00:02
2020-01-22 20:55:51,867 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:55:54,397 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:55:54,397 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:55:54,398 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 15  Avg accuracy: 0.6115 Avg loss: 0.7672


2020-01-22 20:55:55,287 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:55:55,288 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 15  Avg accuracy: 0.5608 Avg loss: 0.8248


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:55:58,067 : INFO : Epoch[16] Complete. Time taken: 00:00:02
2020-01-22 20:55:58,071 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:00,464 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:00,465 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:00,466 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 16  Avg accuracy: 0.6239 Avg loss: 0.7605


2020-01-22 20:56:01,276 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:01,277 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 16  Avg accuracy: 0.5667 Avg loss: 0.8202


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:03,808 : INFO : Epoch[17] Complete. Time taken: 00:00:02
2020-01-22 20:56:03,811 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:06,287 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:06,287 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:06,288 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 17  Avg accuracy: 0.6179 Avg loss: 0.7540


2020-01-22 20:56:07,121 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:07,121 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 17  Avg accuracy: 0.5648 Avg loss: 0.8211


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:09,788 : INFO : Epoch[18] Complete. Time taken: 00:00:02
2020-01-22 20:56:09,790 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:12,301 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:12,302 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:12,302 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 18  Avg accuracy: 0.6242 Avg loss: 0.7509


2020-01-22 20:56:13,140 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:13,140 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 18  Avg accuracy: 0.5707 Avg loss: 0.8219


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:15,741 : INFO : Epoch[19] Complete. Time taken: 00:00:02
2020-01-22 20:56:15,749 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:18,319 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:18,319 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:18,320 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 19  Avg accuracy: 0.6371 Avg loss: 0.7413


2020-01-22 20:56:19,155 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:19,155 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 19  Avg accuracy: 0.5836 Avg loss: 0.8097


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:21,685 : INFO : Epoch[20] Complete. Time taken: 00:00:02
2020-01-22 20:56:21,686 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:24,105 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:24,106 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:24,106 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 20  Avg accuracy: 0.6417 Avg loss: 0.7341


2020-01-22 20:56:24,910 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:24,911 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 20  Avg accuracy: 0.5886 Avg loss: 0.8078


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:27,501 : INFO : Epoch[21] Complete. Time taken: 00:00:02
2020-01-22 20:56:27,507 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:29,917 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:29,918 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:29,918 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 21  Avg accuracy: 0.6298 Avg loss: 0.7330


2020-01-22 20:56:30,732 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:30,732 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 21  Avg accuracy: 0.5767 Avg loss: 0.8189


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:33,597 : INFO : Epoch[22] Complete. Time taken: 00:00:02
2020-01-22 20:56:33,599 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:36,367 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:36,368 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:36,369 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 22  Avg accuracy: 0.6492 Avg loss: 0.7205


2020-01-22 20:56:37,225 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:37,226 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 22  Avg accuracy: 0.5911 Avg loss: 0.8039


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:39,903 : INFO : Epoch[23] Complete. Time taken: 00:00:02
2020-01-22 20:56:39,912 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:42,371 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:42,372 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:42,372 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 23  Avg accuracy: 0.6532 Avg loss: 0.7157


2020-01-22 20:56:43,195 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:43,196 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 23  Avg accuracy: 0.5931 Avg loss: 0.8038


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:45,792 : INFO : Epoch[24] Complete. Time taken: 00:00:02
2020-01-22 20:56:45,797 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:48,292 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:48,293 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:48,293 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 24  Avg accuracy: 0.6515 Avg loss: 0.7117


2020-01-22 20:56:49,200 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:49,201 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 24  Avg accuracy: 0.5901 Avg loss: 0.8027


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:51,849 : INFO : Epoch[25] Complete. Time taken: 00:00:02
2020-01-22 20:56:51,850 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:56:54,321 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:56:54,322 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:56:54,322 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 25  Avg accuracy: 0.6576 Avg loss: 0.7060


2020-01-22 20:56:55,195 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:56:55,196 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 25  Avg accuracy: 0.5980 Avg loss: 0.7938


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:56:58,168 : INFO : Epoch[26] Complete. Time taken: 00:00:02
2020-01-22 20:56:58,170 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:57:00,899 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:57:00,900 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:57:00,900 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 26  Avg accuracy: 0.6566 Avg loss: 0.7093


2020-01-22 20:57:01,795 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:57:01,795 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 26  Avg accuracy: 0.5821 Avg loss: 0.8077


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:57:04,464 : INFO : Epoch[27] Complete. Time taken: 00:00:02
2020-01-22 20:57:04,467 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:57:06,963 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:57:06,963 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:57:06,964 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 27  Avg accuracy: 0.6609 Avg loss: 0.6949


2020-01-22 20:57:07,764 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:57:07,765 : INFO : Engine run complete. Time taken 00:00:00


Validation Results - Epoch: 27  Avg accuracy: 0.5935 Avg loss: 0.8010


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

2020-01-22 20:57:10,383 : INFO : Epoch[28] Complete. Time taken: 00:00:02
2020-01-22 20:57:10,385 : INFO : Engine run starting with max_epochs=1.





2020-01-22 20:57:12,907 : INFO : Epoch[1] Complete. Time taken: 00:00:02
2020-01-22 20:57:12,908 : INFO : Engine run complete. Time taken 00:00:02
2020-01-22 20:57:12,908 : INFO : Engine run starting with max_epochs=1.


Training Results - Epoch: 28  Avg accuracy: 0.6725 Avg loss: 0.6895


2020-01-22 20:57:13,712 : INFO : Epoch[1] Complete. Time taken: 00:00:00
2020-01-22 20:57:13,713 : INFO : EarlyStopping: Stop training
2020-01-22 20:57:13,713 : INFO : Terminate signaled. Engine will stop after current iteration is finished.
2020-01-22 20:57:13,714 : INFO : Engine run complete. Time taken 00:00:00
2020-01-22 20:57:13,716 : INFO : Engine run complete. Time taken 00:02:49


Validation Results - Epoch: 28  Avg accuracy: 0.5975 Avg loss: 0.7979


<ignite.engine.engine.State at 0x7f8f2b111850>

In [25]:
fname = 'models/stance_model_linear3.pth'
torch.save(model, fname)

In [26]:
mod = torch.load('models/stance_model_linear3.pth')
mod.eval()

MultiLinearRegression(
  (dropout): Dropout(p=0.5, inplace=False)
  (linear1): Linear(in_features=300, out_features=256, bias=True)
  (linear2): Linear(in_features=256, out_features=1024, bias=True)
  (linear3): Linear(in_features=1024, out_features=3, bias=True)
)

In [27]:
mod.to(device)
correct = 0
total = 0
batch_accs = np.array([])
pred = np.array([])
true = np.array([])
with torch.no_grad():
    for data in test_dl:
        x, y = data
        x, y = x.to(device), y.to(device)
        outputs = mod(x)
        #outputs = model(x, l)
        print(type(y))
        _, predicted = torch.max(outputs.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
        batch_accs = np.append(batch_accs,((predicted == y).sum().item())/y.size(0))
        pred = np.concatenate((pred, predicted.cpu().numpy()))
        true = np.concatenate((true, y.cpu().numpy()))
        

print('correct ', correct)
print('total ', total)
print(batch_accs.mean(), batch_accs.std())
print('Accuracy of the network on the test: %d %%' % (
    100 * correct / total))
print(pred)

MultiLinearRegression(
  (dropout): Dropout(p=0.5, inplace=False)
  (linear1): Linear(in_features=300, out_features=256, bias=True)
  (linear2): Linear(in_features=256, out_features=1024, bias=True)
  (linear3): Linear(in_features=1024, out_features=3, bias=True)
)

<class 'torch.Tensor'>
<class 'torch.Tensor'>
correct  1231
total  2016
0.6105248235887097 0.005686113911290314
Accuracy of the network on the test: 61 %
[0. 1. 1. ... 1. 1. 1.]


In [28]:
from sklearn.metrics import f1_score
f1_score = f1_score(true, pred, average='macro')
print('F-score: ', f1_score)

F-score:  0.6372955176631079


In [29]:
from sklearn.metrics import precision_recall_fscore_support

In [30]:
f1_support = precision_recall_fscore_support(true, pred, average='macro')
print(f1_support)

(0.6697604396028676, 0.6180131479231035, 0.6372955176631079, None)


In [31]:
from sklearn.metrics import classification_report
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report(true, pred, target_names=target_names))

              precision    recall  f1-score   support

     class 0       0.55      0.64      0.59       807
     class 1       0.59      0.56      0.57       829
     class 2       0.87      0.65      0.74       380

    accuracy                           0.61      2016
   macro avg       0.67      0.62      0.64      2016
weighted avg       0.63      0.61      0.61      2016



In [33]:
torch.cuda.empty_cache()