In [None]:
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()


Mounted at /content/gdrive


In [None]:
from fastai.vision.all import*
from fastbook import*

In [None]:
import os
import pandas as pd
import numpy as np
import spacy
import matplotlib.pyplot as plt
import seaborn as sns
import timeit

import collections
from IPython.display import HTML, IFrame
from textblob import TextBlob
from wordcloud import WordCloud
from tqdm import tqdm_notebook
from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE
from sklearn.decomposition import KernelPCA




In [None]:
import re
import torch
from collections import defaultdict, Counter
from pprint import pprint
import warnings
warnings.filterwarnings('ignore')

In [None]:
!pip install w3lib
from w3lib.html import remove_tags

Collecting w3lib
  Downloading w3lib-1.22.0-py2.py3-none-any.whl (20 kB)
Installing collected packages: w3lib
Successfully installed w3lib-1.22.0


In [None]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
def load_data(path, file_list, dataset, encoding='utf8'):
   for file in file_list:
        with open(os.path.join(path, file), 'r', encoding=encoding) as text:
            dataset.append(text.read())


In [None]:
path = untar_data(URLs.IMDB)

In [None]:
path.ls()

(#7) [Path('/root/.fastai/data/imdb/tmp_lm'),Path('/root/.fastai/data/imdb/tmp_clas'),Path('/root/.fastai/data/imdb/test'),Path('/root/.fastai/data/imdb/unsup'),Path('/root/.fastai/data/imdb/README'),Path('/root/.fastai/data/imdb/imdb.vocab'),Path('/root/.fastai/data/imdb/train')]

In [None]:

# Create lists that will contain read lines
train_pos, train_neg, test_pos, test_neg = [], [], [], []

# Create a dictionary of paths and lists that store lines (key: value = path: list)
sets_dict = {'train/pos/': train_pos, 'train/neg/': train_neg,
             'test/pos/': test_pos, 'test/neg/': test_neg}

# Load the data
for dataset in sets_dict:
        file_list = [f for f in os.listdir(os.path.join(path, dataset)) if f.endswith('.txt')]
        load_data(os.path.join(path, dataset), file_list, sets_dict[dataset])

In [None]:
# Concatenate training and testing examples into one dataset
dataset = pd.concat([pd.DataFrame({'review': train_pos, 'label':1}),
                     pd.DataFrame({'review': test_pos, 'label':1}),
                     pd.DataFrame({'review': train_neg, 'label':0}),
                     pd.DataFrame({'review': test_neg, 'label':0})],
                     axis=0, ignore_index=True)

In [None]:
# Get indices of duplicate data (excluding first occurrence)
duplicate_indices = dataset.loc[dataset.duplicated(keep='first')].index

# Count and print the number of duplicates
print('Number of duplicates in the dataset: {}'.format(dataset.loc[duplicate_indices, 'review'].count()))

Number of duplicates in the dataset: 418


In [None]:
# Drop duplicates
dataset.drop_duplicates(keep='first', inplace=True)

In [None]:
# Save raw dataset as a CSV file
dataset.to_csv(os.path.join(path, '/content/dataset/datasets_feat/dataset_raw/dataset_raw.csv'), index=False)

In [None]:
# Load raw dataset from CSV file
dataset = pd.read_csv(os.path.join(path, '/content/dataset/datasets_feat/dataset_raw/dataset_raw.csv'))

In [None]:
def polarity(text):
    """Calculate the polarity score of the input text.
    
    """
    return TextBlob(text).sentiment.polarity

In [None]:
def subjectivity(text):
    """Calculate the subjectivity score of the input text.
    
    """
    return TextBlob(text).sentiment.subjectivity

In [None]:
def pos2(df, batch_size, n_threads, required_tags):
  
    # Create empty dictionary
    review_dict = collections.defaultdict(dict)
    for i, doc in enumerate(nlp.pipe(df, batch_size=batch_size, n_threads=n_threads)):
         for token in doc:
            pos = token.pos_
            if pos in required_tags:
                review_dict[i].setdefault(pos, 0)
                review_dict[i][pos] = review_dict[i][pos] + 1
    # Transpose data frame to shape (index, tags)
    return pd.DataFrame(review_dict).transpose()

In [None]:
def extract_features(df, batch_size, n_threads, required_tags):
    
    # Calculate polarity
    df['polarity'] = df.review.apply(polarity).astype('float16')
    # Calculate subjectivity
    df['subjectivity'] = df.review.apply(subjectivity).astype('float16') 
    # Calculate number of words in review
    df['word_count'] = df.review.apply(lambda text: len(text.split())).astype('int16')
    # Count number of uppercase words, then divide by word_count
    df['UPPERCASE'] = df.review.apply(lambda text: len([word for word in text.split()\
                                                        if word.isupper()]))/df.word_count
    # Change data type to float16
    df.UPPERCASE = df.UPPERCASE.astype('float16')
    # Count number of digits, then divide by word_count
    df['DIGITS'] = df.review.apply(lambda text: len([word for word in text.split()\
                                                     if word.isdigit()]))/df.word_count
    # Change data type to float16
    df.DIGITS = df.DIGITS.astype('float16')
    # Perform part-of-speech taging
    pos_data = pos2(df.review, batch_size=batch_size, n_threads=n_threads, required_tags=required_tags)
    # Divide POS tags count by word_count
    pos_data = pos_data.div(df.word_count, axis=0).astype('float16')
    # Concatenate pandas data frames horizontaly
    return pd.concat([df, pos_data], axis=1)

In [None]:
# Load language model and disable unnecessary components of processing pipeline
nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', '...'])
required_tags = ['PROPN', 'PUNCT', 'NOUN', 'ADJ', 'VERB']

batch_size = 512
n_threads = 2

# Test the processing time on a part of the trainig set, given batch_size and n_threads
start_time = timeit.default_timer()
print('Start processing 1000 examples using batch_size: {} and n_threads: {}'.format(batch_size, n_threads))
extract_features(dataset.loc[:1000, :], batch_size=batch_size, n_threads=n_threads, required_tags=required_tags)
print('Feature extraction function processing time: {:.2f} sec'.format(timeit.default_timer() - start_time))

Start processing 1000 examples using batch_size: 512 and n_threads: 2
Feature extraction function processing time: 14.10 sec


In [None]:
def split_extract_save(df, name, path, part_size, batch_size, n_threads, required_tags, nlp):
  
    if name not in os.listdir(path):
        dataset_parts = []
        N = int(len(df)/part_size)
        # Create list of dataframe chunks
        data_frames = [df.iloc[i*part_size:(i+1)*part_size].copy() for i in range(N+1)]
        # Process dataset partialy 
        for frame in tqdm_notebook(data_frames):
            # Extract features from dataset chunk
            dataset_part = extract_features(frame, batch_size=batch_size, n_threads=n_threads,
                                            required_tags=required_tags)
            dataset_parts.append(dataset_part)
            # Reload nlp
            nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', '...'])

        # Concatenate all parts into one dataset
        dataset_feat = pd.concat(dataset_parts, axis=0, sort=False)
        # Replace missing values NaN with 0
        dataset_feat.fillna(0, inplace=True)
        # Convert label values to int16
        dataset_feat.label = dataset_feat.label.astype('int16')
        # Export data frame to CSV file
        dataset_feat.to_csv(path + name, index=False)
    else:
        print('File {} already exists in given directory.'.format(name))  

In [None]:
# Define all required variables
nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', '...'])
required_tags = ['PROPN', 'PUNCT', 'NOUN', 'ADJ', 'VERB']
batch_size = 512
n_threads = 2
part_size = 5000
path = os.path.join(os.getcwd(), 'dataset/datasets_feat/')
name = 'dataset_feat.csv'

# Perform feature extraction and export resulted file into CSV
split_extract_save(dataset, name, path, part_size, batch_size, n_threads, required_tags, nlp)

  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Dictionary of {column: dtype} pairs
col_types = {'review': str,'label': np.int16, 'polarity': np.float16, 'subjectivity': np.float16,
             'word_count': np.int16, 'UPPERCASE': np.float16, 'DIGITS': np.float16, 'PROPN': np.float16,
             'VERB': np.float16, 'NOUN': np.float16, 'PUNCT': np.float16, 'ADJ': np.float16}

# Import dataset from the CSV file
dataset_feat = pd.read_csv('dataset/datasets_feat/dataset_feat.csv', dtype=col_types)

In [None]:
dataset_feat.head()

Unnamed: 0,review,label,polarity,subjectivity,word_count,UPPERCASE,DIGITS,VERB,NOUN,PUNCT,PROPN,ADJ
0,Of course if you are reading my review you have seen this film already. 'Raja Babu' is one of my most favorite characters. I just love the concept of a spoiled brat with a 24*7 servant on his motorcycle. Watch movies and emulate characters etc etc. I love the scene when a stone cracks in Kader khans mouth while eating. Also where Shakti Kapoor narrates a corny story of Raja Babu's affairs on a dinner table and Govinda wearing 'dharam-veer' uniform makes sentimental remarks. Thats my favorite scene of the film. 'Achcha Pitaji To Main Chalta Hoon' scene is just chemistry between two great In...,1,0.335938,0.713867,178,0.022476,0.0,0.09552,0.213501,0.17981,0.151733,0.073059
1,"I really enjoyed this movie. Most of the reviews have been bad, but most critics think a movie should be like an idea drama. This movie has a little bit of drama, but the rest is just clean fun and very entertaining. Forget about Julia Roberts being a Pretty Woman, Emma Roberts is a beautiful young lady and there is more to her than just that. Emma was so much fun to watch in the role of Nancy Drew. It is good to see a new face. I believe she will go far.<br /><br />Nancy Drew may not be based upon the books, but the story is still good. There is also a good blend of other character actors...",1,0.351807,0.59375,196,0.015305,0.0,0.107117,0.188721,0.122437,0.102051,0.096924
2,"This is not a movie you watch for entertainment, at least most people I know would not.<br /><br />It's portraits the cruelty to both body and mind that happen in a war pretty well, the characters seem plausible, especially because you ""read their minds"", something more often found in books and rarely in movies, however done very well in this piece. I would place it next to ""All quiet on the western front"" and ""Die Brücke"" in terms of leaving a lasting impression.<br /><br />I wish I could screen it at school, along with the other two movies - however finding a copy of it showed to be pret...",1,0.118896,0.48999,115,0.03479,0.0,0.156494,0.182617,0.165161,0.008698,0.06958
3,"Brilliant! My wife and I joined the sprawling line to see Holly at the Edinburgh Film Festival. After seeing the film, I can understand why there was such a long line. Holly is a touching story about an impossible connection between two people. She is a young girl, he is a worn out westerner. The film grasped every bone in our body. There aren't any graphic scenes or anything that is hard to watch - its the surrealism of normality that really kicks you in the gut. The film is beautifully shot. Among others, we loved the scene where Patrick teaches Holly to ride a small motorcycle. Thuy Ngo...",1,0.166748,0.53418,131,0.015266,0.0,0.137451,0.183228,0.122131,0.091614,0.076355
4,"This film could well have been one of those ordinary ""soapies"" relating the day to day events of half a dozen families whose lives are intertwined..broken relationships,building new friendships, street bashings, near accidents, hopes and dreams and even the discovery of a baby discarded under some bushes! What a mixture of events!<br /><br />Fortunately the film maker goes beyond those daily events and poses questions to consider although there are no satisfactory answers. He asksin this chaotic world do things just happen, is it just luck when things turn out right or , taking a fatalis...",1,0.204102,0.521973,198,0.0,0.0,0.121216,0.212158,0.111084,0.025253,0.101013


In [None]:
# Import the dataset
dataset_feat = pd.read_csv('dataset/datasets_feat/dataset_feat.csv')

In [None]:
def token_filter(token):
 
    return not (token.is_punct | token.is_space | token.is_stop | token.is_digit | token.like_num)

def text_preprocessing(df, batch_size, n_threads):

    # Remove HTML tags
    df = df.apply(remove_tags)
    # Make lowercase
    df = df.str.lower()
    processed_docs = []
    for doc in list(nlp.pipe(df, batch_size=batch_size, n_threads=n_threads)):
        # Remove stopwords, spaces, punctutations and digits
        text = [token for token in doc if token_filter(token)]
        # Lemmatization
        text = [token.lemma_ for token in text if token.lemma_ != '-PRON-']
        processed_docs.append(' '.join(text))
    return pd.Series(processed_docs, name='clean_review', index=df.index) 

In [None]:
# Define the variables
nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', 'tagger', '...'])
batch_size = 512
n_threads = 2

# Test the processing time on a part of the trainig set, given batch_size and n_threads
print('Start processing 1000 examples using batch_size: {} and n_threads: {}'.format(batch_size, n_threads))
start_time = timeit.default_timer()
text_preprocessing(dataset_feat.loc[:1000, 'review'], batch_size=batch_size, n_threads=n_threads)
print('Processing time: {:.2f} sec'.format(timeit.default_timer() - start_time))

Start processing 1000 examples using batch_size: 512 and n_threads: 2
Processing time: 2.06 sec


In [None]:
def split_norm_save(df, name, path, part_size, batch_size, n_threads, nlp):
   
    if name not in os.listdir(path):
        dataset_parts = []
        N = int(len(df)/part_size)
        # Create list of dataframe chunks
        data_frames = [df.iloc[i*part_size:(i+1)*part_size, 0].copy() for i in range(N+1)]
        # Process dataset partialy 
        for frame in tqdm_notebook(data_frames):
            # Normalize dataset chunk
            dataset_part = text_preprocessing(frame, batch_size=batch_size, n_threads=n_threads)
            dataset_parts.append(dataset_part)
            # Reload nlp
            nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', '...'])

        # Concatenate all parts into one series
        concat_clean = pd.concat(dataset_parts, axis=0, sort=False)
        # Concatenate dataset and cleaned review seires
        dataset_clean = pd.concat([df, concat_clean], axis=1)        
        # Export data frame to CSV file
        dataset_clean.to_csv(path + name, index=False)
    else:
        print('File {} already exists in given directory.'.format(name)) 

In [None]:
# Define variables
nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', 'tagger', '...'])
batch_size = 512
n_threads = 2
part_size = 5000
path = os.path.join(os.getcwd(), 'dataset/datasets_feat_clean/')
name = 'dataset_feat_clean.csv'

# Perform text preprocessing and save the resulted frame to CSV file
split_norm_save(dataset_feat, name, path, part_size, batch_size, n_threads, nlp)

  0%|          | 0/19 [00:00<?, ?it/s]

In [None]:
# Import preprocessed dataset from CSV file
dataset_feat_clean = pd.read_csv('dataset/datasets_feat_clean/dataset_feat_clean.csv')

In [None]:
# Display the first 5 rows
dataset_feat_clean.head()

Unnamed: 0,review,label,polarity,subjectivity,word_count,UPPERCASE,DIGITS,VERB,NOUN,PUNCT,PROPN,ADJ,clean_review
0,Of course if you are reading my review you have seen this film already. 'Raja Babu' is one of my most favorite characters. I just love the concept of a spoiled brat with a 24*7 servant on his motorcycle. Watch movies and emulate characters etc etc. I love the scene when a stone cracks in Kader khans mouth while eating. Also where Shakti Kapoor narrates a corny story of Raja Babu's affairs on a dinner table and Govinda wearing 'dharam-veer' uniform makes sentimental remarks. Thats my favorite scene of the film. 'Achcha Pitaji To Main Chalta Hoon' scene is just chemistry between two great In...,1,0.336,0.714,178.0,0.02248,0.0,0.0955,0.2135,0.1798,0.1517,0.07306,"course read review see film raja babu favorite character love concept spoil brat servant motorcycle watch movie emulate character etc etc love scene stone crack kader khan mouth eat shakti kapoor narrate corny story raja babu affair dinner table govinda wear dharam veer uniform make sentimental remark s favorite scene film achcha pitaji main chalta hoon scene chemistry great indian actor comical scene dialog brilliant cat mouse film watch actor help take away scene total entertainment like govinda kader khan chemistry think rb 6th list david dhawan deewana mastana ankhein','shola shabnam s..."
1,"I really enjoyed this movie. Most of the reviews have been bad, but most critics think a movie should be like an idea drama. This movie has a little bit of drama, but the rest is just clean fun and very entertaining. Forget about Julia Roberts being a Pretty Woman, Emma Roberts is a beautiful young lady and there is more to her than just that. Emma was so much fun to watch in the role of Nancy Drew. It is good to see a new face. I believe she will go far.<br /><br />Nancy Drew may not be based upon the books, but the story is still good. There is also a good blend of other character actors...",1,0.3518,0.5938,196.0,0.015305,0.0,0.1071,0.1887,0.12244,0.10205,0.0969,enjoy movie review bad critic think movie like idea drama movie little bite drama rest clean fun entertain forget julia roberts pretty woman emma roberts beautiful young lady emma fun watch role nancy draw good new face believe far.nancy draw base book story good good blend character actor support actor like pat carroll barry bostwick rachel leigh cook chris kattan credit surprise disney release movie people like movie contain sex violence curse good family film rare day time family movie judge self good wait sequel
2,"This is not a movie you watch for entertainment, at least most people I know would not.<br /><br />It's portraits the cruelty to both body and mind that happen in a war pretty well, the characters seem plausible, especially because you ""read their minds"", something more often found in books and rarely in movies, however done very well in this piece. I would place it next to ""All quiet on the western front"" and ""Die Brücke"" in terms of leaving a lasting impression.<br /><br />I wish I could screen it at school, along with the other two movies - however finding a copy of it showed to be pret...",1,0.1189,0.49,115.0,0.0348,0.0,0.1565,0.1826,0.1652,0.0087,0.0696,movie watch entertainment people know not.it portrait cruelty body mind happen war pretty character plausible especially read mind find book rarely movie piece place quiet western die brücke term leave last impression.i wish screen school movie find copy show pretty hard shame
3,"Brilliant! My wife and I joined the sprawling line to see Holly at the Edinburgh Film Festival. After seeing the film, I can understand why there was such a long line. Holly is a touching story about an impossible connection between two people. She is a young girl, he is a worn out westerner. The film grasped every bone in our body. There aren't any graphic scenes or anything that is hard to watch - its the surrealism of normality that really kicks you in the gut. The film is beautifully shot. Among others, we loved the scene where Patrick teaches Holly to ride a small motorcycle. Thuy Ngo...",1,0.1667,0.534,131.0,0.01527,0.0,0.1375,0.1832,0.12213,0.0916,0.07635,brilliant wife join sprawl line holly edinburgh film festival see film understand long line holly touch story impossible connection people young girl wear westerner film grasp bone body graphic scene hard watch surrealism normality kick gut film beautifully shoot love scene patrick teach holly ride small motorcycle thuy ngoyen rawness not believe act job)and ron livingston performance stay couple day highly recommend
4,"This film could well have been one of those ordinary ""soapies"" relating the day to day events of half a dozen families whose lives are intertwined..broken relationships,building new friendships, street bashings, near accidents, hopes and dreams and even the discovery of a baby discarded under some bushes! What a mixture of events!<br /><br />Fortunately the film maker goes beyond those daily events and poses questions to consider although there are no satisfactory answers. He asksin this chaotic world do things just happen, is it just luck when things turn out right or , taking a fatalis...",1,0.2041,0.522,198.0,0.0,0.0,0.1212,0.2122,0.1111,0.02525,0.101,film ordinary soapies relate day day event half dozen family live intertwine break relationship build new friendship street bashing near accident hope dream discovery baby discard bush mixture events!fortunately film maker go daily event pose question consider satisfactory answer ask chaotic world thing happen luck thing turn right take fatalistic view person predestine certain place certain time involve event future take new perspective uncanny experience.is super ego make believe important character say sit edge overlook grand canyon come realize infinitely small was.this favourite film ...


In [None]:
# Display the random review before normalization
idx = np.random.randint(dataset_feat_clean.shape[0])
HTML(dataset_feat_clean.loc[idx, 'review'])

In [None]:
def train_val_test_split(df, val_size, test_size, random_state=0):
 
    assert (val_size + test_size) < 1, 'Validation size and test size sum is greater or equal 1'
    assert val_size >= 0 and test_size >= 0, 'Negative size is not accepted'
    train, val, test = np.split(df.sample(frac=1, random_state=random_state),
                                [int((1-(val_size+test_size))*len(df)), int((1-test_size)*len(df))])
    return train, val, test

In [None]:
# Split entire raw dataset into training, validation and test sets
train_set, val_set, test_set = train_val_test_split(datasets_feat_clean, val_size=0.20, test_size=0.10)

In [None]:
# Check training, validation and test sets shapes
print('Training set shape: {}'.format(train_set.shape))
print('Validation set shape: {}'.format(val_set.shape))
print('Test set shape: {}'.format(test_set.shape))

Training set shape: (65914, 13)
Validation set shape: (18833, 13)
Test set shape: (9417, 13)


In [None]:
# Save training, validation and test sets to CSV files
train_set.to_csv('dataset/datasets_feat_clean/train_feat_clean.csv', index=False)
val_set.to_csv('dataset/datasets_feat_clean/val_feat_clean.csv', index=False)
test_set.to_csv('dataset/datasets_feat_clean/test_feat_clean.csv', index=False)

In [None]:

class Vocab:

  def __init__(self, dataset, target_col=None, word2index=None, sos_token='<SOS>', eos_token='<EOS>', unk_token='<UNK>',
             pad_token='<PAD>', min_word_count=5, max_vocab_size=None, max_seq_len=0.8,
             use_pretrained_vectors=False, glove_path='Glove/', glove_name='glove.6B.100d.txt',
             weights_file_name='Glove/weights.npy'):
        
        # Convert pandas dataframe to numpy.ndarray
        if isinstance(dataset, pd.DataFrame):
            dataset = dataset.to_numpy()
        
        self.dataset = dataset
        self.target_col = target_col
        
        if self.target_col:
            self.y_lengths = []
            
        self.x_lengths = []
        self.word2idx_mapping = word2index
        
        # Define word2idx and idx2word as empty dictionaries
        if self.word2idx_mapping:
            self.word2index = self.word2idx_mapping
        else:
            self.word2index = defaultdict(dict)
            self.index2word = defaultdict(dict)            
        
        # Instantiate special tokens
        self.sos_token = sos_token
        self.eos_token = eos_token
        self.unk_token = unk_token
        self.pad_token = pad_token
        
        # Instantiate min_word_count, max_vocab_size and max_seq_len
        self.min_word_count = min_word_count
        self.max_vocab_size = max_vocab_size
        self.max_seq_len = max_seq_len
        
        self.use_pretrained_vectors = use_pretrained_vectors
        
        if self.use_pretrained_vectors: 
            self.glove_path = glove_path
            self.glove_name = glove_name
            self.weights_file_name = weights_file_name
        
        self.build_vocab()
        

  def build_vocab(self):
        """Build the vocabulary, filter dataset sequences and create the weights matrix if specified.
        
        """
        # Create a dictionary that maps words to their count
        self.word_count = self.word2count()

        # Trim the vocabulary
        # Get rid of out-of-vocabulary words from the dataset
        if self.min_word_count or self.max_vocab_size:
            self.trimVocab()
            self.trimDatasetVocab()

        # Trim sequences in terms of length
        if self.max_seq_len:
            if self.x_lengths:
                self.trimSeqLen()

            else:
                # Calculate sequences lengths
                self.x_lengths = [len(seq.split()) for seq in self.dataset[:, 0]]
                
                if self.target_col:
                    self.y_lengths = [len(seq.split()) for seq in self.dataset[:, self.target_col]]
                    
                self.trimSeqLen()                

                
        # Map each tokens to index
        if not self.word2idx_mapping:
            self.mapWord2index()
               
        # Crate index2word mapping
        self.index2word = {index: word for word, index in self.word2index.items()}
        
        # Map dataset tokens to indices
        self.mapWords2indices()
        
        # Create weights matrix based on Glove vectors
        if self.use_pretrained_vectors:
            self.glove_vectors()       
        
            
  def word2count(self):
        """Count the number of words occurrences.
        
        """
        # Instantiate the Counter object
        word_count = Counter()

        # Iterate through the dataset and count tokens
        for line in self.dataset[:, 0]:
            word_count.update(str(line).split())
            
            # Include strings from target column
            if self.target_col:
                for line in self.dataset[:, self.target_col]:
                    word_count.update(str(line).split())
            
        return word_count
    

  def trimVocab(self):
        """Trim the vocabulary in terms of the minimum word count or the vocabulary maximum size.
        
        """
        # Trim the vocabulary in terms of the minimum word count
        if self.min_word_count and not self.max_vocab_size:
            # If min_word_count <= 1, use the quantile approach
            if self.min_word_count <= 1:
                # Create the list of words count
                word_stat = [count for count in self.word_count.values()]
                # Calculate the quantile of words count
                quantile = int(np.quantile(word_stat, self.min_word_count))
                print('Trimmed vocabulary using as mininum count threashold: quantile({:3.2f}) = {}'.\
                      format(self.min_word_count, quantile))
                # Filter words using quantile threshold
                self.trimmed_word_count = {word: count for word, count in self.word_count.items() if count >= quantile}
            # If min_word_count > 1 use standard approach
            else:
                # Filter words using count threshold
                self.trimmed_word_count = {word: count for word, count in self.word_count.items()\
                                   if count >= self.min_word_count}
                print('Trimmed vocabulary using as minimum count threashold: count = {:3.2f}'.format(self.min_word_count))
                     
        # Trim the vocabulary in terms of its maximum size
        elif self.max_vocab_size and not self.min_word_count:
            self.trimmed_word_count = {word: count for word, count in self.word_count.most_common(self.max_vocab_size)}
            print('Trimmed vocabulary using maximum size of: {}'.format(self.max_vocab_size))
        else:
            raise ValueError('Use min_word_count or max_vocab_size, not both!')
            
        print('{}/{} tokens has been retained'.format(len(self.trimmed_word_count.keys()),
                                                     len(self.word_count.keys())))

    
  def trimDatasetVocab(self):
        """Get rid of rare words from the dataset sequences.
        
        """
        for row in range(self.dataset.shape[0]):
            trimmed_x = [word for word in str(self.dataset[row, 0]).split() if word in self.trimmed_word_count.keys()]
            self.x_lengths.append(len(trimmed_x))
            self.dataset[row, 0] = ' '.join(trimmed_x)
        print('Trimmed input strings vocabulary')
                            
        if self.target_col:
            for row in range(self.dataset.shape[0]):
                trimmed_y = [word for word in str(self.dataset[row, self.target_col]).split()\
                             if word in self.trimmed_word_count.keys()]
                self.y_lengths.append(len(trimmed_y))
                self.dataset[row, self.target_col] = ' '.join(trimmed_y)
            print('Trimmed target strings vocabulary')
            
                
  def trimSeqLen(self):
        """Trim dataset sequences in terms of the length.
        
        """
        if self.max_seq_len <= 1:
            x_threshold = int(np.quantile(self.x_lengths, self.max_seq_len)) 
            if self.target_col:
                y_threshold = int(np.quantile(self.y_lengths, self.max_seq_len)) 
        else:
            x_threshold = self.max_seq_len
            if self.target_col:
                y_threshold =  self.max_seq_len
        
        if self.target_col:      
            for row in range(self.dataset.shape[0]):
                x_truncated = ' '.join(self.dataset[row, 0].split()[:x_threshold])\
                if self.x_lengths[row] > x_threshold else self.dataset[row, 0]
                
                # Add 1 if the EOS token is going to be added to the sequence
                self.x_lengths[row] = len(x_truncated.split()) if not self.eos_token else \
                                      len(x_truncated.split()) + 1
                
                self.dataset[row, 0] = x_truncated
                
                y_truncated = ' '.join(self.dataset[row, self.target_col].split()[:y_threshold])\
                if self.y_lengths[row] > y_threshold else self.dataset[row, self.target_col]
                
                # Add 1 or 2 to the length to inculde special tokens
                y_length = len(y_truncated.split())
                if self.sos_token and not self.eos_token:
                    y_length = len(y_truncated.split()) + 1
                elif self.eos_token and not self.sos_token:
                    y_length = len(y_truncated.split()) + 1
                elif self.sos_token and self.eos_token:
                    y_length = len(y_truncated.split()) + 2
                    
                self.y_lengths[row] = y_length
                
                self.dataset[row, self.target_col] = y_truncated
                
            print('Trimmed input sequences lengths to the length of: {}'.format(x_threshold))
            print('Trimmed target sequences lengths to the length of: {}'.format(y_threshold))
            
        else:
            for row in range(self.dataset.shape[0]):

                x_truncated = ' '.join(self.dataset[row, 0].split()[:x_threshold])\
                if self.x_lengths[row] > x_threshold else self.dataset[row, 0]
                
                # Add 1 if the EOS token is going to be added to the sequence
                self.x_lengths[row] = len(x_truncated.split()) if not self.eos_token else \
                                      len(x_truncated.split()) + 1
                
                self.dataset[row, 0] = x_truncated
                
            print('Trimmed input sequences lengths to the length of: {}'.format(x_threshold))
                
        
  def mapWord2index(self):
        """Populate vocabulary word2index dictionary.
        
        """
        # Add special tokens as first elements in word2index dictionary
        token_count = 0
        for token in [self.pad_token, self.sos_token, self.eos_token, self.unk_token]:
            if token:
                self.word2index[token] = token_count
                token_count += 1
        
        # If vocabulary is trimmed, use trimmed_word_count
        if self.min_word_count or self.max_vocab_size:
            for key in self.trimmed_word_count.keys():
                self.word2index[key] = token_count
                token_count += 1
            
        # If vocabulary is not trimmed, iterate through dataset    
        else:
            for line in self.dataset.iloc[:, 0]:
                for word in line.split():
                    if word not in self.word2index.keys():
                        self.word2index[word] = token_count
                        token_count += 1
            # Include strings from target column
            if self.target_col:
                for line in self.dataset.iloc[:, self.target_col]:
                    for word in line.split():
                        if word not in self.word2index.keys():
                            self.word2index[word] = token_count
                            token_count += 1
                            
        self.word2index.default_factory = lambda: self.word2index[self.unk_token]
                            
        
  def mapWords2indices(self):
        """Iterate through the dataset to map each word to its corresponding index.
        Use special tokens if specified.
        
        """
        for row in range(self.dataset.shape[0]):
            words2indices = []
            for word in self.dataset[row, 0].split():
                words2indices.append(self.word2index[word])
                    
            # Append the end of the sentence token
            if self.eos_token:
                words2indices.append(self.word2index[self.eos_token])
                
            self.dataset[row, 0] = np.array(words2indices)
                
        # Map strings from target column
        if self.target_col:
            for row in range(self.dataset.shape[0]):
                words2indices = []
                
                # Insert the start of the sentence token
                if self.sos_token:
                    words2indices.append(self.word2index[self.sos_token])
                    
                for word in self.dataset[row, self.target_col].split():
                    words2indices.append(self.word2index[word])

                        
                # Append the end of the sentence token
                if self.eos_token:
                    words2indices.append(self.word2index[self.eos_token])
                    
                self.dataset[row, self.target_col] = np.array(words2indices)
           
        print('Mapped words to indices')

    
  def glove_vectors(self):
        """ Read glove vectors from a file, create the matrix of weights mapping vocabulary tokens to vectors.
        Save the weights matrix to the numpy file.
        
        """
        # Load Glove word vectors to the pandas dataframe
        try:
            gloves = pd.read_csv(self.glove_path + self.glove_name, sep=" ", quoting=3, header=None, index_col=0)
        except FileNotFoundError:
            print('File: {} not found in: {} directory'.format(self.glove_name, self.glove_path))
            
        # Map Glove words to vectors
        print('Start creating glove_word2vector dictionary')
        self.glove_word2vector = gloves.T.to_dict(orient='list')
        
        # Extract embedding dimension
        emb_dim = int(re.findall('\d+' ,self.glove_name)[-1])
        # Length of the vocabulary
        matrix_len = len(self.word2index)
        # Initialize the weights matrix
        weights_matrix = np.zeros((matrix_len, emb_dim))
        words_found = 0

        # Populate the weights matrix
        for word, index in self.word2index.items():
            try: 
                weights_matrix[index] = np.array(self.glove_word2vector[word])
                words_found += 1
            except KeyError:
                # If vector wasn't found in Glove, initialize random vector
                weights_matrix[index] = np.random.normal(scale=0.6, size=(emb_dim, ))
         
        # Save the weights matrix into numpy file
        np.save(self.weights_file_name, weights_matrix, allow_pickle=False)
        
        # Delete glove_word2vector variable to free the memory
        del self.glove_word2vector
                        
        print('Extracted {}/{} of pre-trained word vectors.'.format(words_found, matrix_len))
        print('{} vectors initialized to random numbers'.format(matrix_len - words_found))
        print('Weights vectors saved into {}'.format(self.weights_file_name))
                

In [None]:
# Load the training set
train_dataset = pd.read_csv('dataset/datasets_feat_clean/train_feat_clean.csv', 
                      usecols=['clean_review', 'subjectivity', 'polarity', 'word_count', 'label'],
                      dtype={'clean_review': str, 'label': np.int16})

In [None]:
# Change the columns order
train_dataset = train_dataset[['clean_review', 'subjectivity', 'polarity', 'word_count', 'label']]

In [None]:
# Display the first 5 rows from the dataset
train_dataset.head()

In [None]:
train_vocab = Vocab(train_dataset, target_col=None, word2index=None, sos_token='<SOS>', eos_token='<EOS>',
                    unk_token='<UNK>', pad_token='<PAD>', min_word_count=None, max_vocab_size=5000, max_seq_len=0.8,
                    use_pretrained_vectors=False, glove_path='glove/', glove_name='glove.6B.100d.txt',
                    weights_file_name='glove/weights.npy')

Trimmed vocabulary using maximum size of: 5000
5000/129454 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 80
Mapped words to indices


In [None]:
# Load the validation set
val_dataset = pd.read_csv('dataset/datasets_feat_clean/val_feat_clean.csv', 
                      usecols=['clean_review', 'subjectivity', 'polarity', 'word_count', 'label'],
                      dtype={'clean_review': str, 'label': np.int16})

In [None]:
# Change the columns order
val_dataset = val_dataset[['clean_review', 'subjectivity', 'polarity', 'word_count', 'label']]

In [None]:
val_vocab = Vocab(val_dataset, target_col=None, word2index=train_vocab.word2index, sos_token='<SOS>', eos_token='<EOS>',
                  unk_token='<UNK>', pad_token='<PAD>', min_word_count=None, max_vocab_size=5000, max_seq_len=0.8,
                  use_pretrained_vectors=False, glove_path='Glove/', glove_name='glove.6B.100d.txt',
                  weights_file_name='Glove/weights.npy')

Trimmed vocabulary using maximum size of: 5000
5000/58762 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 80
Mapped words to indices


In [None]:
# the BatchIterator class that will enable to sort dataset examples, generate batches of input and output variables, 
#apply padding if required and be capable of iterating through all created batches

class BatchIterator:
        
    def __init__(self, dataset, batch_size=None, vocab_created=False, vocab=None, target_col=None, word2index=None,
             sos_token='<SOS>', eos_token='<EOS>', unk_token='<UNK>', pad_token='<PAD>', min_word_count=5,
             max_vocab_size=None, max_seq_len=0.8, use_pretrained_vectors=False, glove_path='Glove/',
             glove_name='glove.6B.100d.txt', weights_file_name='Glove/weights.npy'):    
    
        # Create vocabulary object
        if not vocab_created:
            self.vocab = Vocab(dataset, target_col=target_col, word2index=word2index, sos_token=sos_token, eos_token=eos_token,
                               unk_token=unk_token, pad_token=pad_token, min_word_count=min_word_count,
                               max_vocab_size=max_vocab_size, max_seq_len=max_seq_len,
                               use_pretrained_vectors=use_pretrained_vectors, glove_path=glove_path,
                               glove_name=glove_name, weights_file_name=weights_file_name)
            
            # Use created vocab.dataset object
            self.dataset = self.vocab.dataset      
        
        else:
            # If vocab was created then dataset should be the vocab.dataset object
            self.dataset = dataset
            self.vocab = vocab
            
        self.target_col = target_col 
        
        self.word2index = self.vocab.word2index
            
        # Define the batch_size
        if batch_size:
            self.batch_size = batch_size
        else:
            # Use the length of dataset as batch_size
            self.batch_size = len(self.dataset)
                
        self.x_lengths = np.array(self.vocab.x_lengths)
        
        if self.target_col:
            self.y_lengths = np.array(self.vocab.y_lengths)
            
        self.pad_token = self.vocab.word2index[pad_token]
            
        self.sort_and_batch()

        
    def sort_and_batch(self):
        """ Sort examples within entire dataset, then perform batching and shuffle all batches.

        """
        # Extract row indices sorted according to lengths
        if not self.target_col:
            sorted_indices = np.argsort(self.x_lengths)
        else:
            sorted_indices = np.lexsort((self.y_lengths, self.x_lengths))
        
        # Sort all sets
        self.sorted_dataset = self.dataset[sorted_indices[::-1]]
        self.sorted_x_lengths = np.flip(self.x_lengths[sorted_indices])
        
        if self.target_col:
            self.sorted_target = self.sorted_dataset[:, self.target_col]
            self.sorted_y_lengths = np.flip(self.x_lengths[sorted_indices])
        else:
            self.sorted_target = self.sorted_dataset[:, -1]
        
        # Initialize input, target and lengths batches
        self.input_batches = [[] for _ in range(self.sorted_dataset.shape[1]-1)]
        
        self.target_batches, self.x_len_batches = [], []

        self.y_len_batches = [] if self.target_col else None
        
        # Create batches
        for i in range(self.sorted_dataset.shape[1]-1):
            # The first column contains always sequences that should be padded.
            if i == 0:
                self.create_batches(self.sorted_dataset[:, i], self.input_batches[i], pad_token=self.pad_token)
            else:
                self.create_batches(self.sorted_dataset[:, i], self.input_batches[i])
                
        if self.target_col:
            self.create_batches(self.sorted_target, self.target_batches, pad_token=self.pad_token)
            self.create_batches(self.sorted_y_lengths, self.y_len_batches)
        else:
            self.create_batches(self.sorted_target, self.target_batches)
        
        self.create_batches(self.sorted_x_lengths, self.x_len_batches)
        
        # Shuffle batches
        self.indices = np.arange(len(self.input_batches[0]))
        np.random.shuffle(self.indices)
        
        for j in range(self.sorted_dataset.shape[1]-1):
            self.input_batches[j] = [self.input_batches[j][i] for i in self.indices]
        
        self.target_batches = [self.target_batches[i] for i in self.indices]
        self.x_len_batches = [self.x_len_batches[i] for i in self.indices]
        
        if self.target_col:
            self.y_len_batches = [self.y_len_batches[i] for i in self.indices]
        
        print('Batches created')
        
        
    def create_batches(self, sorted_dataset, batches, pad_token=-1):
        """ Convert each sequence to pytorch Tensor, create batches and pad them if required.
        
        """
        # Calculate the number of batches
        n_batches = int(len(sorted_dataset)/self.batch_size)

        # Create list of batches
        list_of_batches = np.array([sorted_dataset[i*self.batch_size:(i+1)*self.batch_size].copy()\
                                    for i in range(n_batches+1)])

        # Convert each sequence to pytorch Tensor
        for batch in list_of_batches:
            tensor_batch = []
            tensor_type = None
            for seq in batch:
                # Check seq data type and convert to Tensor
                if isinstance(seq, np.ndarray):
                    tensor = torch.LongTensor(seq)
                    tensor_type = 'int'
                elif isinstance(seq, np.integer):
                    tensor = torch.LongTensor([seq])
                    tensor_type = 'int'
                elif isinstance(seq, np.float):
                    tensor = torch.FloatTensor([seq])
                    tensor_type = 'float'
                elif isinstance(seq, int):
                    tensor = torch.LongTensor([seq])
                    tensor_type = 'int'
                elif isinstance(seq, float):
                    tensor = torch.FloatTensor([seq])
                    tensor_type = 'float'
                else:
                    raise TypeError('Cannot convert to Tensor. Data type not recognized')

                tensor_batch.append(tensor)
            if pad_token != -1:
                # Pad required sequences
                pad_batch = torch.nn.utils.rnn.pad_sequence(tensor_batch, batch_first=True)
                batches.append(pad_batch)
            else:
                if tensor_type == 'int':
                    batches.append(torch.LongTensor(tensor_batch))
                else:
                    batches.append(torch.FloatTensor(tensor_batch))

                
    def __iter__(self):
        """ Iterate through batches.
        
        """
        # Create a dictionary that holds variables batches to yield
        to_yield = {}
        
        # Iterate through batches
        for i in range(len(self.input_batches[0])):
            feat_list = []
            for j in range(1, len(self.input_batches)):
                feat = self.input_batches[j][i].type(torch.FloatTensor).unsqueeze(1)
                feat_list.append(feat)
                
            if feat_list:
                input_feat = torch.cat(feat_list, dim=1)
                to_yield['input_feat'] = input_feat

            to_yield['input_seq'] = self.input_batches[0][i]

            to_yield['target'] = self.target_batches[i]
            to_yield['x_lengths'] = self.x_len_batches[i]
            
            if self.target_col:
                to_yield['y_length'] = self.y_len_batches[i]


            yield to_yield
            
            
    def __len__(self):
        """ Return iterator length.
        
        """
        return len(self.input_batches[0])

In [None]:
train_iterator = BatchIterator(train_dataset, batch_size=32, vocab_created=False, vocab=None, target_col=None,
                               word2index=None, sos_token='<SOS>', eos_token='<EOS>', unk_token='<UNK>',
                               pad_token='<PAD>', min_word_count=5, max_vocab_size=None, max_seq_len=0.8,
                               use_pretrained_vectors=False, glove_path='glove/', glove_name='glove.6B.100d.txt',
                               weights_file_name='glove/weights.npy')

Trimmed vocabulary using as minimum count threashold: count = 5.00
26330/129454 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 90
Mapped words to indices
Batches created


In [None]:
len(train_iterator.input_batches[0][0])

32

In [None]:
val_iterator = BatchIterator(val_dataset, batch_size=32, vocab_created=False, vocab=None, target_col=None,
                             word2index=train_iterator.word2index, sos_token='<SOS>', eos_token='<EOS>',
                             unk_token='<UNK>', pad_token='<PAD>', min_word_count=5, max_vocab_size=None,
                             max_seq_len=0.8, use_pretrained_vectors=False, glove_path='glove/',
                             glove_name='glove.6B.100d.txt', weights_file_name='glove/weights.npy')

Trimmed vocabulary using as minimum count threashold: count = 5.00
14177/58762 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 88
Mapped words to indices
Batches created


In [None]:
# Import the dataset. Use clean_review and label columns
train_dataset = pd.read_csv('dataset/datasets_feat_clean/train_feat_clean.csv', 
                            usecols=['clean_review', 'label'])

# Change columns order
train_dataset = train_dataset[['clean_review', 'label']]

In [None]:
# Import the dataset. Use clean_review and label columns
val_dataset = pd.read_csv('dataset/datasets_feat_clean/val_feat_clean.csv',
                          usecols=['clean_review', 'label'])

# Change columns order
val_dataset = val_dataset[['clean_review', 'label']]

In [None]:
train_iterator = BatchIterator(train_dataset, batch_size=256, vocab_created=False, vocab=None, target_col=None,
                               word2index=None, sos_token='<SOS>', eos_token='<EOS>', unk_token='<UNK>',
                               pad_token='<PAD>', min_word_count=3, max_vocab_size=None, max_seq_len=0.9,
                               use_pretrained_vectors=False, glove_path='glove/', glove_name='glove.6B.100d.txt',
                               weights_file_name='glove/weights.npy')

Trimmed vocabulary using as minimum count threashold: count = 3.00
35372/129454 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 144
Mapped words to indices
Batches created


In [None]:
val_iterator = BatchIterator(val_dataset, batch_size=256, vocab_created=False, vocab=None, target_col=None,
                             word2index=train_iterator.word2index, sos_token='<SOS>', eos_token='<EOS>',
                             unk_token='<UNK>', pad_token='<PAD>', min_word_count=3, max_vocab_size=None,
                             max_seq_len=0.9, use_pretrained_vectors=False, glove_path='glove/',
                             glove_name='glove.6B.100d.txt', weights_file_name='glove/weights.npy')

Trimmed vocabulary using as minimum count threashold: count = 3.00
19416/58762 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 141
Mapped words to indices
Batches created


In [None]:
for batches in train_iterator:
    # Unpack the dictionary of batches
    input_seq, target, x_lengths = batches['input_seq'], batches['target'], batches['x_lengths']
    print('input_seq shape: ', input_seq.size())
    print('target shape: ', target.size())
    print('x_lengths shape: ', x_lengths.size())
    break

input_seq shape:  torch.Size([256, 2])
target shape:  torch.Size([256])
x_lengths shape:  torch.Size([256])


In [None]:
!pip install pytorch-lightning
!pip install tensorboardX

In [None]:
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from sklearn.metrics import confusion_matrix
from tensorboardX import SummaryWriter
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import device
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
class BiGRU(nn.Module):
  def __init__(self, hidden_size, vocab_size, embedding_dim, output_size, n_layers=1, dropout=0.2,
                 spatial_dropout=True, bidirectional=True):
        
        # Inherit everything from the nn.Module
        super(BiGRU, self).__init__()
        
        # Initialize attributes
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout
        self.spatial_dropout = spatial_dropout
        self.bidirectional = bidirectional
        self.n_directions = 2 if self.bidirectional else 1
        
        # Initialize layers
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.dropout = nn.Dropout(self.dropout_p)
        if self.spatial_dropout:
            self.spatial_dropout1d = nn.Dropout2d(self.dropout_p)
        self.gru = nn.GRU(self.embedding_dim, self.hidden_size, num_layers=self.n_layers, 
                          dropout=(0 if n_layers == 1 else self.dropout_p), batch_first=True,
                          bidirectional=self.bidirectional)
        # Linear layer input size is equal to hidden_size * 3, becuase
        # we will concatenate max_pooling ,avg_pooling and last hidden state
        self.linear = nn.Linear(self.hidden_size * 3, self.output_size)

        
  def forward(self, input_seq, input_lengths, hidden=None):
       
        # Extract batch_size
        self.batch_size = input_seq.size(0)
        
        # Embeddings shapes
        # Input: (batch_size,  seq_length)
        # Output: (batch_size, seq_length, embedding_dim)
        emb_out = self.embedding(input_seq)
        
        if self.spatial_dropout:
            # Convert to (batch_size, embedding_dim, seq_length)
            emb_out = emb_out.permute(0, 2, 1)
            emb_out = self.spatial_dropout1d(emb_out)
            # Convert back to (batch_size, seq_length, embedding_dim)
            emb_out = emb_out.permute(0, 2, 1)
        else:
            emb_out = self.dropout(emb_out)
        
        # Pack padded batch of sequences for RNN module
        packed_emb = nn.utils.rnn.pack_padded_sequence(emb_out, input_lengths, batch_first=True)
                
       
        gru_out, hidden = self.gru(packed_emb, hidden)
       
        hidden = hidden.view(self.n_layers, self.n_directions, self.batch_size, self.hidden_size)
        last_hidden = hidden[-1]
        # last hidden shape (num_directions, batch_size, hidden_size)
        # Sum the last hidden state of forward and backward layer
        last_hidden = torch.sum(last_hidden, dim=0)
        # Summed last hidden shape (batch_size, hidden_size)
        
        # Pad a packed batch
        # gru_out output shape: (batch_size, seq_len, hidden_size*num_directions)
        gru_out, lengths = nn.utils.rnn.pad_packed_sequence(gru_out, batch_first=True)
              
        # Sum the gru_out along the num_directions
        if self.bidirectional:
            gru_out = gru_out[:,:,:self.hidden_size] + gru_out[:,:,self.hidden_size:]
        
        # Select the maximum value over each dimension of the hidden representation (max pooling)
        # Permute the input tensor to dimensions: (batch_size, hidden, seq_len)
        # Output dimensions: (batch_size, hidden_size)
        max_pool = F.adaptive_max_pool1d(gru_out.permute(0,2,1), (1,)).view(self.batch_size,-1)
        
        # Consider the average of the representations (mean pooling)
        # Sum along the batch axis and divide by the corresponding lengths (FloatTensor)
        # Output shape: (batch_size, hidden_size)
        avg_pool = torch.sum(gru_out, dim=1) / lengths.view(-1,1).type(torch.FloatTensor) 

        # Concatenate max_pooling, avg_pooling and last hidden state tensors
        concat_out = torch.cat([last_hidden, max_pool, avg_pool], dim=1)

        #concat_out = self.dropout(concat_out)
        out = self.linear(concat_out)
        return F.log_softmax(out, dim=-1)
    
    
  def add_loss_fn(self, loss_fn):
        """Add loss function to the model.
        
        """
        self.loss_fn = loss_fn
        

  def add_optimizer(self, optimizer):
        """Add optimizer to the model.
        
        """
        self.optimizer = optimizer
        
        
  def add_device(self, device=torch.device('cpu')):
        """Specify the device.
        
        """
        self.device = device
    
    
  def train_model(self, train_iterator):
   
        self.train()
        
        train_losses = []
        losses = []
        losses_list = []
        num_seq = 0
        batch_correct = 0
            
        for i, batches in tqdm_notebook(enumerate(train_iterator, 1), total=len(train_iterator), desc='Training'):
            input_seq, target, x_lengths = batches['input_seq'], batches['target'], batches['x_lengths']
            
            input_seq.to(self.device)
            target.to(self.device)
            x_lengths.to(self.device)
            
            self.optimizer.zero_grad()

            pred = self.forward(input_seq, x_lengths)
            loss = self.loss_fn(pred, target)
            loss.backward()
            losses.append(loss.data.cpu().numpy())
            self.optimizer.step()
            
            losses_list.append(loss.data.cpu().numpy())
            
            pred = torch.argmax(pred, 1)

            if self.device.type == 'cpu':
                batch_correct += (pred.cpu() == target.cpu()).sum().item()

            else:
                batch_correct += (pred == target).sum().item()

            num_seq += len(input_seq)     
    
            if i % 100 == 0:
                avg_train_loss = np.mean(losses)
                train_losses.append(avg_train_loss)
                
                accuracy = batch_correct / num_seq
                
                print('Iteration: {}. Average training loss: {:.4f}. Accuracy: {:.3f}'\
                      .format(i, avg_train_loss, accuracy))
                
                losses = []
                
            avg_loss = np.mean(losses_list)
            accuracy = batch_correct / num_seq
                              
        return train_losses, avg_loss, accuracy
    
    
  def evaluate_model(self, eval_iterator, conf_mtx=False):
        
        
        self.eval()
        
        eval_losses = []
        losses = []
        losses_list = []
        num_seq = 0
        batch_correct = 0
        pred_total = torch.LongTensor()
        target_total = torch.LongTensor()
        
        with torch.no_grad():
            for i, batches in tqdm_notebook(enumerate(eval_iterator, 1), total=len(eval_iterator), desc='Evaluation'):
                input_seq, target, x_lengths = batches['input_seq'], batches['target'], batches['x_lengths']
                
                input_seq.to(self.device)
                target.to(self.device)
                x_lengths.to(self.device)

                pred = self.forward(input_seq, x_lengths)
                loss = self.loss_fn(pred, target)
                losses.append(loss.data.cpu().numpy())
                losses_list.append(loss.data.cpu().numpy())
                
                pred = torch.argmax(pred, 1)
                                
                if self.device.type == 'cpu':
                    batch_correct += (pred.cpu() == target.cpu()).sum().item()
                    
                else:
                    batch_correct += (pred == target).sum().item()
                    
                num_seq += len(input_seq)     
                
                pred_total = torch.cat([pred_total, pred], dim=0)
                target_total = torch.cat([target_total, target], dim=0)
                
                if i % 100 == 0:
                    avg_batch_eval_loss = np.mean(losses)
                    eval_losses.append(avg_batch_eval_loss)
                    
                    accuracy = batch_correct / num_seq
                    
                    print('Iteration: {}. Average evaluation loss: {:.4f}. Accuracy: {:.2f}'\
                          .format(i, avg_batch_eval_loss, accuracy))

                    losses = []
                    
            avg_loss_list = []
                    
            avg_loss = np.mean(losses_list)
            accuracy = batch_correct / num_seq
            
            conf_matrix = confusion_matrix(target_total.view(-1), pred_total.view(-1))
        
        if conf_mtx:
            print('\tConfusion matrix: ', conf_matrix)
            
        return eval_losses, avg_loss, accuracy, conf_matrix

In [None]:
# Initialize parameters
hidden_size = 8
vocab_size = len(train_iterator.word2index)
embedding_dim = 200
output_size = 2
n_layers = 1
dropout = 0.5
learning_rate = 0.001
epochs = 20
spatial_dropout = True

# Check whether system supports CUDA
CUDA = torch.cuda.is_available()

model = BiGRU(hidden_size, vocab_size, embedding_dim, output_size, n_layers, dropout,
              spatial_dropout, bidirectional=True)

# Move the model to GPU if possible
#if CUDA:
   # model.cuda()

model.add_loss_fn(nn.NLLLoss())

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
model.add_optimizer(optimizer)

device = torch.device('cuda' if CUDA else 'cpu')

model.add_device(device)

# Instantiate the EarlyStopping
#early_stop = EarlyStopping(wait_epochs=1)
early_stop = EarlyStopping(monitor="val_accuracy", min_delta=0.00, patience=3, verbose=False, mode="max")

train_losses_list, train_avg_loss_list, train_accuracy_list = [], [], []
eval_avg_loss_list, eval_accuracy_list, conf_matrix_list = [], [], []

for epoch in range(epochs):
    
    print('\nStart epoch [{}/{}]'.format(epoch+1, epochs))
    
    train_losses, train_avg_loss, train_accuracy = model.train_model(train_iterator)
    
    train_losses_list.append(train_losses)
    train_avg_loss_list.append(train_avg_loss)
    train_accuracy_list.append(train_accuracy)
    
    _, eval_avg_loss, eval_accuracy, conf_matrix = model.evaluate_model(val_iterator)
    
    eval_avg_loss_list.append(eval_avg_loss)
    eval_accuracy_list.append(eval_accuracy)
    conf_matrix_list.append(conf_matrix)
    
    print('\nEpoch [{}/{}]: Train accuracy: {:.3f}. Train loss: {:.4f}. Evaluation accuracy: {:.3f}. Evaluation loss: {:.4f}'\
          .format(epoch+1, epochs, train_accuracy, train_avg_loss, eval_accuracy, eval_avg_loss))
    
   # if early_stop.stop(eval_avg_loss, model, delta=0.003):
     #   break

    


Start epoch [1/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.4055. Accuracy: 0.757
Iteration: 200. Average training loss: 0.4332. Accuracy: 0.742


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [1/20]: Train accuracy: 0.757. Train loss: 0.3952. Evaluation accuracy: 0.807. Evaluation loss: 0.3352

Start epoch [2/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.3196. Accuracy: 0.809
Iteration: 200. Average training loss: 0.3908. Accuracy: 0.795


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [2/20]: Train accuracy: 0.808. Train loss: 0.3371. Evaluation accuracy: 0.864. Evaluation loss: 0.2863

Start epoch [3/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.2814. Accuracy: 0.855
Iteration: 200. Average training loss: 0.3359. Accuracy: 0.846


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [3/20]: Train accuracy: 0.853. Train loss: 0.2930. Evaluation accuracy: 0.891. Evaluation loss: 0.2361

Start epoch [4/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.2411. Accuracy: 0.884
Iteration: 200. Average training loss: 0.2862. Accuracy: 0.876


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [4/20]: Train accuracy: 0.882. Train loss: 0.2512. Evaluation accuracy: 0.906. Evaluation loss: 0.2060

Start epoch [5/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.2053. Accuracy: 0.906
Iteration: 200. Average training loss: 0.2528. Accuracy: 0.897


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [5/20]: Train accuracy: 0.901. Train loss: 0.2192. Evaluation accuracy: 0.915. Evaluation loss: 0.1918

Start epoch [6/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1842. Accuracy: 0.920
Iteration: 200. Average training loss: 0.2317. Accuracy: 0.909


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [6/20]: Train accuracy: 0.913. Train loss: 0.1995. Evaluation accuracy: 0.921. Evaluation loss: 0.1834

Start epoch [7/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1726. Accuracy: 0.924
Iteration: 200. Average training loss: 0.2163. Accuracy: 0.915


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [7/20]: Train accuracy: 0.918. Train loss: 0.1864. Evaluation accuracy: 0.923. Evaluation loss: 0.1786

Start epoch [8/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1593. Accuracy: 0.933
Iteration: 200. Average training loss: 0.2005. Accuracy: 0.924


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [8/20]: Train accuracy: 0.927. Train loss: 0.1729. Evaluation accuracy: 0.926. Evaluation loss: 0.1746

Start epoch [9/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1512. Accuracy: 0.936
Iteration: 200. Average training loss: 0.1920. Accuracy: 0.928


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [9/20]: Train accuracy: 0.930. Train loss: 0.1649. Evaluation accuracy: 0.928. Evaluation loss: 0.1711

Start epoch [10/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1434. Accuracy: 0.940
Iteration: 200. Average training loss: 0.1817. Accuracy: 0.932


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [10/20]: Train accuracy: 0.935. Train loss: 0.1561. Evaluation accuracy: 0.931. Evaluation loss: 0.1691

Start epoch [11/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1369. Accuracy: 0.943
Iteration: 200. Average training loss: 0.1745. Accuracy: 0.936


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [11/20]: Train accuracy: 0.939. Train loss: 0.1497. Evaluation accuracy: 0.932. Evaluation loss: 0.1679

Start epoch [12/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1290. Accuracy: 0.949
Iteration: 200. Average training loss: 0.1672. Accuracy: 0.940


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [12/20]: Train accuracy: 0.943. Train loss: 0.1421. Evaluation accuracy: 0.933. Evaluation loss: 0.1672

Start epoch [13/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1266. Accuracy: 0.947
Iteration: 200. Average training loss: 0.1607. Accuracy: 0.941


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [13/20]: Train accuracy: 0.944. Train loss: 0.1376. Evaluation accuracy: 0.934. Evaluation loss: 0.1663

Start epoch [14/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1190. Accuracy: 0.952
Iteration: 200. Average training loss: 0.1525. Accuracy: 0.945


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [14/20]: Train accuracy: 0.948. Train loss: 0.1306. Evaluation accuracy: 0.935. Evaluation loss: 0.1665

Start epoch [15/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1166. Accuracy: 0.952
Iteration: 200. Average training loss: 0.1489. Accuracy: 0.946


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [15/20]: Train accuracy: 0.949. Train loss: 0.1268. Evaluation accuracy: 0.936. Evaluation loss: 0.1667

Start epoch [16/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1108. Accuracy: 0.955
Iteration: 200. Average training loss: 0.1434. Accuracy: 0.949


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [16/20]: Train accuracy: 0.952. Train loss: 0.1218. Evaluation accuracy: 0.936. Evaluation loss: 0.1676

Start epoch [17/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1090. Accuracy: 0.957
Iteration: 200. Average training loss: 0.1371. Accuracy: 0.951


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [17/20]: Train accuracy: 0.953. Train loss: 0.1179. Evaluation accuracy: 0.936. Evaluation loss: 0.1682

Start epoch [18/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1033. Accuracy: 0.959
Iteration: 200. Average training loss: 0.1326. Accuracy: 0.954


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [18/20]: Train accuracy: 0.955. Train loss: 0.1136. Evaluation accuracy: 0.937. Evaluation loss: 0.1692

Start epoch [19/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1017. Accuracy: 0.959
Iteration: 200. Average training loss: 0.1274. Accuracy: 0.955


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [19/20]: Train accuracy: 0.957. Train loss: 0.1102. Evaluation accuracy: 0.937. Evaluation loss: 0.1697

Start epoch [20/20]


Training:   0%|          | 0/258 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0958. Accuracy: 0.962
Iteration: 200. Average training loss: 0.1267. Accuracy: 0.957


Evaluation:   0%|          | 0/74 [00:00<?, ?it/s]


Epoch [20/20]: Train accuracy: 0.958. Train loss: 0.1072. Evaluation accuracy: 0.937. Evaluation loss: 0.1713


In [None]:
# Import the dataset. 
train_dataset = pd.read_csv('dataset/datasets_feat_clean/train_feat_clean.csv', 
                            usecols=['clean_review', 'polarity', 'subjectivity', 'word_count', 'UPPERCASE', 'DIGITS',
                                      'PROPN', 'VERB', 'NOUN', 'PUNCT', 'ADJ', 'label'])

# Change columns order
train_dataset = train_dataset[['clean_review', 'polarity', 'subjectivity', 'word_count', 'UPPERCASE', 'DIGITS',
                               'PROPN', 'VERB', 'NOUN', 'PUNCT', 'ADJ', 'label']]

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Instantiate the StandardScaler
train_scaler = StandardScaler()
# Scale the features
train_dataset.iloc[:, 3:11] = train_scaler.fit_transform(train_dataset.iloc[:, 3:11])

In [None]:
# Import the dataset. Use clean_review and label columns
val_dataset = pd.read_csv('dataset/datasets_feat_clean/val_feat_clean.csv',
                          usecols=['clean_review', 'polarity', 'subjectivity', 'word_count', 'UPPERCASE', 'DIGITS',
                                      'PROPN', 'VERB', 'NOUN', 'PUNCT', 'ADJ', 'label'])

# Change columns order
val_dataset = val_dataset[['clean_review', 'polarity', 'subjectivity', 'word_count', 'UPPERCASE', 'DIGITS',
                           'PROPN', 'VERB', 'NOUN', 'PUNCT', 'ADJ', 'label']]

In [None]:
# Instantiate the StandardScaler
val_scaler = StandardScaler()
# Scale the features
val_dataset.iloc[:, 3:11] = val_scaler.fit_transform(val_dataset.iloc[:, 3:11])

In [None]:
train_iterator = BatchIterator(train_dataset, batch_size=64, vocab_created=False, vocab=None, target_col=None,
                               word2index=None, sos_token='<SOS>', eos_token='<EOS>', unk_token='<UNK>',
                               pad_token='<PAD>', min_word_count=3, max_vocab_size=None, max_seq_len=0.9,
                               use_pretrained_vectors=False, glove_path='glove/', glove_name='glove.6B.100d.txt',
                               weights_file_name='glove/weights.npy')

Trimmed vocabulary using as minimum count threashold: count = 3.00
35372/129454 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 144
Mapped words to indices
Batches created


In [None]:
val_iterator = BatchIterator(val_dataset, batch_size=64, vocab_created=False, vocab=None, target_col=None,
                             word2index=train_iterator.word2index, sos_token='<SOS>', eos_token='<EOS>',
                             unk_token='<UNK>', pad_token='<PAD>', min_word_count=3, max_vocab_size=None,
                             max_seq_len=0.9, use_pretrained_vectors=False, glove_path='glove/',
                             glove_name='glove.6B.100d.txt', weights_file_name='glove/weights.npy')

Trimmed vocabulary using as minimum count threashold: count = 3.00
19416/58762 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 141
Mapped words to indices
Batches created


In [None]:
for batches in train_iterator:

    # Unpack the dictionary of batches
    input_seq, input_feat, target, x_lengths = batches['input_seq'], batches['input_feat'],\
                                               batches['target'], batches['x_lengths']
        
    print('input_seq shape: ', input_seq.size())
    print('input_feat shape: ', input_feat.size())
    print('target shape: ', target.size())
    print('x_lengths shape: ', x_lengths.size())
    break

input_seq shape:  torch.Size([64, 58])
input_feat shape:  torch.Size([64, 10])
target shape:  torch.Size([64])
x_lengths shape:  torch.Size([64])


In [None]:
for batches in val_iterator:
    # Unpack the dictionary of batches
    input_seq, input_feat, target, x_lengths = batches['input_seq'], batches['input_feat'],\
                                               batches['target'], batches['x_lengths']
    print('input_seq shape: ', input_seq.size())
    print('input_feat shape: ', input_feat.size())
    print('target shape: ', target.size())
    print('x_lengths shape: ', x_lengths.size())
    break

input_seq shape:  torch.Size([64, 18])
input_feat shape:  torch.Size([64, 10])
target shape:  torch.Size([64])
x_lengths shape:  torch.Size([64])


In [None]:
# with additional features 
class BiGRU(nn.Module):
  
    
    def __init__(self, hidden_size, vocab_size, n_extra_feat, embedding_dim, output_size, n_layers=1, dropout=0.2,
                 spatial_dropout=True, bidirectional=True):
        
        # Inherit everything from the nn.Module
        super(BiGRU, self).__init__()
        
        # Initialize attributes
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.n_extra_feat = n_extra_feat
        self.embedding_dim = embedding_dim
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout
        self.spatial_dropout = spatial_dropout
        self.bidirectional = bidirectional
        self.n_directions = 2 if self.bidirectional else 1
        
        # Initialize layers
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.dropout = nn.Dropout(self.dropout_p)
        if self.spatial_dropout:
            self.spatial_dropout1d = nn.Dropout2d(self.dropout_p)
        self.gru = nn.GRU(self.embedding_dim, self.hidden_size, num_layers=self.n_layers, 
                          dropout=(0 if n_layers == 1 else self.dropout_p), batch_first=True,
                          bidirectional=self.bidirectional)
        # Linear layer input size is equal to hidden_size * 3 + n_extra_feat, becuase
        # we will concatenate max_pooling ,avg_pooling, last hidden state and additional features
        self.linear = nn.Linear(self.hidden_size * 3 + self.n_extra_feat, self.output_size)

        
    def forward(self, input_seq, input_feat, input_lengths, hidden=None):
      
        # Extract batch_size
        self.batch_size = input_seq.size(0)
        
        # Embeddings shapes
        # Input: (batch_size,  seq_length)
        # Output: (batch_size, seq_length, embedding_dim)
        emb_out = self.embedding(input_seq)
        
        if self.spatial_dropout:
            # Convert to (batch_size, embedding_dim, seq_length)
            emb_out = emb_out.permute(0, 2, 1)
            emb_out = self.spatial_dropout1d(emb_out)
            # Convert back to (batch_size, seq_length, embedding_dim)
            emb_out = emb_out.permute(0, 2, 1)
        else:
            emb_out = self.dropout(emb_out)
        
        # Pack padded batch of sequences for RNN module
        packed_emb = nn.utils.rnn.pack_padded_sequence(emb_out, input_lengths, batch_first=True)
                
        # GRU input/output shapes, if batch_first=True
        # Input: (batch_size, seq_len, embedding_dim)
        # Output: (batch_size, seq_len, hidden_size*num_directions)
        # Number of directions = 2 when used bidirectional, otherwise 1
        # shape of hidden: (n_layers x num_directions, batch_size, hidden_size)
        # Hidden state defaults to zero if not provided
        gru_out, hidden = self.gru(packed_emb, hidden)
        # gru_out: tensor containing the output features h_t from the last layer of the GRU
        # gru_out comprises all the hidden states in the last layer ("last" depth-wise, not time-wise)
        # For biGRu gru_out is the concatenation of a forward GRU representation and a backward GRU representation
        # hidden (h_n) comprises the hidden states after the last timestep
        
        # Extract and sum last hidden state
        # Input hidden shape: (n_layers x num_directions, batch_size, hidden_size)
        # Separate hidden state layers
        hidden = hidden.view(self.n_layers, self.n_directions, self.batch_size, self.hidden_size)
        last_hidden = hidden[-1]
        # last hidden shape (num_directions, batch_size, hidden_size)
        # Sum the last hidden state of forward and backward layer
        last_hidden = torch.sum(last_hidden, dim=0)
        # Summed last hidden shape (batch_size, hidden_size)
        
        # Pad a packed batch
        # gru_out output shape: (batch_size, seq_len, hidden_size*num_directions)
        gru_out, lengths = nn.utils.rnn.pad_packed_sequence(gru_out, batch_first=True)
              
        # Sum the gru_out along the num_directions
        if self.bidirectional:
            gru_out = gru_out[:,:,:self.hidden_size] + gru_out[:,:,self.hidden_size:]
        
        # Select the maximum value over each dimension of the hidden representation (max pooling)
        # Permute the input tensor to dimensions: (batch_size, hidden, seq_len)
        # Output dimensions: (batch_size, hidden_size)
        max_pool = F.adaptive_max_pool1d(gru_out.permute(0,2,1), (1,)).view(self.batch_size,-1)
        
        # Consider the average of the representations (mean pooling)
        # Sum along the batch axis and divide by the corresponding lengths (FloatTensor)
        # Output shape: (batch_size, hidden_size)
        avg_pool = torch.sum(gru_out, dim=1) / lengths.view(-1,1).type(torch.FloatTensor) 

        # Concatenate max_pooling, avg_pooling, hidden state and input_feat tensor
        concat_out = torch.cat([last_hidden, max_pool, avg_pool, input_feat], dim=1)

        # concat_out = self.dropout(concat_out)
        out = self.linear(concat_out)
        return F.log_softmax(out, dim=-1)
    
    
    def add_loss_fn(self, loss_fn):
        """Add loss function to the model.
        
        """
        self.loss_fn = loss_fn
        

    def add_optimizer(self, optimizer):
        """Add optimizer to the model.
        
        """
        self.optimizer = optimizer
        
        
    def add_device(self, device=torch.device('cpu')):
        """Specify the device.
        
        """
        self.device = device
    
    
    def train_model(self, train_iterator):
          
        self.train()
        
        train_losses = []
        losses = []
        losses_list = []
        num_seq = 0
        batch_correct = 0
            
        for i, batches in tqdm_notebook(enumerate(train_iterator, 1), total=len(train_iterator), desc='Training'):
            input_seq, input_feat, target, x_lengths = batches['input_seq'], batches['input_feat'],\
                                                       batches['target'], batches['x_lengths']
            
            input_seq.to(self.device)
            input_feat.to(self.device)
            target.to(self.device)
            x_lengths.to(self.device)
            
            self.optimizer.zero_grad()

            pred = self.forward(input_seq, input_feat, x_lengths)
            loss = self.loss_fn(pred, target)
            loss.backward()
            losses.append(loss.data.cpu().numpy())
            self.optimizer.step()
            
            losses_list.append(loss.data.cpu().numpy())
            
            pred = torch.argmax(pred, 1)

            if self.device.type == 'cpu':
                batch_correct += (pred.cpu() == target.cpu()).sum().item()

            else:
                batch_correct += (pred == target).sum().item()

            num_seq += len(input_seq)     
    
            if i % 100 == 0:
                avg_train_loss = np.mean(losses)
                train_losses.append(avg_train_loss)
                
                accuracy = batch_correct / num_seq
                
                print('Iteration: {}. Average training loss: {:.4f}. Accuracy: {:.3f}'\
                      .format(i, avg_train_loss, accuracy))
                
                losses = []
                
            avg_loss = np.mean(losses_list)
            accuracy = batch_correct / num_seq
                              
        return train_losses, avg_loss, accuracy
    
    
    def evaluate_model(self, eval_iterator, conf_mtx=False):
       
        self.eval()
        
        eval_losses = []
        losses = []
        losses_list = []
        num_seq = 0
        batch_correct = 0
        pred_total = torch.LongTensor()
        target_total = torch.LongTensor()
        
        with torch.no_grad():
            for i, batches in tqdm_notebook(enumerate(eval_iterator, 1), total=len(eval_iterator), desc='Evaluation'):
                input_seq, input_feat, target, x_lengths = batches['input_seq'], batches['input_feat'],\
                                                           batches['target'], batches['x_lengths']
                
                input_seq.to(self.device)
                input_feat.to(self.device)
                target.to(self.device)
                x_lengths.to(self.device)

                pred = self.forward(input_seq, input_feat, x_lengths)
                loss = self.loss_fn(pred, target)
                losses.append(loss.data.cpu().numpy())
                losses_list.append(loss.data.cpu().numpy())
                
                pred = torch.argmax(pred, 1)
                                
                if self.device.type == 'cpu':
                    batch_correct += (pred.cpu() == target.cpu()).sum().item()
                    
                else:
                    batch_correct += (pred == target).sum().item()
                    
                num_seq += len(input_seq)     
                
                pred_total = torch.cat([pred_total, pred], dim=0)
                target_total = torch.cat([target_total, target], dim=0)
                
                if i % 100 == 0:
                    avg_batch_eval_loss = np.mean(losses)
                    eval_losses.append(avg_batch_eval_loss)
                    
                    accuracy = batch_correct / num_seq
                    
                    print('Iteration: {}. Average evaluation loss: {:.4f}. Accuracy: {:.2f}'\
                          .format(i, avg_batch_eval_loss, accuracy))

                    losses = []
                    
            avg_loss_list = []
                    
            avg_loss = np.mean(losses_list)
            accuracy = batch_correct / num_seq
            
            conf_matrix = confusion_matrix(target_total.view(-1), pred_total.view(-1))
        
        if conf_mtx:
            print('\tConfusion matrix: ', conf_matrix)
            
        return eval_losses, avg_loss, accuracy, conf_matrix

In [None]:
# Initialize parameters
hidden_size = 8
vocab_size = len(train_iterator.word2index)
n_extra_feat = 10
embedding_dim = 200
output_size = 2
n_layers = 1
dropout = 0.5
learning_rate = 0.001
epochs = 20
spatial_dropout = True

# Check whether system supports CUDA
CUDA = torch.cuda.is_available()

model = BiGRU(hidden_size, vocab_size, n_extra_feat, embedding_dim, output_size, n_layers, dropout,
              spatial_dropout, bidirectional=True)

# Move the model to GPU if possible
if CUDA:
    model.cuda()

model.add_loss_fn(nn.NLLLoss())

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
model.add_optimizer(optimizer)

device = torch.device('cuda' if CUDA else 'cpu')

model.add_device(device)

# Instantiate the EarlyStopping
early_stop = EarlyStopping(monitor="val_accuracy", min_delta=0.00, patience=3, verbose=False, mode="max")

train_losses_list, train_avg_loss_list, train_accuracy_list = [], [], []
eval_avg_loss_list, eval_accuracy_list, conf_matrix_list = [], [], []

for epoch in range(epochs):
    
    print('\nStart epoch [{}/{}]'.format(epoch+1, epochs))
    
    train_losses, train_avg_loss, train_accuracy = model.train_model(train_iterator)
    
    train_losses_list.append(train_losses)
    train_avg_loss_list.append(train_avg_loss)
    train_accuracy_list.append(train_accuracy)
    
    _, eval_avg_loss, eval_accuracy, conf_matrix = model.evaluate_model(val_iterator)
    
    eval_avg_loss_list.append(eval_avg_loss)
    eval_accuracy_list.append(eval_accuracy)
    conf_matrix_list.append(conf_matrix)
    
    print('\nEpoch [{}/{}]: Train accuracy: {:.3f}. Train loss: {:.4f}. Evaluation accuracy: {:.3f}. Evaluation loss: {:.4f}'\
          .format(epoch+1, epochs, train_accuracy, train_avg_loss, eval_accuracy, eval_avg_loss))
    
    #if early_stop.stop(eval_avg_loss, model, delta=0.003):
       # break


Start epoch [1/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.4461. Accuracy: 0.745
Iteration: 200. Average training loss: 0.3348. Accuracy: 0.766
Iteration: 300. Average training loss: 0.3479. Accuracy: 0.775
Iteration: 400. Average training loss: 0.2881. Accuracy: 0.790
Iteration: 500. Average training loss: 0.2890. Accuracy: 0.801
Iteration: 600. Average training loss: 0.2899. Accuracy: 0.809
Iteration: 700. Average training loss: 0.2980. Accuracy: 0.815
Iteration: 800. Average training loss: 0.2771. Accuracy: 0.821
Iteration: 900. Average training loss: 0.2395. Accuracy: 0.829
Iteration: 1000. Average training loss: 0.2794. Accuracy: 0.833


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.2245. Accuracy: 0.90
Iteration: 200. Average evaluation loss: 0.2300. Accuracy: 0.90

Epoch [1/20]: Train accuracy: 0.834. Train loss: 0.3073. Evaluation accuracy: 0.903. Evaluation loss: 0.2172

Start epoch [2/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.2202. Accuracy: 0.894
Iteration: 200. Average training loss: 0.2280. Accuracy: 0.894
Iteration: 300. Average training loss: 0.2392. Accuracy: 0.895
Iteration: 400. Average training loss: 0.2053. Accuracy: 0.898
Iteration: 500. Average training loss: 0.2072. Accuracy: 0.900
Iteration: 600. Average training loss: 0.2037. Accuracy: 0.901
Iteration: 700. Average training loss: 0.2168. Accuracy: 0.901
Iteration: 800. Average training loss: 0.1960. Accuracy: 0.903
Iteration: 900. Average training loss: 0.1820. Accuracy: 0.905
Iteration: 1000. Average training loss: 0.2111. Accuracy: 0.905


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1695. Accuracy: 0.93
Iteration: 200. Average evaluation loss: 0.1784. Accuracy: 0.93

Epoch [2/20]: Train accuracy: 0.905. Train loss: 0.2106. Evaluation accuracy: 0.928. Evaluation loss: 0.1673

Start epoch [3/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1688. Accuracy: 0.923
Iteration: 200. Average training loss: 0.1781. Accuracy: 0.922
Iteration: 300. Average training loss: 0.1931. Accuracy: 0.919
Iteration: 400. Average training loss: 0.1738. Accuracy: 0.920
Iteration: 500. Average training loss: 0.1638. Accuracy: 0.921
Iteration: 600. Average training loss: 0.1730. Accuracy: 0.922
Iteration: 700. Average training loss: 0.1836. Accuracy: 0.921
Iteration: 800. Average training loss: 0.1645. Accuracy: 0.923
Iteration: 900. Average training loss: 0.1525. Accuracy: 0.924
Iteration: 1000. Average training loss: 0.1856. Accuracy: 0.924


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1548. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1648. Accuracy: 0.93

Epoch [3/20]: Train accuracy: 0.924. Train loss: 0.1736. Evaluation accuracy: 0.935. Evaluation loss: 0.1540

Start epoch [4/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1504. Accuracy: 0.938
Iteration: 200. Average training loss: 0.1532. Accuracy: 0.938
Iteration: 300. Average training loss: 0.1675. Accuracy: 0.934
Iteration: 400. Average training loss: 0.1520. Accuracy: 0.935
Iteration: 500. Average training loss: 0.1486. Accuracy: 0.936
Iteration: 600. Average training loss: 0.1516. Accuracy: 0.935
Iteration: 700. Average training loss: 0.1621. Accuracy: 0.935
Iteration: 800. Average training loss: 0.1415. Accuracy: 0.936
Iteration: 900. Average training loss: 0.1308. Accuracy: 0.937
Iteration: 1000. Average training loss: 0.1642. Accuracy: 0.937


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1479. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1583. Accuracy: 0.94

Epoch [4/20]: Train accuracy: 0.936. Train loss: 0.1520. Evaluation accuracy: 0.939. Evaluation loss: 0.1482

Start epoch [5/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1312. Accuracy: 0.945
Iteration: 200. Average training loss: 0.1398. Accuracy: 0.945
Iteration: 300. Average training loss: 0.1531. Accuracy: 0.941
Iteration: 400. Average training loss: 0.1364. Accuracy: 0.942
Iteration: 500. Average training loss: 0.1306. Accuracy: 0.942
Iteration: 600. Average training loss: 0.1413. Accuracy: 0.942
Iteration: 700. Average training loss: 0.1484. Accuracy: 0.941
Iteration: 800. Average training loss: 0.1378. Accuracy: 0.942
Iteration: 900. Average training loss: 0.1227. Accuracy: 0.943
Iteration: 1000. Average training loss: 0.1514. Accuracy: 0.942


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1455. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1542. Accuracy: 0.94

Epoch [5/20]: Train accuracy: 0.942. Train loss: 0.1393. Evaluation accuracy: 0.940. Evaluation loss: 0.1451

Start epoch [6/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1237. Accuracy: 0.948
Iteration: 200. Average training loss: 0.1274. Accuracy: 0.948
Iteration: 300. Average training loss: 0.1398. Accuracy: 0.946
Iteration: 400. Average training loss: 0.1251. Accuracy: 0.947
Iteration: 500. Average training loss: 0.1199. Accuracy: 0.948
Iteration: 600. Average training loss: 0.1278. Accuracy: 0.948
Iteration: 700. Average training loss: 0.1364. Accuracy: 0.948
Iteration: 800. Average training loss: 0.1246. Accuracy: 0.948
Iteration: 900. Average training loss: 0.1197. Accuracy: 0.948
Iteration: 1000. Average training loss: 0.1390. Accuracy: 0.947


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1457. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1532. Accuracy: 0.94

Epoch [6/20]: Train accuracy: 0.947. Train loss: 0.1284. Evaluation accuracy: 0.941. Evaluation loss: 0.1445

Start epoch [7/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1180. Accuracy: 0.952
Iteration: 200. Average training loss: 0.1211. Accuracy: 0.952
Iteration: 300. Average training loss: 0.1326. Accuracy: 0.950
Iteration: 400. Average training loss: 0.1127. Accuracy: 0.952
Iteration: 500. Average training loss: 0.1118. Accuracy: 0.952
Iteration: 600. Average training loss: 0.1196. Accuracy: 0.952
Iteration: 700. Average training loss: 0.1284. Accuracy: 0.952
Iteration: 800. Average training loss: 0.1154. Accuracy: 0.952
Iteration: 900. Average training loss: 0.1081. Accuracy: 0.953
Iteration: 1000. Average training loss: 0.1304. Accuracy: 0.952


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1461. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1529. Accuracy: 0.94

Epoch [7/20]: Train accuracy: 0.952. Train loss: 0.1195. Evaluation accuracy: 0.943. Evaluation loss: 0.1446

Start epoch [8/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1030. Accuracy: 0.955
Iteration: 200. Average training loss: 0.1136. Accuracy: 0.955
Iteration: 300. Average training loss: 0.1218. Accuracy: 0.954
Iteration: 400. Average training loss: 0.1085. Accuracy: 0.955
Iteration: 500. Average training loss: 0.1041. Accuracy: 0.955
Iteration: 600. Average training loss: 0.1096. Accuracy: 0.955
Iteration: 700. Average training loss: 0.1216. Accuracy: 0.955
Iteration: 800. Average training loss: 0.1078. Accuracy: 0.955
Iteration: 900. Average training loss: 0.1040. Accuracy: 0.956
Iteration: 1000. Average training loss: 0.1227. Accuracy: 0.955


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1491. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1573. Accuracy: 0.94

Epoch [8/20]: Train accuracy: 0.955. Train loss: 0.1117. Evaluation accuracy: 0.942. Evaluation loss: 0.1479

Start epoch [9/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0974. Accuracy: 0.960
Iteration: 200. Average training loss: 0.1046. Accuracy: 0.960
Iteration: 300. Average training loss: 0.1146. Accuracy: 0.959
Iteration: 400. Average training loss: 0.1058. Accuracy: 0.959
Iteration: 500. Average training loss: 0.1014. Accuracy: 0.959
Iteration: 600. Average training loss: 0.1060. Accuracy: 0.958
Iteration: 700. Average training loss: 0.1126. Accuracy: 0.958
Iteration: 800. Average training loss: 0.1037. Accuracy: 0.958
Iteration: 900. Average training loss: 0.0950. Accuracy: 0.959
Iteration: 1000. Average training loss: 0.1140. Accuracy: 0.958


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1504. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1574. Accuracy: 0.94

Epoch [9/20]: Train accuracy: 0.958. Train loss: 0.1053. Evaluation accuracy: 0.942. Evaluation loss: 0.1488

Start epoch [10/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0894. Accuracy: 0.964
Iteration: 200. Average training loss: 0.0955. Accuracy: 0.962
Iteration: 300. Average training loss: 0.1131. Accuracy: 0.960
Iteration: 400. Average training loss: 0.0995. Accuracy: 0.960
Iteration: 500. Average training loss: 0.0899. Accuracy: 0.961
Iteration: 600. Average training loss: 0.0971. Accuracy: 0.961
Iteration: 700. Average training loss: 0.1069. Accuracy: 0.960
Iteration: 800. Average training loss: 0.0990. Accuracy: 0.960
Iteration: 900. Average training loss: 0.0944. Accuracy: 0.961
Iteration: 1000. Average training loss: 0.1078. Accuracy: 0.960


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1499. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1578. Accuracy: 0.94

Epoch [10/20]: Train accuracy: 0.961. Train loss: 0.0993. Evaluation accuracy: 0.944. Evaluation loss: 0.1487

Start epoch [11/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0887. Accuracy: 0.966
Iteration: 200. Average training loss: 0.0940. Accuracy: 0.965
Iteration: 300. Average training loss: 0.1004. Accuracy: 0.963
Iteration: 400. Average training loss: 0.0906. Accuracy: 0.964
Iteration: 500. Average training loss: 0.0885. Accuracy: 0.964
Iteration: 600. Average training loss: 0.0954. Accuracy: 0.964
Iteration: 700. Average training loss: 0.1039. Accuracy: 0.963
Iteration: 800. Average training loss: 0.0911. Accuracy: 0.964
Iteration: 900. Average training loss: 0.0866. Accuracy: 0.964
Iteration: 1000. Average training loss: 0.0982. Accuracy: 0.964


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1517. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1594. Accuracy: 0.94

Epoch [11/20]: Train accuracy: 0.964. Train loss: 0.0937. Evaluation accuracy: 0.944. Evaluation loss: 0.1505

Start epoch [12/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0789. Accuracy: 0.967
Iteration: 200. Average training loss: 0.0875. Accuracy: 0.966
Iteration: 300. Average training loss: 0.0899. Accuracy: 0.966
Iteration: 400. Average training loss: 0.0818. Accuracy: 0.967
Iteration: 500. Average training loss: 0.0822. Accuracy: 0.967
Iteration: 600. Average training loss: 0.0929. Accuracy: 0.967
Iteration: 700. Average training loss: 0.0998. Accuracy: 0.966
Iteration: 800. Average training loss: 0.0837. Accuracy: 0.966
Iteration: 900. Average training loss: 0.0818. Accuracy: 0.967
Iteration: 1000. Average training loss: 0.0930. Accuracy: 0.967


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1518. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1612. Accuracy: 0.94

Epoch [12/20]: Train accuracy: 0.967. Train loss: 0.0871. Evaluation accuracy: 0.945. Evaluation loss: 0.1519

Start epoch [13/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0706. Accuracy: 0.972
Iteration: 200. Average training loss: 0.0798. Accuracy: 0.971
Iteration: 300. Average training loss: 0.0925. Accuracy: 0.969
Iteration: 400. Average training loss: 0.0819. Accuracy: 0.969
Iteration: 500. Average training loss: 0.0787. Accuracy: 0.969
Iteration: 600. Average training loss: 0.0842. Accuracy: 0.969
Iteration: 700. Average training loss: 0.0929. Accuracy: 0.968
Iteration: 800. Average training loss: 0.0822. Accuracy: 0.969
Iteration: 900. Average training loss: 0.0753. Accuracy: 0.969
Iteration: 1000. Average training loss: 0.0870. Accuracy: 0.969


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1567. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1645. Accuracy: 0.94

Epoch [13/20]: Train accuracy: 0.969. Train loss: 0.0825. Evaluation accuracy: 0.944. Evaluation loss: 0.1552

Start epoch [14/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0710. Accuracy: 0.974
Iteration: 200. Average training loss: 0.0847. Accuracy: 0.971
Iteration: 300. Average training loss: 0.0903. Accuracy: 0.969
Iteration: 400. Average training loss: 0.0783. Accuracy: 0.969
Iteration: 500. Average training loss: 0.0731. Accuracy: 0.970
Iteration: 600. Average training loss: 0.0774. Accuracy: 0.970
Iteration: 700. Average training loss: 0.0836. Accuracy: 0.970
Iteration: 800. Average training loss: 0.0797. Accuracy: 0.970
Iteration: 900. Average training loss: 0.0768. Accuracy: 0.970
Iteration: 1000. Average training loss: 0.0812. Accuracy: 0.970


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1600. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1674. Accuracy: 0.94

Epoch [14/20]: Train accuracy: 0.970. Train loss: 0.0792. Evaluation accuracy: 0.944. Evaluation loss: 0.1588

Start epoch [15/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0674. Accuracy: 0.974
Iteration: 200. Average training loss: 0.0763. Accuracy: 0.971
Iteration: 300. Average training loss: 0.0825. Accuracy: 0.970
Iteration: 400. Average training loss: 0.0749. Accuracy: 0.970
Iteration: 500. Average training loss: 0.0691. Accuracy: 0.971
Iteration: 600. Average training loss: 0.0779. Accuracy: 0.971
Iteration: 700. Average training loss: 0.0903. Accuracy: 0.970
Iteration: 800. Average training loss: 0.0717. Accuracy: 0.971
Iteration: 900. Average training loss: 0.0702. Accuracy: 0.971
Iteration: 1000. Average training loss: 0.0790. Accuracy: 0.971


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1642. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1726. Accuracy: 0.94

Epoch [15/20]: Train accuracy: 0.971. Train loss: 0.0757. Evaluation accuracy: 0.943. Evaluation loss: 0.1627

Start epoch [16/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0676. Accuracy: 0.974
Iteration: 200. Average training loss: 0.0727. Accuracy: 0.973
Iteration: 300. Average training loss: 0.0865. Accuracy: 0.971
Iteration: 400. Average training loss: 0.0749. Accuracy: 0.971
Iteration: 500. Average training loss: 0.0650. Accuracy: 0.973
Iteration: 600. Average training loss: 0.0738. Accuracy: 0.972
Iteration: 700. Average training loss: 0.0751. Accuracy: 0.972
Iteration: 800. Average training loss: 0.0748. Accuracy: 0.972
Iteration: 900. Average training loss: 0.0681. Accuracy: 0.973
Iteration: 1000. Average training loss: 0.0743. Accuracy: 0.972


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1632. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1721. Accuracy: 0.94

Epoch [16/20]: Train accuracy: 0.972. Train loss: 0.0733. Evaluation accuracy: 0.944. Evaluation loss: 0.1627

Start epoch [17/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0559. Accuracy: 0.978
Iteration: 200. Average training loss: 0.0681. Accuracy: 0.975
Iteration: 300. Average training loss: 0.0745. Accuracy: 0.974
Iteration: 400. Average training loss: 0.0666. Accuracy: 0.975
Iteration: 500. Average training loss: 0.0571. Accuracy: 0.975
Iteration: 600. Average training loss: 0.0689. Accuracy: 0.975
Iteration: 700. Average training loss: 0.0785. Accuracy: 0.975
Iteration: 800. Average training loss: 0.0693. Accuracy: 0.975
Iteration: 900. Average training loss: 0.0650. Accuracy: 0.975
Iteration: 1000. Average training loss: 0.0689. Accuracy: 0.975


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1692. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1758. Accuracy: 0.94

Epoch [17/20]: Train accuracy: 0.975. Train loss: 0.0669. Evaluation accuracy: 0.943. Evaluation loss: 0.1672

Start epoch [18/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0601. Accuracy: 0.978
Iteration: 200. Average training loss: 0.0623. Accuracy: 0.977
Iteration: 300. Average training loss: 0.0713. Accuracy: 0.976
Iteration: 400. Average training loss: 0.0642. Accuracy: 0.976
Iteration: 500. Average training loss: 0.0578. Accuracy: 0.977
Iteration: 600. Average training loss: 0.0645. Accuracy: 0.977
Iteration: 700. Average training loss: 0.0666. Accuracy: 0.977
Iteration: 800. Average training loss: 0.0620. Accuracy: 0.977
Iteration: 900. Average training loss: 0.0605. Accuracy: 0.977
Iteration: 1000. Average training loss: 0.0641. Accuracy: 0.977


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1749. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1829. Accuracy: 0.94

Epoch [18/20]: Train accuracy: 0.977. Train loss: 0.0633. Evaluation accuracy: 0.943. Evaluation loss: 0.1723

Start epoch [19/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0559. Accuracy: 0.978
Iteration: 200. Average training loss: 0.0604. Accuracy: 0.978
Iteration: 300. Average training loss: 0.0661. Accuracy: 0.976
Iteration: 400. Average training loss: 0.0662. Accuracy: 0.976
Iteration: 500. Average training loss: 0.0527. Accuracy: 0.977
Iteration: 600. Average training loss: 0.0648. Accuracy: 0.977
Iteration: 700. Average training loss: 0.0615. Accuracy: 0.977
Iteration: 800. Average training loss: 0.0615. Accuracy: 0.977
Iteration: 900. Average training loss: 0.0544. Accuracy: 0.977
Iteration: 1000. Average training loss: 0.0680. Accuracy: 0.977


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1762. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1852. Accuracy: 0.94

Epoch [19/20]: Train accuracy: 0.977. Train loss: 0.0609. Evaluation accuracy: 0.943. Evaluation loss: 0.1744

Start epoch [20/20]


Training:   0%|          | 0/1030 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0493. Accuracy: 0.981
Iteration: 200. Average training loss: 0.0559. Accuracy: 0.980
Iteration: 300. Average training loss: 0.0622. Accuracy: 0.979
Iteration: 400. Average training loss: 0.0590. Accuracy: 0.979
Iteration: 500. Average training loss: 0.0510. Accuracy: 0.979
Iteration: 600. Average training loss: 0.0641. Accuracy: 0.979
Iteration: 700. Average training loss: 0.0628. Accuracy: 0.978
Iteration: 800. Average training loss: 0.0584. Accuracy: 0.978
Iteration: 900. Average training loss: 0.0520. Accuracy: 0.979
Iteration: 1000. Average training loss: 0.0599. Accuracy: 0.979


Evaluation:   0%|          | 0/295 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1790. Accuracy: 0.94
Iteration: 200. Average evaluation loss: 0.1893. Accuracy: 0.94

Epoch [20/20]: Train accuracy: 0.979. Train loss: 0.0577. Evaluation accuracy: 0.944. Evaluation loss: 0.1786


In [None]:
path = untar_data(URLs.YELP_REVIEWS)

In [None]:
path.ls()

(#3) [Path('/root/.fastai/data/yelp_review_full_csv/readme.txt'),Path('/root/.fastai/data/yelp_review_full_csv/test.csv'),Path('/root/.fastai/data/yelp_review_full_csv/train.csv')]

In [None]:
def load_data(path, file_list, dataset, encoding='utf8'):
   for file in file_list:
        with open(os.path.join(path, file), 'r', encoding=encoding) as text:
            dataset.append(text.read())

In [None]:
# Concatenate training and testing examples into one dataset
dataset = pd.concat([pd.DataFrame({'review': train_pos, 'label':1}),
                     pd.DataFrame({'review': test_pos, 'label':1}),
                     pd.DataFrame({'review': train_neg, 'label':0}),
                     pd.DataFrame({'review': test_neg, 'label':0})],
                     axis=0, ignore_index=True)

In [None]:
# Get indices of duplicate data (excluding first occurrence)
duplicate_indices = dataset.loc[dataset.duplicated(keep='first')].index

# Count and print the number of duplicates
print('Number of duplicates in the dataset: {}'.format(dataset.loc[duplicate_indices, 'review'].count()))

In [None]:
# Drop duplicates
dataset.drop_duplicates(keep='first', inplace=True)

In [None]:
def polarity(text):
    """Calculate the polarity score of the input text.
    
    """
    return TextBlob(text).sentiment.polarity

In [None]:
def subjectivity(text):
    """Calculate the subjectivity score of the input text.
    
    """
    return TextBlob(text).sentiment.subjectivity

In [None]:
# Save raw dataset as a CSV file
dataset.to_csv(os.path.join(path, '/content/dataset/datasets_feat_1/dataset_raw/dataset_raw.csv'), index=False)

In [None]:
def pos2(df, batch_size, n_threads, required_tags):
  
    # Create empty dictionary
    review_dict = collections.defaultdict(dict)
    for i, doc in enumerate(nlp.pipe(df, batch_size=batch_size, n_threads=n_threads)):
         for token in doc:
            pos = token.pos_
            if pos in required_tags:
                review_dict[i].setdefault(pos, 0)
                review_dict[i][pos] = review_dict[i][pos] + 1
    # Transpose data frame to shape (index, tags)
    return pd.DataFrame(review_dict).transpose()

In [None]:
def extract_features(df, batch_size, n_threads, required_tags):
    
    # Calculate polarity
    df['polarity'] = df.review.apply(polarity).astype('float16')
    # Calculate subjectivity
    df['subjectivity'] = df.review.apply(subjectivity).astype('float16') 
    # Calculate number of words in review
    df['word_count'] = df.review.apply(lambda text: len(text.split())).astype('int16')
    # Count number of uppercase words, then divide by word_count
    df['UPPERCASE'] = df.review.apply(lambda text: len([word for word in text.split()\
                                                        if word.isupper()]))/df.word_count
    # Change data type to float16
    df.UPPERCASE = df.UPPERCASE.astype('float16')
    # Count number of digits, then divide by word_count
    df['DIGITS'] = df.review.apply(lambda text: len([word for word in text.split()\
                                                     if word.isdigit()]))/df.word_count
    # Change data type to float16
    df.DIGITS = df.DIGITS.astype('float16')
    # Perform part-of-speech taging
    pos_data = pos2(df.review, batch_size=batch_size, n_threads=n_threads, required_tags=required_tags)
    # Divide POS tags count by word_count
    pos_data = pos_data.div(df.word_count, axis=0).astype('float16')
    # Concatenate pandas data frames horizontaly
    return pd.concat([df, pos_data], axis=1)

In [None]:
# Load language model and disable unnecessary components of processing pipeline
nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', '...'])
required_tags = ['PROPN', 'PUNCT', 'NOUN', 'ADJ', 'VERB']

batch_size = 512
n_threads = 2

# Test the processing time on a part of the trainig set, given batch_size and n_threads
start_time = timeit.default_timer()
print('Start processing 1000 examples using batch_size: {} and n_threads: {}'.format(batch_size, n_threads))
extract_features(dataset.loc[:1000, :], batch_size=batch_size, n_threads=n_threads, required_tags=required_tags)
print('Feature extraction function processing time: {:.2f} sec'.format(timeit.default_timer() - start_time))

Start processing 1000 examples using batch_size: 512 and n_threads: 2
Feature extraction function processing time: 14.01 sec


In [None]:
def split_norm_save(df, name, path, part_size, batch_size, n_threads, nlp):
   
    if name not in os.listdir(path):
        dataset_parts = []
        N = int(len(df)/part_size)
        # Create list of dataframe chunks
        data_frames = [df.iloc[i*part_size:(i+1)*part_size, 0].copy() for i in range(N+1)]
        # Process dataset partialy 
        for frame in tqdm_notebook(data_frames):
            # Normalize dataset chunk
            dataset_part = text_preprocessing(frame, batch_size=batch_size, n_threads=n_threads)
            dataset_parts.append(dataset_part)
            # Reload nlp
            nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', '...'])

        # Concatenate all parts into one series
        concat_clean = pd.concat(dataset_parts, axis=0, sort=False)
        # Concatenate dataset and cleaned review seires
        dataset_clean = pd.concat([df, concat_clean], axis=1)        
        # Export data frame to CSV file
        dataset_clean.to_csv(path + name, index=False)
    else:
        print('File {} already exists in given directory.'.format(name))

In [None]:
# Define variables
nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', 'tagger', '...'])
batch_size = 512
n_threads = 2
part_size = 5000
path = os.path.join(os.getcwd(), 'dataset/datasets_feat_clean_1/')
name = 'dataset_feat_clean.csv'

# Perform text preprocessing and save the resulted frame to CSV file
split_norm_save(dataset_feat, name, path, part_size, batch_size, n_threads, nlp)

  0%|          | 0/19 [00:00<?, ?it/s]

In [None]:
# Dictionary of {column: dtype} pairs
col_types = {'review': str,'label': np.int16, 'polarity': np.float16, 'subjectivity': np.float16,
             'word_count': np.int16, 'UPPERCASE': np.float16, 'DIGITS': np.float16, 'PROPN': np.float16,
             'VERB': np.float16, 'NOUN': np.float16, 'PUNCT': np.float16, 'ADJ': np.float16}

# Import dataset from the CSV file
dataset_feat_clean = pd.read_csv('dataset/datasets_feat_clean/dataset_feat_clean.csv', dtype=col_types)

In [None]:
dataset_feat_clean.head()

Unnamed: 0,review,label,polarity,subjectivity,word_count,UPPERCASE,DIGITS,VERB,NOUN,PUNCT,PROPN,ADJ,clean_review
0,Of course if you are reading my review you have seen this film already. 'Raja Babu' is one of my most favorite characters. I just love the concept of a spoiled brat with a 24*7 servant on his motorcycle. Watch movies and emulate characters etc etc. I love the scene when a stone cracks in Kader khans mouth while eating. Also where Shakti Kapoor narrates a corny story of Raja Babu's affairs on a dinner table and Govinda wearing 'dharam-veer' uniform makes sentimental remarks. Thats my favorite scene of the film. 'Achcha Pitaji To Main Chalta Hoon' scene is just chemistry between two great In...,1,0.335938,0.713867,178,0.022476,0.0,0.09552,0.213501,0.17981,0.151733,0.073059,"course read review see film raja babu favorite character love concept spoil brat servant motorcycle watch movie emulate character etc etc love scene stone crack kader khan mouth eat shakti kapoor narrate corny story raja babu affair dinner table govinda wear dharam veer uniform make sentimental remark s favorite scene film achcha pitaji main chalta hoon scene chemistry great indian actor comical scene dialog brilliant cat mouse film watch actor help take away scene total entertainment like govinda kader khan chemistry think rb 6th list david dhawan deewana mastana ankhein','shola shabnam s..."
1,"I really enjoyed this movie. Most of the reviews have been bad, but most critics think a movie should be like an idea drama. This movie has a little bit of drama, but the rest is just clean fun and very entertaining. Forget about Julia Roberts being a Pretty Woman, Emma Roberts is a beautiful young lady and there is more to her than just that. Emma was so much fun to watch in the role of Nancy Drew. It is good to see a new face. I believe she will go far.<br /><br />Nancy Drew may not be based upon the books, but the story is still good. There is also a good blend of other character actors...",1,0.351807,0.59375,196,0.015305,0.0,0.107117,0.188721,0.122437,0.102051,0.096924,enjoy movie review bad critic think movie like idea drama movie little bite drama rest clean fun entertain forget julia roberts pretty woman emma roberts beautiful young lady emma fun watch role nancy draw good new face believe far.nancy draw base book story good good blend character actor support actor like pat carroll barry bostwick rachel leigh cook chris kattan credit surprise disney release movie people like movie contain sex violence curse good family film rare day time family movie judge self good wait sequel
2,"This is not a movie you watch for entertainment, at least most people I know would not.<br /><br />It's portraits the cruelty to both body and mind that happen in a war pretty well, the characters seem plausible, especially because you ""read their minds"", something more often found in books and rarely in movies, however done very well in this piece. I would place it next to ""All quiet on the western front"" and ""Die Brücke"" in terms of leaving a lasting impression.<br /><br />I wish I could screen it at school, along with the other two movies - however finding a copy of it showed to be pret...",1,0.118896,0.48999,115,0.03479,0.0,0.156494,0.182617,0.165161,0.008698,0.06958,movie watch entertainment people know not.it portrait cruelty body mind happen war pretty character plausible especially read mind find book rarely movie piece place quiet western die brücke term leave last impression.i wish screen school movie find copy show pretty hard shame
3,"Brilliant! My wife and I joined the sprawling line to see Holly at the Edinburgh Film Festival. After seeing the film, I can understand why there was such a long line. Holly is a touching story about an impossible connection between two people. She is a young girl, he is a worn out westerner. The film grasped every bone in our body. There aren't any graphic scenes or anything that is hard to watch - its the surrealism of normality that really kicks you in the gut. The film is beautifully shot. Among others, we loved the scene where Patrick teaches Holly to ride a small motorcycle. Thuy Ngo...",1,0.166748,0.53418,131,0.015266,0.0,0.137451,0.183228,0.122131,0.091614,0.076355,brilliant wife join sprawl line holly edinburgh film festival see film understand long line holly touch story impossible connection people young girl wear westerner film grasp bone body graphic scene hard watch surrealism normality kick gut film beautifully shoot love scene patrick teach holly ride small motorcycle thuy ngoyen rawness not believe act job)and ron livingston performance stay couple day highly recommend
4,"This film could well have been one of those ordinary ""soapies"" relating the day to day events of half a dozen families whose lives are intertwined..broken relationships,building new friendships, street bashings, near accidents, hopes and dreams and even the discovery of a baby discarded under some bushes! What a mixture of events!<br /><br />Fortunately the film maker goes beyond those daily events and poses questions to consider although there are no satisfactory answers. He asksin this chaotic world do things just happen, is it just luck when things turn out right or , taking a fatalis...",1,0.204102,0.521973,198,0.0,0.0,0.121216,0.212158,0.111084,0.025253,0.101013,film ordinary soapies relate day day event half dozen family live intertwine break relationship build new friendship street bashing near accident hope dream discovery baby discard bush mixture events!fortunately film maker go daily event pose question consider satisfactory answer ask chaotic world thing happen luck thing turn right take fatalistic view person predestine certain place certain time involve event future take new perspective uncanny experience.is super ego make believe important character say sit edge overlook grand canyon come realize infinitely small was.this favourite film ...


In [None]:
def train_val_test_split(df, val_size, test_size, random_state=0):
 
    assert (val_size + test_size) < 1, 'Validation size and test size sum is greater or equal 1'
    assert val_size >= 0 and test_size >= 0, 'Negative size is not accepted'
    train, val, test = np.split(df.sample(frac=1, random_state=random_state),
                                [int((1-(val_size+test_size))*len(df)), int((1-test_size)*len(df))])
    return train, val, test

In [None]:
# Split entire raw dataset into training, validation and test sets
train_set, val_set, test_set = train_val_test_split(dataset_feat_clean, val_size=0.20, test_size=0.10)

In [None]:
# Check training, validation and test sets shapes
print('Training set shape: {}'.format(train_set.shape))
print('Validation set shape: {}'.format(val_set.shape))
print('Test set shape: {}'.format(test_set.shape))

Training set shape: (65914, 13)
Validation set shape: (18833, 13)
Test set shape: (9417, 13)


In [None]:
# Import the dataset
dataset_feat_clean= pd.read_csv('dataset/datasets_feat_clean_1/dataset_feat_clean.csv')

In [None]:
# Save training, validation and test sets to CSV files
train_set.to_csv('dataset/datasets_feat_clean_1/train_feat_clean.csv', index=False)
val_set.to_csv('dataset/datasets_feat_clean_1/val_feat_clean.csv', index=False)
test_set.to_csv('dataset/datasets_feat_clean_1/test_feat_clean.csv', index=False)

In [None]:
# Load the training set
train_dataset = pd.read_csv('dataset/datasets_feat_clean/train_feat_clean.csv', 
                      usecols=['clean_review', 'subjectivity', 'polarity', 'word_count', 'label'],
                      dtype={'clean_review': str, 'label': np.int16})

In [None]:
# Change the columns order
train_dataset = train_dataset[['clean_review', 'subjectivity', 'polarity', 'word_count', 'label']]

In [None]:
# Change the columns order
val_dataset = val_dataset[['clean_review', 'subjectivity', 'polarity', 'word_count', 'label']]

In [None]:
train_vocab = Vocab(train_dataset, target_col=None, word2index=None, sos_token='<SOS>', eos_token='<EOS>',
                    unk_token='<UNK>', pad_token='<PAD>', min_word_count=None, max_vocab_size=5000, max_seq_len=0.8,
                    use_pretrained_vectors=False, glove_path='glove/', glove_name='glove.6B.100d.txt',
                    weights_file_name='glove/weights.npy')

Trimmed vocabulary using maximum size of: 5000
5000/129454 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 80
Mapped words to indices


In [None]:
val_vocab = Vocab(val_dataset, target_col=None, word2index=train_vocab.word2index, sos_token='<SOS>', eos_token='<EOS>',
                  unk_token='<UNK>', pad_token='<PAD>', min_word_count=None, max_vocab_size=5000, max_seq_len=0.8,
                  use_pretrained_vectors=False, glove_path='Glove/', glove_name='glove.6B.100d.txt',
                  weights_file_name='Glove/weights.npy')

Trimmed vocabulary using maximum size of: 5000
5000/58762 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 80
Mapped words to indices


In [None]:
train_iterator = BatchIterator(train_dataset, batch_size=32, vocab_created=False, vocab=None, target_col=None,
                               word2index=None, sos_token='<SOS>', eos_token='<EOS>', unk_token='<UNK>',
                               pad_token='<PAD>', min_word_count=5, max_vocab_size=None, max_seq_len=0.8,
                               use_pretrained_vectors=False, glove_path='glove/', glove_name='glove.6B.100d.txt',
                               weights_file_name='glove/weights.npy')

Trimmed vocabulary using as minimum count threashold: count = 5.00
26330/129454 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 90
Mapped words to indices
Batches created


In [None]:
val_iterator = BatchIterator(val_dataset, batch_size=32, vocab_created=False, vocab=None, target_col=None,
                             word2index=train_iterator.word2index, sos_token='<SOS>', eos_token='<EOS>',
                             unk_token='<UNK>', pad_token='<PAD>', min_word_count=5, max_vocab_size=None,
                             max_seq_len=0.8, use_pretrained_vectors=False, glove_path='glove/',
                             glove_name='glove.6B.100d.txt', weights_file_name='glove/weights.npy')

Trimmed vocabulary using as minimum count threashold: count = 5.00
14177/58762 tokens has been retained
Trimmed input strings vocabulary
Trimmed input sequences lengths to the length of: 88
Mapped words to indices
Batches created


In [None]:
for batches in train_iterator:
    # Unpack the dictionary of batches
    input_seq, target, x_lengths = batches['input_seq'], batches['target'], batches['x_lengths']
    print('input_seq shape: ', input_seq.size())
    print('target shape: ', target.size())
    print('x_lengths shape: ', x_lengths.size())
    break

input_seq shape:  torch.Size([32, 2])
target shape:  torch.Size([32])
x_lengths shape:  torch.Size([32])


In [2]:
from sklearn.metrics import f1_score

In [None]:
class BiGRU(nn.Module):
  def __init__(self, hidden_size, vocab_size, embedding_dim, output_size, n_layers=1, dropout=0.2,
                 spatial_dropout=True, bidirectional=True):
        
        # Inherit everything from the nn.Module
        super(BiGRU, self).__init__()
        
        # Initialize attributes
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout
        self.spatial_dropout = spatial_dropout
        self.bidirectional = bidirectional
        self.n_directions = 2 if self.bidirectional else 1
        
        # Initialize layers
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.dropout = nn.Dropout(self.dropout_p)
        if self.spatial_dropout:
            self.spatial_dropout1d = nn.Dropout2d(self.dropout_p)
        self.gru = nn.GRU(self.embedding_dim, self.hidden_size, num_layers=self.n_layers, 
                          dropout=(0 if n_layers == 1 else self.dropout_p), batch_first=True,
                          bidirectional=self.bidirectional)
        # Linear layer input size is equal to hidden_size * 3, becuase
        # we will concatenate max_pooling ,avg_pooling and last hidden state
        self.linear = nn.Linear(self.hidden_size * 3, self.output_size)

        
  def forward(self, input_seq, input_lengths, hidden=None):
       
        # Extract batch_size
        self.batch_size = input_seq.size(0)
        
        # Embeddings shapes
        # Input: (batch_size,  seq_length)
        # Output: (batch_size, seq_length, embedding_dim)
        emb_out = self.embedding(input_seq)
        
        if self.spatial_dropout:
            # Convert to (batch_size, embedding_dim, seq_length)
            emb_out = emb_out.permute(0, 2, 1)
            emb_out = self.spatial_dropout1d(emb_out)
            # Convert back to (batch_size, seq_length, embedding_dim)
            emb_out = emb_out.permute(0, 2, 1)
        else:
            emb_out = self.dropout(emb_out)
        
        # Pack padded batch of sequences for RNN module
        packed_emb = nn.utils.rnn.pack_padded_sequence(emb_out, input_lengths, batch_first=True)
                
       
        gru_out, hidden = self.gru(packed_emb, hidden)
       
        hidden = hidden.view(self.n_layers, self.n_directions, self.batch_size, self.hidden_size)
        last_hidden = hidden[-1]
        # last hidden shape (num_directions, batch_size, hidden_size)
        # Sum the last hidden state of forward and backward layer
        last_hidden = torch.sum(last_hidden, dim=0)
        # Summed last hidden shape (batch_size, hidden_size)
        
        # Pad a packed batch
        # gru_out output shape: (batch_size, seq_len, hidden_size*num_directions)
        gru_out, lengths = nn.utils.rnn.pad_packed_sequence(gru_out, batch_first=True)
              
        # Sum the gru_out along the num_directions
        if self.bidirectional:
            gru_out = gru_out[:,:,:self.hidden_size] + gru_out[:,:,self.hidden_size:]
        
        # Select the maximum value over each dimension of the hidden representation (max pooling)
        # Permute the input tensor to dimensions: (batch_size, hidden, seq_len)
        # Output dimensions: (batch_size, hidden_size)
        max_pool = F.adaptive_max_pool1d(gru_out.permute(0,2,1), (1,)).view(self.batch_size,-1)
        
        # Consider the average of the representations (mean pooling)
        # Sum along the batch axis and divide by the corresponding lengths (FloatTensor)
        # Output shape: (batch_size, hidden_size)
        avg_pool = torch.sum(gru_out, dim=1) / lengths.view(-1,1).type(torch.FloatTensor) 

        # Concatenate max_pooling, avg_pooling and last hidden state tensors
        concat_out = torch.cat([last_hidden, max_pool, avg_pool], dim=1)

        #concat_out = self.dropout(concat_out)
        out = self.linear(concat_out)
        return F.log_softmax(out, dim=-1)
    
    
  def add_loss_fn(self, loss_fn):
        """Add loss function to the model.
        
        """
        self.loss_fn = loss_fn
        

  def add_optimizer(self, optimizer):
        """Add optimizer to the model.
        
        """
        self.optimizer = optimizer
        
        
  def add_device(self, device=torch.device('cpu')):
        """Specify the device.
        
        """
        self.device = device
    
    
  def train_model(self, train_iterator):
   
        self.train()
        
        train_losses = []
        losses = []
        losses_list = []
        num_seq = 0
        batch_correct = 0
            
        for i, batches in tqdm_notebook(enumerate(train_iterator, 1), total=len(train_iterator), desc='Training'):
            input_seq, target, x_lengths = batches['input_seq'], batches['target'], batches['x_lengths']
            
            input_seq.to(self.device)
            target.to(self.device)
            x_lengths.to(self.device)
            
            self.optimizer.zero_grad()

            pred = self.forward(input_seq, x_lengths)
            loss = self.loss_fn(pred, target)
            loss.backward()
            losses.append(loss.data.cpu().numpy())
            self.optimizer.step()
            
            losses_list.append(loss.data.cpu().numpy())
            
            pred = torch.argmax(pred, 1)

            if self.device.type == 'cpu':
                batch_correct += (pred.cpu() == target.cpu()).sum().item()

            else:
                batch_correct += (pred == target).sum().item()

            num_seq += len(input_seq)     
    
            if i % 100 == 0:
                avg_train_loss = np.mean(losses)
                train_losses.append(avg_train_loss)
                av_f1_score = np.mean(f1_score)
                
                accuracy = batch_correct / num_seq
                
                print('Iteration: {}. Average training loss: {:.4f}. Accuracy: {:.3f}. F1_score: {:.3f}'\
                      .format(i, avg_train_loss, accuracy,av_f1_score))
                
                losses = []
                
            avg_loss = np.mean(losses_list)
            accuracy = batch_correct / num_seq
                              
        return train_losses, avg_loss, accuracy
    
    
  def evaluate_model(self, eval_iterator, conf_mtx=False):
        
        
        self.eval()
        
        eval_losses = []
        losses = []
        losses_list = []
        num_seq = 0
        batch_correct = 0
        pred_total = torch.LongTensor()
        target_total = torch.LongTensor()
        
        with torch.no_grad():
            for i, batches in tqdm_notebook(enumerate(eval_iterator, 1), total=len(eval_iterator), desc='Evaluation'):
                input_seq, target, x_lengths = batches['input_seq'], batches['target'], batches['x_lengths']
                
                input_seq.to(self.device)
                target.to(self.device)
                x_lengths.to(self.device)

                pred = self.forward(input_seq, x_lengths)
                loss = self.loss_fn(pred, target)
                losses.append(loss.data.cpu().numpy())
                losses_list.append(loss.data.cpu().numpy())
                
                pred = torch.argmax(pred, 1)
                                
                if self.device.type == 'cpu':
                    batch_correct += (pred.cpu() == target.cpu()).sum().item()
                    
                else:
                    batch_correct += (pred == target).sum().item()
                    
                num_seq += len(input_seq)     
                
                pred_total = torch.cat([pred_total, pred], dim=0)
                target_total = torch.cat([target_total, target], dim=0)
                
                if i % 100 == 0:
                    avg_batch_eval_loss = np.mean(losses)
                    eval_losses.append(avg_batch_eval_loss)
                    
                    accuracy = batch_correct / num_seq
                    
                    print('Iteration: {}. Average evaluation loss: {:.4f}. Accuracy: {:.2f}'\
                          .format(i, avg_batch_eval_loss, accuracy))

                    losses = []
                    
            avg_loss_list = []
                    
            avg_loss = np.mean(losses_list)
            accuracy = batch_correct / num_seq
            
            conf_matrix = confusion_matrix(target_total.view(-1), pred_total.view(-1))
        
        if conf_mtx:
            print('\tConfusion matrix: ', conf_matrix)
            
        return eval_losses, avg_loss, accuracy, conf_matrix

In [None]:
# Initialize parameters
hidden_size = 8
vocab_size = len(train_iterator.word2index)
embedding_dim = 200
output_size = 2
n_layers = 1
dropout = 0.5
learning_rate = 0.001
epochs = 20
spatial_dropout = True

# Check whether system supports CUDA
CUDA = torch.cuda.is_available()

model = BiGRU(hidden_size, vocab_size, embedding_dim, output_size, n_layers, dropout,
              spatial_dropout, bidirectional=True)

# Move the model to GPU if possible
#if CUDA:
   # model.cuda()

model.add_loss_fn(nn.NLLLoss())

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
model.add_optimizer(optimizer)

device = torch.device('cuda' if CUDA else 'cpu')

model.add_device(device)

# Instantiate the EarlyStopping
#early_stop = EarlyStopping(wait_epochs=1)
early_stop = EarlyStopping(monitor="val_accuracy", min_delta=0.00, patience=3, verbose=False, mode="max")

train_losses_list, train_avg_loss_list, train_accuracy_list = [], [], []
eval_avg_loss_list, eval_accuracy_list, conf_matrix_list = [], [], []

for epoch in range(epochs):
    
    print('\nStart epoch [{}/{}]'.format(epoch+1, epochs))
    
    train_losses, train_avg_loss, train_accuracy = model.train_model(train_iterator)
    
    train_losses_list.append(train_losses)
    train_avg_loss_list.append(train_avg_loss)
    train_accuracy_list.append(train_accuracy)
    
    _, eval_avg_loss, eval_accuracy, conf_matrix = model.evaluate_model(val_iterator)
    
    eval_avg_loss_list.append(eval_avg_loss)
    eval_accuracy_list.append(eval_accuracy)
    conf_matrix_list.append(conf_matrix)
    
    print('\nEpoch [{}/{}]: Train accuracy: {:.3f}. Train loss: {:.4f}. Evaluation accuracy: {:.3f}. Evaluation loss: {:.4f}'\
          .format(epoch+1, epochs, train_accuracy, train_avg_loss, eval_accuracy, eval_avg_loss))
    
   # if early_stop.stop(eval_avg_loss, model, delta=0.003):
     #   break


Start epoch [1/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.4710. Accuracy: 0.712
Iteration: 200. Average training loss: 0.3578. Accuracy: 0.733
Iteration: 300. Average training loss: 0.4167. Accuracy: 0.725
Iteration: 400. Average training loss: 0.3711. Accuracy: 0.736
Iteration: 500. Average training loss: 0.4223. Accuracy: 0.737
Iteration: 600. Average training loss: 0.4011. Accuracy: 0.741
Iteration: 700. Average training loss: 0.3188. Accuracy: 0.751
Iteration: 800. Average training loss: 0.3031. Accuracy: 0.760
Iteration: 900. Average training loss: 0.2835. Accuracy: 0.769
Iteration: 1000. Average training loss: 0.3801. Accuracy: 0.771
Iteration: 1100. Average training loss: 0.3008. Accuracy: 0.778
Iteration: 1200. Average training loss: 0.2792. Accuracy: 0.784
Iteration: 1300. Average training loss: 0.3183. Accuracy: 0.788
Iteration: 1400. Average training loss: 0.3536. Accuracy: 0.790
Iteration: 1500. Average training loss: 0.3181. Accuracy: 0.794
Iteration: 1600. Average training loss: 0.2672. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.2300. Accuracy: 0.89
Iteration: 200. Average evaluation loss: 0.2369. Accuracy: 0.89
Iteration: 300. Average evaluation loss: 0.2051. Accuracy: 0.90
Iteration: 400. Average evaluation loss: 0.1922. Accuracy: 0.90
Iteration: 500. Average evaluation loss: 0.1882. Accuracy: 0.90

Epoch [1/20]: Train accuracy: 0.816. Train loss: 0.3258. Evaluation accuracy: 0.900. Evaluation loss: 0.2169

Start epoch [2/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.2316. Accuracy: 0.893
Iteration: 200. Average training loss: 0.2390. Accuracy: 0.888
Iteration: 300. Average training loss: 0.2711. Accuracy: 0.881
Iteration: 400. Average training loss: 0.2421. Accuracy: 0.882
Iteration: 500. Average training loss: 0.2844. Accuracy: 0.879
Iteration: 600. Average training loss: 0.2691. Accuracy: 0.878
Iteration: 700. Average training loss: 0.2021. Accuracy: 0.882
Iteration: 800. Average training loss: 0.1974. Accuracy: 0.885
Iteration: 900. Average training loss: 0.1914. Accuracy: 0.888
Iteration: 1000. Average training loss: 0.2669. Accuracy: 0.887
Iteration: 1100. Average training loss: 0.2031. Accuracy: 0.889
Iteration: 1200. Average training loss: 0.1890. Accuracy: 0.891
Iteration: 1300. Average training loss: 0.2417. Accuracy: 0.891
Iteration: 1400. Average training loss: 0.2605. Accuracy: 0.890
Iteration: 1500. Average training loss: 0.2547. Accuracy: 0.890
Iteration: 1600. Average training loss: 0.2093. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.2062. Accuracy: 0.91
Iteration: 200. Average evaluation loss: 0.2103. Accuracy: 0.91
Iteration: 300. Average evaluation loss: 0.1810. Accuracy: 0.91
Iteration: 400. Average evaluation loss: 0.1728. Accuracy: 0.92
Iteration: 500. Average evaluation loss: 0.1643. Accuracy: 0.92

Epoch [2/20]: Train accuracy: 0.895. Train loss: 0.2263. Evaluation accuracy: 0.915. Evaluation loss: 0.1912

Start epoch [3/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1886. Accuracy: 0.915
Iteration: 200. Average training loss: 0.2039. Accuracy: 0.914
Iteration: 300. Average training loss: 0.2230. Accuracy: 0.909
Iteration: 400. Average training loss: 0.2068. Accuracy: 0.910
Iteration: 500. Average training loss: 0.2375. Accuracy: 0.908
Iteration: 600. Average training loss: 0.2227. Accuracy: 0.907
Iteration: 700. Average training loss: 0.1754. Accuracy: 0.909
Iteration: 800. Average training loss: 0.1678. Accuracy: 0.911
Iteration: 900. Average training loss: 0.1542. Accuracy: 0.913
Iteration: 1000. Average training loss: 0.2164. Accuracy: 0.913
Iteration: 1100. Average training loss: 0.1759. Accuracy: 0.914
Iteration: 1200. Average training loss: 0.1703. Accuracy: 0.915
Iteration: 1300. Average training loss: 0.2102. Accuracy: 0.915
Iteration: 1400. Average training loss: 0.2185. Accuracy: 0.914
Iteration: 1500. Average training loss: 0.2187. Accuracy: 0.913
Iteration: 1600. Average training loss: 0.1801. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1983. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.1990. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1750. Accuracy: 0.92
Iteration: 400. Average evaluation loss: 0.1631. Accuracy: 0.92
Iteration: 500. Average evaluation loss: 0.1558. Accuracy: 0.92

Epoch [3/20]: Train accuracy: 0.916. Train loss: 0.1932. Evaluation accuracy: 0.922. Evaluation loss: 0.1821

Start epoch [4/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1703. Accuracy: 0.926
Iteration: 200. Average training loss: 0.1784. Accuracy: 0.923
Iteration: 300. Average training loss: 0.1923. Accuracy: 0.922
Iteration: 400. Average training loss: 0.1808. Accuracy: 0.922
Iteration: 500. Average training loss: 0.2166. Accuracy: 0.919
Iteration: 600. Average training loss: 0.1979. Accuracy: 0.918
Iteration: 700. Average training loss: 0.1565. Accuracy: 0.920
Iteration: 800. Average training loss: 0.1549. Accuracy: 0.922
Iteration: 900. Average training loss: 0.1376. Accuracy: 0.924
Iteration: 1000. Average training loss: 0.2023. Accuracy: 0.922
Iteration: 1100. Average training loss: 0.1595. Accuracy: 0.923
Iteration: 1200. Average training loss: 0.1485. Accuracy: 0.924
Iteration: 1300. Average training loss: 0.1924. Accuracy: 0.924
Iteration: 1400. Average training loss: 0.2046. Accuracy: 0.923
Iteration: 1500. Average training loss: 0.2101. Accuracy: 0.922
Iteration: 1600. Average training loss: 0.1705. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1973. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.1919. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1704. Accuracy: 0.92
Iteration: 400. Average evaluation loss: 0.1593. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1526. Accuracy: 0.93

Epoch [4/20]: Train accuracy: 0.925. Train loss: 0.1749. Evaluation accuracy: 0.925. Evaluation loss: 0.1779

Start epoch [5/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1494. Accuracy: 0.938
Iteration: 200. Average training loss: 0.1641. Accuracy: 0.932
Iteration: 300. Average training loss: 0.1792. Accuracy: 0.930
Iteration: 400. Average training loss: 0.1633. Accuracy: 0.931
Iteration: 500. Average training loss: 0.1962. Accuracy: 0.928
Iteration: 600. Average training loss: 0.1846. Accuracy: 0.927
Iteration: 700. Average training loss: 0.1368. Accuracy: 0.929
Iteration: 800. Average training loss: 0.1407. Accuracy: 0.930
Iteration: 900. Average training loss: 0.1311. Accuracy: 0.932
Iteration: 1000. Average training loss: 0.1834. Accuracy: 0.931
Iteration: 1100. Average training loss: 0.1403. Accuracy: 0.932
Iteration: 1200. Average training loss: 0.1365. Accuracy: 0.933
Iteration: 1300. Average training loss: 0.1745. Accuracy: 0.932
Iteration: 1400. Average training loss: 0.1905. Accuracy: 0.931
Iteration: 1500. Average training loss: 0.1848. Accuracy: 0.930
Iteration: 1600. Average training loss: 0.1594. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1970. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.1884. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1661. Accuracy: 0.92
Iteration: 400. Average evaluation loss: 0.1582. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1516. Accuracy: 0.93

Epoch [5/20]: Train accuracy: 0.933. Train loss: 0.1598. Evaluation accuracy: 0.926. Evaluation loss: 0.1759

Start epoch [6/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1364. Accuracy: 0.943
Iteration: 200. Average training loss: 0.1474. Accuracy: 0.941
Iteration: 300. Average training loss: 0.1616. Accuracy: 0.938
Iteration: 400. Average training loss: 0.1547. Accuracy: 0.939
Iteration: 500. Average training loss: 0.1847. Accuracy: 0.935
Iteration: 600. Average training loss: 0.1635. Accuracy: 0.935
Iteration: 700. Average training loss: 0.1260. Accuracy: 0.936
Iteration: 800. Average training loss: 0.1367. Accuracy: 0.937
Iteration: 900. Average training loss: 0.1228. Accuracy: 0.939
Iteration: 1000. Average training loss: 0.1672. Accuracy: 0.938
Iteration: 1100. Average training loss: 0.1289. Accuracy: 0.939
Iteration: 1200. Average training loss: 0.1242. Accuracy: 0.940
Iteration: 1300. Average training loss: 0.1633. Accuracy: 0.939
Iteration: 1400. Average training loss: 0.1716. Accuracy: 0.939
Iteration: 1500. Average training loss: 0.1746. Accuracy: 0.938
Iteration: 1600. Average training loss: 0.1443. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1951. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.1877. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1688. Accuracy: 0.93
Iteration: 400. Average evaluation loss: 0.1566. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1496. Accuracy: 0.93

Epoch [6/20]: Train accuracy: 0.939. Train loss: 0.1478. Evaluation accuracy: 0.928. Evaluation loss: 0.1752

Start epoch [7/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1246. Accuracy: 0.951
Iteration: 200. Average training loss: 0.1413. Accuracy: 0.947
Iteration: 300. Average training loss: 0.1608. Accuracy: 0.942
Iteration: 400. Average training loss: 0.1321. Accuracy: 0.942
Iteration: 500. Average training loss: 0.1755. Accuracy: 0.939
Iteration: 600. Average training loss: 0.1568. Accuracy: 0.938
Iteration: 700. Average training loss: 0.1141. Accuracy: 0.941
Iteration: 800. Average training loss: 0.1187. Accuracy: 0.942
Iteration: 900. Average training loss: 0.1100. Accuracy: 0.943
Iteration: 1000. Average training loss: 0.1574. Accuracy: 0.942
Iteration: 1100. Average training loss: 0.1199. Accuracy: 0.943
Iteration: 1200. Average training loss: 0.1190. Accuracy: 0.944
Iteration: 1300. Average training loss: 0.1492. Accuracy: 0.943
Iteration: 1400. Average training loss: 0.1617. Accuracy: 0.943
Iteration: 1500. Average training loss: 0.1603. Accuracy: 0.943
Iteration: 1600. Average training loss: 0.1406. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.1992. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.1883. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1693. Accuracy: 0.93
Iteration: 400. Average evaluation loss: 0.1592. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1500. Accuracy: 0.93

Epoch [7/20]: Train accuracy: 0.944. Train loss: 0.1376. Evaluation accuracy: 0.928. Evaluation loss: 0.1774

Start epoch [8/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1206. Accuracy: 0.952
Iteration: 200. Average training loss: 0.1373. Accuracy: 0.949
Iteration: 300. Average training loss: 0.1448. Accuracy: 0.947
Iteration: 400. Average training loss: 0.1289. Accuracy: 0.947
Iteration: 500. Average training loss: 0.1589. Accuracy: 0.945
Iteration: 600. Average training loss: 0.1446. Accuracy: 0.944
Iteration: 700. Average training loss: 0.1121. Accuracy: 0.945
Iteration: 800. Average training loss: 0.1180. Accuracy: 0.946
Iteration: 900. Average training loss: 0.1050. Accuracy: 0.947
Iteration: 1000. Average training loss: 0.1472. Accuracy: 0.946
Iteration: 1100. Average training loss: 0.1079. Accuracy: 0.947
Iteration: 1200. Average training loss: 0.1056. Accuracy: 0.948
Iteration: 1300. Average training loss: 0.1467. Accuracy: 0.948
Iteration: 1400. Average training loss: 0.1428. Accuracy: 0.948
Iteration: 1500. Average training loss: 0.1507. Accuracy: 0.947
Iteration: 1600. Average training loss: 0.1320. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.2021. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.1896. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1713. Accuracy: 0.93
Iteration: 400. Average evaluation loss: 0.1603. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1502. Accuracy: 0.93

Epoch [8/20]: Train accuracy: 0.947. Train loss: 0.1300. Evaluation accuracy: 0.928. Evaluation loss: 0.1785

Start epoch [9/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1169. Accuracy: 0.952
Iteration: 200. Average training loss: 0.1264. Accuracy: 0.952
Iteration: 300. Average training loss: 0.1323. Accuracy: 0.951
Iteration: 400. Average training loss: 0.1238. Accuracy: 0.951
Iteration: 500. Average training loss: 0.1539. Accuracy: 0.949
Iteration: 600. Average training loss: 0.1376. Accuracy: 0.947
Iteration: 700. Average training loss: 0.0959. Accuracy: 0.950
Iteration: 800. Average training loss: 0.1149. Accuracy: 0.950
Iteration: 900. Average training loss: 0.0953. Accuracy: 0.952
Iteration: 1000. Average training loss: 0.1431. Accuracy: 0.951
Iteration: 1100. Average training loss: 0.1027. Accuracy: 0.951
Iteration: 1200. Average training loss: 0.0999. Accuracy: 0.952
Iteration: 1300. Average training loss: 0.1406. Accuracy: 0.951
Iteration: 1400. Average training loss: 0.1366. Accuracy: 0.951
Iteration: 1500. Average training loss: 0.1525. Accuracy: 0.950
Iteration: 1600. Average training loss: 0.1170. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.2071. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.1957. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1749. Accuracy: 0.93
Iteration: 400. Average evaluation loss: 0.1629. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1556. Accuracy: 0.93

Epoch [9/20]: Train accuracy: 0.952. Train loss: 0.1214. Evaluation accuracy: 0.928. Evaluation loss: 0.1832

Start epoch [10/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1107. Accuracy: 0.956
Iteration: 200. Average training loss: 0.1236. Accuracy: 0.953
Iteration: 300. Average training loss: 0.1228. Accuracy: 0.952
Iteration: 400. Average training loss: 0.1145. Accuracy: 0.953
Iteration: 500. Average training loss: 0.1430. Accuracy: 0.950
Iteration: 600. Average training loss: 0.1276. Accuracy: 0.950
Iteration: 700. Average training loss: 0.0862. Accuracy: 0.953
Iteration: 800. Average training loss: 0.1030. Accuracy: 0.954
Iteration: 900. Average training loss: 0.0922. Accuracy: 0.954
Iteration: 1000. Average training loss: 0.1269. Accuracy: 0.953
Iteration: 1100. Average training loss: 0.1059. Accuracy: 0.954
Iteration: 1200. Average training loss: 0.1031. Accuracy: 0.954
Iteration: 1300. Average training loss: 0.1200. Accuracy: 0.954
Iteration: 1400. Average training loss: 0.1396. Accuracy: 0.953
Iteration: 1500. Average training loss: 0.1372. Accuracy: 0.953
Iteration: 1600. Average training loss: 0.1139. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.2070. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.1977. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1768. Accuracy: 0.93
Iteration: 400. Average evaluation loss: 0.1613. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1557. Accuracy: 0.93

Epoch [10/20]: Train accuracy: 0.954. Train loss: 0.1148. Evaluation accuracy: 0.929. Evaluation loss: 0.1838

Start epoch [11/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1058. Accuracy: 0.961
Iteration: 200. Average training loss: 0.1154. Accuracy: 0.958
Iteration: 300. Average training loss: 0.1185. Accuracy: 0.956
Iteration: 400. Average training loss: 0.1018. Accuracy: 0.957
Iteration: 500. Average training loss: 0.1386. Accuracy: 0.955
Iteration: 600. Average training loss: 0.1131. Accuracy: 0.955
Iteration: 700. Average training loss: 0.0906. Accuracy: 0.956
Iteration: 800. Average training loss: 0.0985. Accuracy: 0.957
Iteration: 900. Average training loss: 0.0892. Accuracy: 0.957
Iteration: 1000. Average training loss: 0.1214. Accuracy: 0.957
Iteration: 1100. Average training loss: 0.0935. Accuracy: 0.957
Iteration: 1200. Average training loss: 0.0998. Accuracy: 0.957
Iteration: 1300. Average training loss: 0.1213. Accuracy: 0.957
Iteration: 1400. Average training loss: 0.1259. Accuracy: 0.957
Iteration: 1500. Average training loss: 0.1348. Accuracy: 0.956
Iteration: 1600. Average training loss: 0.1083. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.2108. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.2002. Accuracy: 0.93
Iteration: 300. Average evaluation loss: 0.1810. Accuracy: 0.93
Iteration: 400. Average evaluation loss: 0.1629. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1594. Accuracy: 0.93

Epoch [11/20]: Train accuracy: 0.957. Train loss: 0.1092. Evaluation accuracy: 0.930. Evaluation loss: 0.1867

Start epoch [12/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.1008. Accuracy: 0.960
Iteration: 200. Average training loss: 0.1066. Accuracy: 0.960
Iteration: 300. Average training loss: 0.1177. Accuracy: 0.957
Iteration: 400. Average training loss: 0.1140. Accuracy: 0.957
Iteration: 500. Average training loss: 0.1329. Accuracy: 0.956
Iteration: 600. Average training loss: 0.1171. Accuracy: 0.955
Iteration: 700. Average training loss: 0.0852. Accuracy: 0.957
Iteration: 800. Average training loss: 0.0983. Accuracy: 0.957
Iteration: 900. Average training loss: 0.0821. Accuracy: 0.959
Iteration: 1000. Average training loss: 0.1156. Accuracy: 0.958
Iteration: 1100. Average training loss: 0.0825. Accuracy: 0.959
Iteration: 1200. Average training loss: 0.0886. Accuracy: 0.960
Iteration: 1300. Average training loss: 0.1243. Accuracy: 0.959
Iteration: 1400. Average training loss: 0.1198. Accuracy: 0.959
Iteration: 1500. Average training loss: 0.1270. Accuracy: 0.959
Iteration: 1600. Average training loss: 0.1056. A

Evaluation:   0%|          | 0/589 [00:00<?, ?it/s]

Iteration: 100. Average evaluation loss: 0.2134. Accuracy: 0.92
Iteration: 200. Average evaluation loss: 0.2017. Accuracy: 0.92
Iteration: 300. Average evaluation loss: 0.1845. Accuracy: 0.93
Iteration: 400. Average evaluation loss: 0.1650. Accuracy: 0.93
Iteration: 500. Average evaluation loss: 0.1573. Accuracy: 0.93

Epoch [12/20]: Train accuracy: 0.959. Train loss: 0.1050. Evaluation accuracy: 0.930. Evaluation loss: 0.1886

Start epoch [13/20]


Training:   0%|          | 0/2060 [00:00<?, ?it/s]

Iteration: 100. Average training loss: 0.0861. Accuracy: 0.963
Iteration: 200. Average training loss: 0.1013. Accuracy: 0.959
Iteration: 300. Average training loss: 0.1068. Accuracy: 0.959
Iteration: 400. Average training loss: 0.0968. Accuracy: 0.960
Iteration: 500. Average training loss: 0.1176. Accuracy: 0.959
Iteration: 600. Average training loss: 0.1084. Accuracy: 0.959
Iteration: 700. Average training loss: 0.0812. Accuracy: 0.960
Iteration: 800. Average training loss: 0.0891. Accuracy: 0.961
Iteration: 900. Average training loss: 0.0756. Accuracy: 0.962
Iteration: 1000. Average training loss: 0.1053. Accuracy: 0.962
