<a href="https://colab.research.google.com/github/YousefAtefB/StanceCat-COV19/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setting up colab environment and downad word2vec



## Mount google drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/StanceCat-COV19

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/StanceCat-COV19


## Download word2vec CBOW model ~ 2.64 GB

In [None]:
# !pip install wget
# import wget
# url = "https://bakrianoo.ewr1.vultrobjects.com/aravec/full_uni_cbow_300_twitter.zip"
# wget.download(url, './Downloads')

## unzip the model

In [None]:
# import zipfile
# with zipfile.ZipFile('./Downloads/full_uni_cbow_300_twitter.zip', 'r') as zip_ref:
#   zip_ref.extractall('./Downloads')

# Importing libraries

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import re
import nltk
nltk.download('punkt')
from tqdm import tqdm
from sklearn.metrics import f1_score

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Data PreProcessing

In [None]:
train = pd.read_csv('Dataset/train.csv')
text_train, category_train, stance_train = train['text'], train['category'], train['stance']

dev = pd.read_csv('Dataset/dev.csv')
text_dev, category_dev, stance_dev = dev['text'], dev['category'], dev['stance']

In [None]:
train.head(10)

Unnamed: 0,text,category,stance
0,بيل غيتس يتلقى لقاح #كوفيد19 من غير تصوير الاب...,celebrity,1
1,وزير الصحة لحد اليوم وتحديدا هلأ بمؤتمروا الصح...,info_news,1
2,قولكن رح يكونو اد المسؤولية ب لبنان لما يوصل ...,info_news,1
3,#تركيا.. وزير الصحة فخر الدين قوجة يتلقى أول ج...,celebrity,1
4,وئام وهاب يشتم الدول الخليجية في كل طلة اعلامي...,personal,0
5,"لقاح #كورونا في أميركا.. قلق متزايد من ""التوزي...",info_news,0
6,لبنان اشترى مليونان لقاح امريكي اذا شلنا يلي ع...,info_news,1
7,من عوارض لقاح كورونا<LF>هو تهكير حسابك عتويتر<...,personal,0
8,هناك 1780 مليونيراً في لبنان. ماذا لو فُرضت ال...,unrelated,0
9,دعبول حضرتك منو انت وتطلب من قائد دولة إسلامية...,info_news,1


In [None]:
dev.head(10)

Unnamed: 0,text,category,stance
0,#مريم_رجوي: <LF>حظر خامنئي المجرم شراء #لقاح_ك...,info_news,1
1,#الصحة:<LF>•تم إعطاء 259.530 جرعة من لقاح #كور...,plan,1
2,#خادم_الحرمين - حفظه الله - يتلقى الجرعة الأول...,celebrity,1
3,#الصحه_العالميه: لقاحات #كورونا آمنة ولا خوف م...,info_news,1
4,"#وزيرة_الصحة ""#هالة_زايد"" تقول إنه يجرى مراجعة...",info_news,1
5,2️⃣ وانتهى الفريق من الدراسات قبل السريرية ونش...,info_news,1
6,عاجل 🔴 <LF>.<LF><LF>.<LF><LF>وزارة الصحة :<LF>...,plan,1
7,#فيديو | السفير الأميركي لدى السعودية بعد تلقي...,info_news,1
8,تصريحات وبس الحكومة مع السيسي علي حسب اللقطة! ...,info_news,0
9,الاتحاد الاوروبي تفاوض لشراء لقاحات الكورونا م...,info_news,1


In [None]:
text_train, category_train, stance_train = np.array(train['text']), np.array(train['category']), np.array(train['stance'])
text_dev, category_dev, stance_dev = np.array(dev['text']), np.array(dev['category']), np.array(dev['stance'])

print(text_train.shape, category_train.shape, stance_train.shape)
print(text_dev.shape, category_dev.shape, stance_dev.shape)

(6988,) (6988,) (6988,)
(1000,) (1000,) (1000,)


In [None]:
def clean_str(text):
    search = ["أ","إ","آ","ة","_","-","/",".","،"," و "," يا ",'"',"ـ","'","ى","\\",'\n', '\t','"','?','؟','!']
    replace = ["ا","ا","ا","ه"," "," ","","",""," و"," يا","","","","ي","",' ', ' ',' ',' ? ',' ؟ ',' ! ']
    
    #remove tashkeel
    p_tashkeel = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
    text = re.sub(p_tashkeel,"", text)
    
    #remove longation
    p_longation = re.compile(r'(.)\1+')
    subst = r"\1\1"
    text = re.sub(p_longation, subst, text)
    
    text = text.replace('وو', 'و')
    text = text.replace('يي', 'ي')
    text = text.replace('اا', 'ا')
    
    for i in range(0, len(search)):
        text = text.replace(search[i], replace[i])
    
    #trim    
    text = text.strip()

    return text

def PreProcessing(text, unk='<UNK>'):

    # remove links
    text = [re.sub(r'https?:\/\/.*[\r\n]*', '', x, flags=re.MULTILINE) for x in text]

    # Clean/Normalize Arabic Text
    text = [clean_str(x) for x in text] 

    # remove emojis
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)
    
    text = [emoji_pattern.sub(r'', x) for x in text] # no emoji

    # remove english words
    text = [re.sub(r'\s*[A-Za-z]+\b', '' , x) for x in text]

    # tokenize
    text = [nltk.tokenize.word_tokenize(x) for x in text]

    # # remove stop-words
    # stopwords = set(nltk.corpus.stopwords.words("arabic"))

    # for i in range(len(text)):
    #     text[i] = [word for word in text[i] if word not in stopwords]

    for i in range(len(text)):
      text[i] = [word for word in text[i] if len(word)>2]

    
    for i in range(len(text)):
      if len(text[i])==0:
        text[i] = unk

    return text

In [None]:
# print(max(text_train, key=len))
print(text_train[0])
print(text_dev[0])
text_train = PreProcessing(text_train)
text_dev = PreProcessing(text_dev)
print(text_train[0])
print(text_dev[0])

بيل غيتس يتلقى لقاح #كوفيد19 من غير تصوير الابرة و لا السيرنجة و لا الدواء و لابس بولو صيفي في عز الشتاء و يقول ان إحدى مزايا عمر ال 65 عامًا هي انه مؤهل للحصول على اللقاح ... يعنى ما كان يحتاج اللقاح لو كان عمره اصغر من 65 🤔 https://t.co/QQKFFUNwBn
#مريم_رجوي: <LF>حظر خامنئي المجرم شراء #لقاح_كورونا يعد مجزرة متعمدة بحق الشعب الإيراني<LF><LF>نقل موقع مريم رجوي موقف رئيسة الجمهورية المنتخبة للمقاومة الإيرانية من تصريحات خامنئي المجرم حول حظر استيراد لقاح كورونا من الولايات المتحدة و بريطانيا و فرنسا. <LF>#اللقاح_حق_للناس https://t.co/AYXMbXjVKn
['بيل', 'غيتس', 'يتلقي', 'لقاح', 'كوفيد19', 'غير', 'تصوير', 'الابره', 'ولا', 'السيرنجه', 'ولا', 'الدواء', 'ولابس', 'بولو', 'صيفي', 'الشتاء', 'ويقول', 'احدي', 'مزايا', 'عمر', 'عاما', 'انه', 'مؤهل', 'للحصول', 'علي', 'اللقاح', 'يعني', 'كان', 'يحتاج', 'اللقاح', 'كان', 'عمره', 'اصغر']
['مريم', 'رجوي', 'حظر', 'خامنئي', 'المجرم', 'شراء', 'لقاح', 'كورونا', 'يعد', 'مجزره', 'متعمده', 'بحق', 'الشعب', 'الايراني', 'نقل', 'موقع', 'مريم', 'رجوي', 'موقف', 'رئيس

In [None]:
with open('processed_train.txt','w', encoding='utf8') as f:
	for i in text_train:
		f.write('%s\n'%i)
print(text_train[0])

['بيل', 'غيتس', 'يتلقي', 'لقاح', 'كوفيد19', 'غير', 'تصوير', 'الابره', 'ولا', 'السيرنجه', 'ولا', 'الدواء', 'ولابس', 'بولو', 'صيفي', 'الشتاء', 'ويقول', 'احدي', 'مزايا', 'عمر', 'عاما', 'انه', 'مؤهل', 'للحصول', 'علي', 'اللقاح', 'يعني', 'كان', 'يحتاج', 'اللقاح', 'كان', 'عمره', 'اصغر']


In [None]:
!pip install gensim nltk
import gensim
from gensim.models import word2vec

t_model = gensim.models.Word2Vec.load('./Downloads/full_uni_cbow_300_twitter.mdl')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:

# token = 'الحق'
# most_similar = t_model.wv.most_similar( token, topn=10 )
# for term, score in most_similar:
#     print(term, score)

# word_vector = t_model.wv[token]
# print(len(word_vector),word_vector)
# print(len(t_model.wv.index2word))
# print(t_model.wv.index2word[0])

In [None]:
def BuildVocab(pad='<PAD>', unk='<UNK>'):

    vocab = [word for word in t_model.wv.index2word]
    vocab = [pad, unk] + vocab

    id2word = {i: word for i, word in enumerate(vocab)}
    word2id = {word: i for i, word in id2word.items()}
    vocab_size = len(vocab)

    return vocab_size, vocab, id2word, word2id    

In [None]:
vocab_size, vocab, id2word, word2id = BuildVocab()
print(vocab_size)

1259758


In [None]:
def BuildingEmbeddingMatrix():
  matrix_len = vocab_size
  emb_dim = 300

  emb_matrix = np.zeros((matrix_len, emb_dim))

  for i, word in enumerate(vocab):
      try: 
          emb_matrix[i] = t_model.wv[word]
      except KeyError:
          emb_matrix[i] = np.random.normal(scale=0.6, size=(emb_dim, ))
  
  return emb_matrix

In [None]:
emb_matrix = BuildingEmbeddingMatrix()
emb_matrix = torch.from_numpy(emb_matrix)
# print(len(emb_matrix[0]))

In [None]:
import gc
del t_model
gc.collect()

11

In [None]:
categories = set(category_train)
print(categories)
category2id = {word:i for i, word in enumerate(list(categories))}
print(category2id['celebrity'])

{'advice', 'info_news', 'rumors', 'plan', 'personal', 'others', 'restrictions', 'unrelated', 'requests', 'celebrity'}
9


# Model Building

## LSTM

### Ideas to try
1) bi-directional
2) pre-training
3) multi-layers
4) BERT
5) transformers notebook
6) packed_padded_sequences
7) pre-trained embedding

### Building Model

In [None]:
class Dataset(torch.utils.data.Dataset):

  def __init__(self, x, y, pad='<PAD>', unk='<UNK>', word2id=word2id):

    x = x.copy()

    # src lengths to be used in pack padded
    self.seq_lengths = torch.LongTensor(list(map(len, x)))

    # print(x[0], self.seq_lengths[0])

    for i in range(len(x)):
      x[i] = [word2id[word] if word in word2id else word2id[unk] for word in x[i]]
    
    # print(x[0])

    self.X = torch.nn.utils.rnn.pad_sequence([torch.tensor(sentence) for sentence in x], batch_first=True, padding_value=word2id[pad])

    print(self.X[0])

    # sort sequeces decreasing in size
    self.seq_lengths, perm_idx = self.seq_lengths.sort(0, descending=True)
    self.X = self.X[perm_idx]


    print(self.X.shape)

    print(min(self.seq_lengths))
    
    self.Y = torch.tensor(y)
    self.len = len(x)
    self.pad = pad

  def __len__(self):
    return self.len

  def __getitem__(self, idx):
    return self.X[idx], self.Y[idx], self.seq_lengths[idx]

In [None]:
stance_train_dataset = Dataset(text_train, stance_train + 1)
category_train_dataset = Dataset(text_train, [category2id[category] for category in category_train])

stance_dev_dataset = Dataset(text_dev, stance_dev + 1)
category_dev_dataset = Dataset(text_dev, [category2id[category] for category in category_dev])

tensor([ 9280, 40161, 16060, 43868,     1,   113,  3557, 32246,    23,     1,
           23, 10460, 39927, 74361, 17413,  2353,  2362,  3345, 25753,   554,
         3283,    98, 38562, 10166,    11, 92234,   141,    42,  1872, 92234,
           42,  1624,  4666,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0])
torch.Size([6988, 106])
tensor(2)
tensor([ 9280, 40161, 16060, 43868,     1,   113,  3557, 32246,    23,     1,
           23, 10460, 39927, 74361, 17413,  2353,  2362, 

In [None]:
def create_emb_layer(emb_matrix, non_trainable=False):
    num_embeddings, embedding_dim = emb_matrix.shape
    emb_layer = nn.Embedding(num_embeddings, embedding_dim)
    emb_layer.load_state_dict({'weight': emb_matrix})
    
    if non_trainable:
        emb_layer.weight.requires_grad = False

    return emb_layer, num_embeddings, embedding_dim

class LSTM(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, output_dim, dropout):
        super().__init__()

        self.hid_dim = hid_dim
        self.output_dim = output_dim

        self.embedding, num_embeddings, embedding_dim = create_emb_layer(emb_matrix, True)
        self.lstm = nn.LSTM(emb_dim, hid_dim, dropout=dropout)

        self.fc_out = nn.Linear(hid_dim, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_len):
        #src = [src len, batch size]
        
        embedded = self.embedding(src)
        embedded = self.dropout(embedded)
        #embedded = [src len, batch size, emb dim]

        packed_embedded =  torch.nn.utils.rnn.pack_padded_sequence(embedded, src_len.cpu().numpy(), batch_first=False)

        outputs, (hidden, cell) = self.lstm(packed_embedded)
        #outputs = [src len, batch size, hid dim]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        #outputs are always from the top hidden layer

        prediction = self.fc_out(hidden)
        #prediction = [1, batch size, output dim]

        prediction = prediction.squeeze(0)
        #prediction = [batch size, output dim]

        return prediction        

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Training

In [None]:
def train(model, train_dataset, train_dataloader, optimizer, criterion, clip):
    
    model.train()
        
    total_acc_train = 0.0
    total_loss_train = 0.0

    for train_input, train_label, src_len in tqdm(train_dataloader):

        train_input = train_input.to(device).permute(1, 0)
        # print(train_input.shape)
        train_label = train_label.to(device)

        output = model(train_input, src_len)
        # print(output.shape, train_label.shape)
        
        output_dim = output.shape[-1]
        output = output.view(-1, output_dim)
        train_label = train_label.view(-1)

        batch_loss = criterion(output, train_label)

        total_loss_train += batch_loss
        
        acc = torch.sum(torch.argmax(output, -1) == train_label) 
        total_acc_train += acc

        optimizer.zero_grad()

        batch_loss.backward()

        # torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()

    # calculate loss    
    epoch_loss = total_loss_train / len(train_dataset)
    
    # calculate accuracy
    epoch_acc = total_acc_train / len(train_dataset)

    # calculate f1 score
    train_input, train_label, src_len = train_dataset[:]
    train_input = train_input.to(device).permute(1, 0)
    train_label = train_label.to(device)
    output = model(train_input, src_len)

    output_dim = output.shape[-1]
    output = output.view(-1, output_dim)
    train_label = train_label.view(-1)
    y_true, y_pred = train_label, torch.argmax(output, -1)

    f1_macro = f1_score(y_true.cpu(), y_pred.cpu(), average='macro')

    return epoch_loss, epoch_acc, f1_macro

In [None]:
def evaluate(model, test_dataset, test_dataloader, criterion):

  model.eval()

  total_acc_test = 0.0
  total_loss_test = 0.0
  
  with torch.no_grad():

    for test_input, test_label, src_len in tqdm(test_dataloader):

      test_input = test_input.to(device).permute(1, 0)
      test_label = test_label.to(device)


      output = model(test_input, src_len)

      batch_loss = criterion(output.view(-1, model.output_dim), test_label.view(-1))

      total_loss_test += batch_loss

      acc = torch.sum(torch.argmax(output, -1)==test_label)
      total_acc_test += acc

    # calculate loss
    total_loss_test /= len(test_dataset)

    # calculate accuracy
    total_acc_test /= len(test_dataset)

    # calculate f1 score
    test_input, test_label, src_len = test_dataset[:]
    test_input = test_input.to(device).permute(1, 0)
    test_label = test_label.to(device)
    output = model(test_input, src_len)

    output_dim = output.shape[-1]
    output = output.view(-1, output_dim)
    test_label = test_label.view(-1)
    y_true, y_pred = test_label, torch.argmax(output, -1)

    f1_macro = f1_score(y_true.cpu(), y_pred.cpu(), average='macro')
  
  return total_loss_test, total_acc_test, f1_macro

In [None]:
def train_evaluate(model, train_dataset, dev_dataset, model_name, batch_size=512, epochs=20, learning_rate=0.01, clip=1):

  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)

  dev_dataloader = torch.utils.data.DataLoader(dev_dataset, batch_size=batch_size)

  # criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)
  criterion = nn.CrossEntropyLoss()

  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  
  model = model.to(device)
  criterion = criterion.to(device)

  best_f1_macro = 0

  for epoch_num in range(epochs):

    epoch_loss, epoch_acc, train_f1_macro = train(model, train_dataset, train_dataloader, optimizer, criterion, clip)
    dev_loss, dev_acc, dev_f1_macro = evaluate(model, dev_dataset, dev_dataloader, criterion)

    if dev_f1_macro > best_f1_macro:
      best_f1_macro = dev_f1_macro
      torch.save(model.state_dict(), 'best_'+model_name+'.pt')

    print(f'Train = Epochs: {epoch_num + 1} | Loss: {epoch_loss} | Accuracy: {epoch_acc} | f1_macro : {train_f1_macro}')
    print(f'Dev = Epochs: {epoch_num + 1} | Loss: {dev_loss} | Accuracy: {dev_acc} | f1_macro : {dev_f1_macro}')    

  model.load_state_dict(torch.load('best_'+model_name+'.pt'))

  dev_loss, dev_acc, dev_f1_macro = evaluate(model, dev_dataset, dev_dataloader, criterion)

  print(f'Best Dev = Loss: {dev_loss} | Accuracy: {dev_acc} | f1_macro : {dev_f1_macro}')

In [None]:
INPUT_DIM = vocab_size
OUTPUT_DIM = 3
EMB_DIM = 300 #256
HID_DIM = 512 #512
DROPOUT = 0.5

emb_matrix.to(device)
stance_model = LSTM(INPUT_DIM, EMB_DIM, HID_DIM, OUTPUT_DIM, DROPOUT).to(device)

def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
# stance_model.apply(init_weights)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(stance_model):,} trainable parameters')



The model has 1,668,611 trainable parameters


In [None]:
train_evaluate(stance_model, stance_train_dataset, stance_dev_dataset, 'LSTM_stance_model')

100%|██████████| 14/14 [00:01<00:00,  8.22it/s]
100%|██████████| 2/2 [00:00<00:00, 87.39it/s]


Train = Epochs: 1 | Loss: 0.0016761963488534093 | Accuracy: 0.6881797313690186 | f1_macro : 0.2979401174261628
Dev = Epochs: 1 | Loss: 0.0012787414016202092 | Accuracy: 0.8040000200271606 | f1_macro : 0.29711751662971175


100%|██████████| 14/14 [00:00<00:00, 26.62it/s]
100%|██████████| 2/2 [00:00<00:00, 98.91it/s]


Train = Epochs: 2 | Loss: 0.0012971303658559918 | Accuracy: 0.7900686860084534 | f1_macro : 0.3032155483821168
Dev = Epochs: 2 | Loss: 0.0012713299365714192 | Accuracy: 0.8040000200271606 | f1_macro : 0.29711751662971175


100%|██████████| 14/14 [00:00<00:00, 30.84it/s]
100%|██████████| 2/2 [00:00<00:00, 87.66it/s]


Train = Epochs: 3 | Loss: 0.0012468205532059073 | Accuracy: 0.7925013899803162 | f1_macro : 0.3191647215944832
Dev = Epochs: 3 | Loss: 0.0012946464121341705 | Accuracy: 0.8030000329017639 | f1_macro : 0.2969125531521538


100%|██████████| 14/14 [00:00<00:00, 30.56it/s]
100%|██████████| 2/2 [00:00<00:00, 97.62it/s]


Train = Epochs: 4 | Loss: 0.001213718089275062 | Accuracy: 0.7946479320526123 | f1_macro : 0.3212042924152704
Dev = Epochs: 4 | Loss: 0.001312458305619657 | Accuracy: 0.8040000200271606 | f1_macro : 0.29711751662971175


100%|██████████| 14/14 [00:00<00:00, 30.77it/s]
100%|██████████| 2/2 [00:00<00:00, 88.92it/s]


Train = Epochs: 5 | Loss: 0.0011689293896779418 | Accuracy: 0.7985116839408875 | f1_macro : 0.3490400010046384
Dev = Epochs: 5 | Loss: 0.0013499411288648844 | Accuracy: 0.8040000200271606 | f1_macro : 0.3073729543496985


100%|██████████| 14/14 [00:00<00:00, 26.40it/s]
100%|██████████| 2/2 [00:00<00:00, 88.79it/s]


Train = Epochs: 6 | Loss: 0.001135959173552692 | Accuracy: 0.8000858426094055 | f1_macro : 0.38407876780404654
Dev = Epochs: 6 | Loss: 0.0013651772169396281 | Accuracy: 0.800000011920929 | f1_macro : 0.3063919255115977


100%|██████████| 14/14 [00:00<00:00, 30.97it/s]
100%|██████████| 2/2 [00:00<00:00, 90.53it/s]


Train = Epochs: 7 | Loss: 0.0010960103245452046 | Accuracy: 0.8050944209098816 | f1_macro : 0.40681974278664423
Dev = Epochs: 7 | Loss: 0.0013972677988931537 | Accuracy: 0.800000011920929 | f1_macro : 0.3112100205921717


100%|██████████| 14/14 [00:00<00:00, 26.65it/s]
100%|██████████| 2/2 [00:00<00:00, 88.10it/s]


Train = Epochs: 8 | Loss: 0.0010619156528264284 | Accuracy: 0.8096737265586853 | f1_macro : 0.4487416658943208
Dev = Epochs: 8 | Loss: 0.0014468637527897954 | Accuracy: 0.7950000166893005 | f1_macro : 0.3144724556489263


100%|██████████| 14/14 [00:00<00:00, 26.96it/s]
100%|██████████| 2/2 [00:00<00:00, 84.24it/s]


Train = Epochs: 9 | Loss: 0.0010206870501860976 | Accuracy: 0.8138236999511719 | f1_macro : 0.47461994343007036
Dev = Epochs: 9 | Loss: 0.001529464265331626 | Accuracy: 0.7940000295639038 | f1_macro : 0.33132622418336705


100%|██████████| 14/14 [00:00<00:00, 26.62it/s]
100%|██████████| 2/2 [00:00<00:00, 85.67it/s]


Train = Epochs: 10 | Loss: 0.0009931293316185474 | Accuracy: 0.8174012303352356 | f1_macro : 0.48195816404553177
Dev = Epochs: 10 | Loss: 0.0015918752178549767 | Accuracy: 0.7880000472068787 | f1_macro : 0.3173945303522768


100%|██████████| 14/14 [00:00<00:00, 30.47it/s]
100%|██████████| 2/2 [00:00<00:00, 90.25it/s]


Train = Epochs: 11 | Loss: 0.0009683131356723607 | Accuracy: 0.8219805359840393 | f1_macro : 0.5084653389755407
Dev = Epochs: 11 | Loss: 0.001595882000401616 | Accuracy: 0.7880000472068787 | f1_macro : 0.3368082779181307


100%|██████████| 14/14 [00:00<00:00, 26.57it/s]
100%|██████████| 2/2 [00:00<00:00, 86.31it/s]


Train = Epochs: 12 | Loss: 0.0009378723916597664 | Accuracy: 0.83171147108078 | f1_macro : 0.538721670694493
Dev = Epochs: 12 | Loss: 0.0016297631664201617 | Accuracy: 0.7790000438690186 | f1_macro : 0.32195769874214747


100%|██████████| 14/14 [00:00<00:00, 30.32it/s]
100%|██████████| 2/2 [00:00<00:00, 83.48it/s]


Train = Epochs: 13 | Loss: 0.000905512017197907 | Accuracy: 0.8358614444732666 | f1_macro : 0.5879472372620925
Dev = Epochs: 13 | Loss: 0.001652013510465622 | Accuracy: 0.7870000600814819 | f1_macro : 0.35198929281890295


100%|██████████| 14/14 [00:00<00:00, 27.13it/s]
100%|██████████| 2/2 [00:00<00:00, 68.42it/s]


Train = Epochs: 14 | Loss: 0.0009049322688952088 | Accuracy: 0.8354321718215942 | f1_macro : 0.5919811043243174
Dev = Epochs: 14 | Loss: 0.0016741903964430094 | Accuracy: 0.7710000276565552 | f1_macro : 0.3379375705260676


100%|██████████| 14/14 [00:00<00:00, 30.79it/s]
100%|██████████| 2/2 [00:00<00:00, 85.09it/s]


Train = Epochs: 15 | Loss: 0.0008683985797688365 | Accuracy: 0.8397251963615417 | f1_macro : 0.6247275404153103
Dev = Epochs: 15 | Loss: 0.0017523921560496092 | Accuracy: 0.7680000066757202 | f1_macro : 0.3325755805486977


100%|██████████| 14/14 [00:00<00:00, 30.32it/s]
100%|██████████| 2/2 [00:00<00:00, 87.95it/s]


Train = Epochs: 16 | Loss: 0.0008562186849303544 | Accuracy: 0.846737265586853 | f1_macro : 0.6362689369564665
Dev = Epochs: 16 | Loss: 0.001768201240338385 | Accuracy: 0.7600000500679016 | f1_macro : 0.32963554608251244


100%|██████████| 14/14 [00:00<00:00, 30.06it/s]
100%|██████████| 2/2 [00:00<00:00, 86.96it/s]


Train = Epochs: 17 | Loss: 0.0008271705592051148 | Accuracy: 0.8498855233192444 | f1_macro : 0.6310138422254071
Dev = Epochs: 17 | Loss: 0.0017904189880937338 | Accuracy: 0.7680000066757202 | f1_macro : 0.3427360572542049


100%|██████████| 14/14 [00:00<00:00, 26.35it/s]
100%|██████████| 2/2 [00:00<00:00, 52.50it/s]


Train = Epochs: 18 | Loss: 0.0008316556341014802 | Accuracy: 0.8487406969070435 | f1_macro : 0.6318205461544547
Dev = Epochs: 18 | Loss: 0.001844170386902988 | Accuracy: 0.7660000324249268 | f1_macro : 0.3463924147285777


100%|██████████| 14/14 [00:00<00:00, 30.80it/s]
100%|██████████| 2/2 [00:00<00:00, 36.85it/s]


Train = Epochs: 19 | Loss: 0.0007949349237605929 | Accuracy: 0.8543216586112976 | f1_macro : 0.6367966493589187
Dev = Epochs: 19 | Loss: 0.0018669343553483486 | Accuracy: 0.781000018119812 | f1_macro : 0.34205477567912074


100%|██████████| 14/14 [00:00<00:00, 29.31it/s]
100%|██████████| 2/2 [00:00<00:00, 91.00it/s]


Train = Epochs: 20 | Loss: 0.0007825379725545645 | Accuracy: 0.8537492752075195 | f1_macro : 0.6440529838364707
Dev = Epochs: 20 | Loss: 0.0019352296367287636 | Accuracy: 0.7690000534057617 | f1_macro : 0.33306847123204664


100%|██████████| 2/2 [00:00<00:00, 62.13it/s]

Best Dev = Loss: 0.001652013510465622 | Accuracy: 0.7870000600814819 | f1_macro : 0.35198929281890295





In [None]:
INPUT_DIM = vocab_size
OUTPUT_DIM = 10
EMB_DIM = 300 #256
HID_DIM = 100 #512
DROPOUT = 0.5

category_model = LSTM(INPUT_DIM, EMB_DIM, HID_DIM, OUTPUT_DIM, DROPOUT).to(device)

def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
# category_model.apply(init_weights)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(category_model):,} trainable parameters')



The model has 161,810 trainable parameters


In [None]:
train_evaluate(category_model, category_train_dataset, category_dev_dataset, 'LSTM_category_model')

100%|██████████| 14/14 [00:00<00:00, 95.32it/s]
100%|██████████| 2/2 [00:00<00:00, 185.77it/s]


Train = Epochs: 1 | Loss: 0.003327268175780773 | Accuracy: 0.4743846356868744 | f1_macro : 0.06817581588379551
Dev = Epochs: 1 | Loss: 0.002977822907269001 | Accuracy: 0.5450000166893005 | f1_macro : 0.07055016181229774


100%|██████████| 14/14 [00:00<00:00, 94.78it/s]
100%|██████████| 2/2 [00:00<00:00, 169.53it/s]


Train = Epochs: 2 | Loss: 0.0030256854370236397 | Accuracy: 0.5173153877258301 | f1_macro : 0.06818824860888427
Dev = Epochs: 2 | Loss: 0.0029775407165288925 | Accuracy: 0.5450000166893005 | f1_macro : 0.07055016181229774


100%|██████████| 14/14 [00:00<00:00, 99.92it/s]
100%|██████████| 2/2 [00:00<00:00, 139.73it/s]


Train = Epochs: 3 | Loss: 0.0029920313972979784 | Accuracy: 0.5171722769737244 | f1_macro : 0.06838942221590348
Dev = Epochs: 3 | Loss: 0.0029786869417876005 | Accuracy: 0.5450000166893005 | f1_macro : 0.07055016181229774


100%|██████████| 14/14 [00:00<00:00, 124.78it/s]
100%|██████████| 2/2 [00:00<00:00, 197.81it/s]


Train = Epochs: 4 | Loss: 0.0029585901647806168 | Accuracy: 0.5176016092300415 | f1_macro : 0.06933277642226819
Dev = Epochs: 4 | Loss: 0.002987214596942067 | Accuracy: 0.5450000166893005 | f1_macro : 0.07205045116905129


100%|██████████| 14/14 [00:00<00:00, 96.30it/s]
100%|██████████| 2/2 [00:00<00:00, 163.31it/s]


Train = Epochs: 5 | Loss: 0.0029221016447991133 | Accuracy: 0.5190325975418091 | f1_macro : 0.07238525628535736
Dev = Epochs: 5 | Loss: 0.0030150802340358496 | Accuracy: 0.5440000295639038 | f1_macro : 0.07475574718152536


100%|██████████| 14/14 [00:00<00:00, 95.78it/s]
100%|██████████| 2/2 [00:00<00:00, 178.22it/s]


Train = Epochs: 6 | Loss: 0.0028828594367951155 | Accuracy: 0.5216084718704224 | f1_macro : 0.07716238751438978
Dev = Epochs: 6 | Loss: 0.003025543875992298 | Accuracy: 0.5360000133514404 | f1_macro : 0.07135849355170504


100%|██████████| 14/14 [00:00<00:00, 119.96it/s]
100%|██████████| 2/2 [00:00<00:00, 162.66it/s]


Train = Epochs: 7 | Loss: 0.0028581502847373486 | Accuracy: 0.5221808552742004 | f1_macro : 0.08663897409373936
Dev = Epochs: 7 | Loss: 0.0030434003565460443 | Accuracy: 0.5420000553131104 | f1_macro : 0.07550078766125082


100%|██████████| 14/14 [00:00<00:00, 95.76it/s]
100%|██████████| 2/2 [00:00<00:00, 170.57it/s]


Train = Epochs: 8 | Loss: 0.0028206517454236746 | Accuracy: 0.5251860022544861 | f1_macro : 0.09494222539144902
Dev = Epochs: 8 | Loss: 0.0030596558935940266 | Accuracy: 0.5420000553131104 | f1_macro : 0.07332001879699249


100%|██████████| 14/14 [00:00<00:00, 125.78it/s]
100%|██████████| 2/2 [00:00<00:00, 173.42it/s]


Train = Epochs: 9 | Loss: 0.002785559743642807 | Accuracy: 0.5269032716751099 | f1_macro : 0.1119481003243988
Dev = Epochs: 9 | Loss: 0.0030792716424912214 | Accuracy: 0.5320000052452087 | f1_macro : 0.07514311886088208


100%|██████████| 14/14 [00:00<00:00, 112.78it/s]
100%|██████████| 2/2 [00:00<00:00, 160.78it/s]


Train = Epochs: 10 | Loss: 0.0027468553744256496 | Accuracy: 0.5323411226272583 | f1_macro : 0.12225165120759181
Dev = Epochs: 10 | Loss: 0.0031225215643644333 | Accuracy: 0.5320000052452087 | f1_macro : 0.0738567188207476


100%|██████████| 14/14 [00:00<00:00, 128.54it/s]
100%|██████████| 2/2 [00:00<00:00, 161.66it/s]


Train = Epochs: 11 | Loss: 0.002725360682234168 | Accuracy: 0.5337721705436707 | f1_macro : 0.13251351788454385
Dev = Epochs: 11 | Loss: 0.003156650345772505 | Accuracy: 0.5290000438690186 | f1_macro : 0.07366750633548874


100%|██████████| 14/14 [00:00<00:00, 129.64it/s]
100%|██████████| 2/2 [00:00<00:00, 124.04it/s]


Train = Epochs: 12 | Loss: 0.0026989656034857035 | Accuracy: 0.5343445539474487 | f1_macro : 0.1398794625250041
Dev = Epochs: 12 | Loss: 0.003166728187352419 | Accuracy: 0.5290000438690186 | f1_macro : 0.0802075011096316


100%|██████████| 14/14 [00:00<00:00, 88.69it/s]
100%|██████████| 2/2 [00:00<00:00, 166.90it/s]


Train = Epochs: 13 | Loss: 0.0026807785034179688 | Accuracy: 0.5412135124206543 | f1_macro : 0.15836478804114693
Dev = Epochs: 13 | Loss: 0.0031739866826683283 | Accuracy: 0.5300000309944153 | f1_macro : 0.09004663197344454


100%|██████████| 14/14 [00:00<00:00, 92.00it/s]
100%|██████████| 2/2 [00:00<00:00, 185.66it/s]


Train = Epochs: 14 | Loss: 0.0026505955029278994 | Accuracy: 0.5455065965652466 | f1_macro : 0.16779656411636562
Dev = Epochs: 14 | Loss: 0.003197582671418786 | Accuracy: 0.5270000100135803 | f1_macro : 0.09045664126443212


100%|██████████| 14/14 [00:00<00:00, 93.53it/s]
100%|██████████| 2/2 [00:00<00:00, 158.67it/s]


Train = Epochs: 15 | Loss: 0.0026324032805860043 | Accuracy: 0.5470806956291199 | f1_macro : 0.15966310641785295
Dev = Epochs: 15 | Loss: 0.0032106086146086454 | Accuracy: 0.5300000309944153 | f1_macro : 0.09713368389468537


100%|██████████| 14/14 [00:00<00:00, 94.27it/s]
100%|██████████| 2/2 [00:00<00:00, 174.71it/s]


Train = Epochs: 16 | Loss: 0.0025970463175326586 | Accuracy: 0.5485117435455322 | f1_macro : 0.18641036711628917
Dev = Epochs: 16 | Loss: 0.003227886278182268 | Accuracy: 0.5180000066757202 | f1_macro : 0.10337461387594057


100%|██████████| 14/14 [00:00<00:00, 93.69it/s]
100%|██████████| 2/2 [00:00<00:00, 170.86it/s]


Train = Epochs: 17 | Loss: 0.002586816903203726 | Accuracy: 0.5512306690216064 | f1_macro : 0.211558659440015
Dev = Epochs: 17 | Loss: 0.003219515085220337 | Accuracy: 0.5230000019073486 | f1_macro : 0.1047906179625524


100%|██████████| 14/14 [00:00<00:00, 92.17it/s]
100%|██████████| 2/2 [00:00<00:00, 166.16it/s]


Train = Epochs: 18 | Loss: 0.0025873309932649136 | Accuracy: 0.553806483745575 | f1_macro : 0.2126181340983942
Dev = Epochs: 18 | Loss: 0.0032386756502091885 | Accuracy: 0.5270000100135803 | f1_macro : 0.10165565539665584


100%|██████████| 14/14 [00:00<00:00, 105.84it/s]
100%|██████████| 2/2 [00:00<00:00, 164.51it/s]


Train = Epochs: 19 | Loss: 0.0025586411356925964 | Accuracy: 0.5560961365699768 | f1_macro : 0.21517146069674423
Dev = Epochs: 19 | Loss: 0.003245460567995906 | Accuracy: 0.5220000147819519 | f1_macro : 0.10086471641753211


100%|██████████| 14/14 [00:00<00:00, 109.14it/s]
100%|██████████| 2/2 [00:00<00:00, 170.83it/s]


Train = Epochs: 20 | Loss: 0.0025318004190921783 | Accuracy: 0.5619633793830872 | f1_macro : 0.2218532840379659
Dev = Epochs: 20 | Loss: 0.0032672507222741842 | Accuracy: 0.5200000405311584 | f1_macro : 0.09709828606314849


100%|██████████| 2/2 [00:00<00:00, 126.43it/s]

Best Dev = Loss: 0.003219515085220337 | Accuracy: 0.5230000019073486 | f1_macro : 0.1047906179625524





## Dumped