# Importing libraries

In [417]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import re
import nltk
from tqdm import tqdm
from sklearn.metrics import f1_score

# Data PreProcessing

In [418]:
train = pd.read_csv('Dataset/train.csv')
text_train, category_train, stance_train = train['text'], train['category'], train['stance']

dev = pd.read_csv('Dataset/dev.csv')
text_dev, category_dev, stance_dev = dev['text'], dev['category'], dev['stance']

In [419]:
train.head(10)

Unnamed: 0,text,category,stance
0,بيل غيتس يتلقى لقاح #كوفيد19 من غير تصوير الاب...,celebrity,1
1,وزير الصحة لحد اليوم وتحديدا هلأ بمؤتمروا الصح...,info_news,1
2,قولكن رح يكونو اد المسؤولية ب لبنان لما يوصل ...,info_news,1
3,#تركيا.. وزير الصحة فخر الدين قوجة يتلقى أول ج...,celebrity,1
4,وئام وهاب يشتم الدول الخليجية في كل طلة اعلامي...,personal,0
5,"لقاح #كورونا في أميركا.. قلق متزايد من ""التوزي...",info_news,0
6,لبنان اشترى مليونان لقاح امريكي اذا شلنا يلي ع...,info_news,1
7,من عوارض لقاح كورونا<LF>هو تهكير حسابك عتويتر<...,personal,0
8,هناك 1780 مليونيراً في لبنان. ماذا لو فُرضت ال...,unrelated,0
9,دعبول حضرتك منو انت وتطلب من قائد دولة إسلامية...,info_news,1


In [420]:
dev.head(10)

Unnamed: 0,text,category,stance
0,#مريم_رجوي: <LF>حظر خامنئي المجرم شراء #لقاح_ك...,info_news,1
1,#الصحة:<LF>•تم إعطاء 259.530 جرعة من لقاح #كور...,plan,1
2,#خادم_الحرمين - حفظه الله - يتلقى الجرعة الأول...,celebrity,1
3,#الصحه_العالميه: لقاحات #كورونا آمنة ولا خوف م...,info_news,1
4,"#وزيرة_الصحة ""#هالة_زايد"" تقول إنه يجرى مراجعة...",info_news,1
5,2️⃣ وانتهى الفريق من الدراسات قبل السريرية ونش...,info_news,1
6,عاجل 🔴 <LF>.<LF><LF>.<LF><LF>وزارة الصحة :<LF>...,plan,1
7,#فيديو | السفير الأميركي لدى السعودية بعد تلقي...,info_news,1
8,تصريحات وبس الحكومة مع السيسي علي حسب اللقطة! ...,info_news,0
9,الاتحاد الاوروبي تفاوض لشراء لقاحات الكورونا م...,info_news,1


In [421]:
text_train, category_train, stance_train = np.array(train['text']), np.array(train['category']), np.array(train['stance'])
text_dev, category_dev, stance_dev = np.array(dev['text']), np.array(dev['category']), np.array(dev['stance'])

print(text_train.shape, category_train.shape, stance_train.shape)
print(text_dev.shape, category_dev.shape, stance_dev.shape)

(6988,) (6988,) (6988,)
(1000,) (1000,) (1000,)


In [422]:
def PreProcessing(text):

    # remove links
    text = [re.sub(r'https?:\/\/.*[\r\n]*', '', x, flags=re.MULTILINE) for x in text]
    # text = [re.sub(r'https?:\/\/\S*', '', x, flags=re.MULTILINE) for x in text]

    # remove emojis
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)
    
    text = [emoji_pattern.sub(r'', x) for x in text] # no emoji

    # remove english words
    text = [re.sub(r'\s*[A-Za-z]+\b', '' , x) for x in text]

    # tokenize
    text = [nltk.tokenize.word_tokenize(x) for x in text]

    # # remove stop-words
    # stopwords = set(nltk.corpus.stopwords.words("arabic"))

    # for i in range(len(text)):
    #     text[i] = [word for word in text[i] if word not in stopwords]


    for i in range(len(text)):
        text[i] = [word for word in text[i] if len(word)>2]

    # but anything in empty strings
    for i in range(len(text)):
        if(len(text[i])==0):
            text[i]='<unk>'
    
    return text

In [423]:
print(max(text_train, key=len))
text_train = PreProcessing(text_train)
text_dev = PreProcessing(text_dev)

الامريكيين متهمون بصنع ونشر فيروس كورونا ولذلك لا يمكن الوثوق بهم”<LF>الإمام الخامنئي<LF><LF>#لقاح_آمن	info_news	0
train	حبيبنا وقرة أعيننا سيدي #خادم_الحرمين_الشريفين الملك سلمان حفظه الله يتلقى الجرعة الأولى من لقاح كورنا … نفعه الله به ومتعه بالصحة والعافيه. https://t.co/AJRzC7dCWe	celebrity	1
train	رغم تلقيه جرعتين من لقاح #فايزر.. إصابة كبير حاخامات #تل_أبيب، يسرائيل لاو،  83 عاما، بفيروس #كورونا، حيث انتقلت له العدوى من زوجته بعد مخالطتها مصابا آخر https://t.co/RGI6WTgrxf	celebrity	0
train	تلقيت قبل قليل الجرعة الثانية من لقاح كورونا، وكلي فخر بجهود وطننا الغالي وتوجيهات قيادتنا الرشيدة التي تؤكد أن صحة الإنسان أولاً.  🇸🇦🇸🇦🇸🇦🇸🇦 https://t.co/XGstr9Zvzf	info_news	1
train	شركة صحة": جزيل الشكر للمواطنة ملهية شويرب سعيد العامري، التي تبلغ ١٠٢ عاماً<LF>لكونها قدوة لجميع أفراد المجتمع من خلال <LF> تلقيها أول جرعة من لقاح كوفيد-19 في مركز القوع الصحي #الإمارات_اليوم https://t.co/uBSCd0JZ4Y


In [424]:
with open('processed_train.txt','w', encoding='utf8') as f:
	for i in text_train:
		f.write('%s\n'%i)
print(text_train[0])

['بيل', 'غيتس', 'يتلقى', 'لقاح', 'كوفيد19', 'غير', 'تصوير', 'الابرة', 'السيرنجة', 'الدواء', 'لابس', 'بولو', 'صيفي', 'الشتاء', 'يقول', 'إحدى', 'مزايا', 'عمر', 'عامًا', 'انه', 'مؤهل', 'للحصول', 'على', 'اللقاح', '...', 'يعنى', 'كان', 'يحتاج', 'اللقاح', 'كان', 'عمره', 'اصغر']


In [425]:
def BuildVocab(text, pad='<pad>', unk='<unk>'):

    vocab = set()    
    for x in text:
        vocab |= set(x)

    vocab = [pad, unk] + list(vocab)

    id2word = {i: word for i, word in enumerate(vocab)}
    word2id = {word: i for i, word in id2word.items()}
    vocab_size = len(vocab)

    return vocab_size, vocab, id2word, word2id    

In [426]:
vocab_size, vocab, id2word, word2id = BuildVocab(text_train)
print(vocab_size)

32058


In [427]:
categories = set(category_train)
print(categories)
category2id = {word:i for i, word in enumerate(list(categories))}
print(category2id['celebrity'])

{'celebrity', 'requests', 'others', 'personal', 'restrictions', 'advice', 'info_news', 'plan', 'rumors', 'unrelated'}
0


# Model Building

## LSTM

### Ideas to try
1) bi-directional
2) pre-training
3) multi-layers
4) BERT
5) transformers notebook
6) packed_padded_sequences
7) pre-trained embedding

### Building Model

In [428]:
class Dataset(torch.utils.data.Dataset):

  def __init__(self, x, y, pad='<pad>', unk='<unk>', word2id=word2id):

    x = x.copy()

    # src lengths to be used in pack padded
    self.seq_lengths = torch.LongTensor(list(map(len, x)))

    print(x[0], self.seq_lengths[0])

    for i in range(len(x)):
      x[i] = [word2id[word] if word in word2id else word2id[unk] for word in x[i]]
    
    print(x[0])

    self.X = torch.nn.utils.rnn.pad_sequence([torch.tensor(sentence) for sentence in x], batch_first=True, padding_value=word2id[pad])

    # sort sequeces decreasing in size
    self.seq_lengths, perm_idx = self.seq_lengths.sort(0, descending=True)
    self.X = self.X[perm_idx]

    print(self.X[0])

    print(self.X.shape)

    print(min(self.seq_lengths))
    
    self.Y = torch.tensor(y)
    self.len = len(x)
    self.pad = pad

  def __len__(self):
    return self.len

  def __getitem__(self, idx):
    return self.X[idx], self.Y[idx], self.seq_lengths[idx]

In [429]:
stance_train_dataset = Dataset(text_train, stance_train + 1)
category_train_dataset = Dataset(text_train, [category2id[category] for category in category_train])

stance_dev_dataset = Dataset(text_dev, stance_dev + 1)
category_dev_dataset = Dataset(text_dev, [category2id[category] for category in category_dev])

['بيل', 'غيتس', 'يتلقى', 'لقاح', 'كوفيد19', 'غير', 'تصوير', 'الابرة', 'السيرنجة', 'الدواء', 'لابس', 'بولو', 'صيفي', 'الشتاء', 'يقول', 'إحدى', 'مزايا', 'عمر', 'عامًا', 'انه', 'مؤهل', 'للحصول', 'على', 'اللقاح', '...', 'يعنى', 'كان', 'يحتاج', 'اللقاح', 'كان', 'عمره', 'اصغر'] tensor(32)
[15030, 14341, 25741, 30111, 18289, 4561, 26464, 10744, 28331, 8426, 16291, 21587, 28526, 17899, 29893, 146, 17915, 102, 6346, 14485, 7116, 30255, 5283, 24406, 30131, 6777, 28145, 18442, 24406, 28145, 18383, 12639]
tensor([12703, 30089, 31563, 21456,  2244,  6995, 29098, 29052, 16216, 23061,
          812, 26852, 25494, 28991, 24539, 30334,  3239, 31392, 11615, 20873,
         2582, 19385, 28177, 25741,  4128, 24880, 30111, 10062, 14584, 28177,
        14122, 31410, 27316, 23687,  3219, 29023, 30111, 18152, 20611, 21934,
         2444,  6858,  2394, 16127,  3437, 17172,  8597,  7382, 21099,  1416,
         8769, 10547, 13242, 13193, 21455, 23953, 22379, 15023,  4128, 16948,
        30111,  8597, 10179,  628

In [430]:
class LSTM(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, output_dim, dropout):
        super().__init__()

        self.hid_dim = hid_dim
        self.output_dim = output_dim

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hid_dim, dropout=dropout)

        self.fc_out = nn.Linear(hid_dim, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_len):
        #src = [src len, batch size]
        
        embedded = self.embedding(src)
        embedded = self.dropout(embedded)
        #embedded = [src len, batch size, emb dim]

        packed_embedded =  torch.nn.utils.rnn.pack_padded_sequence(embedded, src_len.cpu().numpy(), batch_first=False)

        outputs, (hidden, cell) = self.lstm(packed_embedded)
        #outputs = [src len, batch size, hid dim]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        #outputs are always from the top hidden layer

        prediction = self.fc_out(hidden)
        #prediction = [1, batch size, output dim]

        prediction = prediction.squeeze(0)
        #prediction = [batch size, output dim]

        return prediction        

In [431]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Training

In [432]:
def train(model, train_dataset, train_dataloader, optimizer, criterion, clip):
    
    model.train()
        
    total_acc_train = 0.0
    total_loss_train = 0.0

    for train_input, train_label, src_len in tqdm(train_dataloader):

        train_input = train_input.to(device).permute(1, 0)
        # print(train_input.shape)
        train_label = train_label.to(device)

        output = model(train_input, src_len)
        # print(output.shape, train_label.shape)
        
        output_dim = output.shape[-1]
        output = output.view(-1, output_dim)
        train_label = train_label.view(-1)

        batch_loss = criterion(output, train_label)

        total_loss_train += batch_loss
        
        acc = torch.sum(torch.argmax(output, -1) == train_label) 
        total_acc_train += acc

        optimizer.zero_grad()

        batch_loss.backward()

        # torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()

    # calculate loss    
    epoch_loss = total_loss_train / len(train_dataset)
    
    # calculate accuracy
    epoch_acc = total_acc_train / len(train_dataset)

    # calculate f1 score
    train_input, train_label, src_len = train_dataset[:]
    train_input = train_input.to(device).permute(1, 0)
    train_label = train_label.to(device)
    output = model(train_input, src_len)

    output_dim = output.shape[-1]
    output = output.view(-1, output_dim)
    train_label = train_label.view(-1)
    y_true, y_pred = train_label, torch.argmax(output, -1)

    f1_macro = f1_score(y_true, y_pred, average='macro')

    return epoch_loss, epoch_acc, f1_macro

In [433]:
def evaluate(model, test_dataset, test_dataloader, criterion):

  model.eval()

  total_acc_test = 0.0
  total_loss_test = 0.0
  
  with torch.no_grad():

    for test_input, test_label, src_len in tqdm(test_dataloader):

      test_input = test_input.to(device).permute(1, 0)
      test_label = test_label.to(device)


      output = model(test_input, src_len)

      batch_loss = criterion(output.view(-1, model.output_dim), test_label.view(-1))

      total_loss_test += batch_loss

      acc = torch.sum(torch.argmax(output, -1)==test_label)
      total_acc_test += acc

    # calculate loss
    total_loss_test /= len(test_dataset)

    # calculate accuracy
    total_acc_test /= len(test_dataset)

    # calculate f1 score
    test_input, test_label, src_len = test_dataset[:]
    test_input = test_input.to(device).permute(1, 0)
    test_label = test_label.to(device)
    output = model(test_input, src_len)

    output_dim = output.shape[-1]
    output = output.view(-1, output_dim)
    test_label = test_label.view(-1)
    y_true, y_pred = test_label, torch.argmax(output, -1)

    f1_macro = f1_score(y_true, y_pred, average='macro')
  
  return total_loss_test, total_acc_test, f1_macro

In [434]:
def train_evaluate(model, train_dataset, dev_dataset, model_name, batch_size=512, epochs=10, learning_rate=0.01, clip=1):

  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)

  dev_dataloader = torch.utils.data.DataLoader(dev_dataset, batch_size=batch_size)

  # criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)
  criterion = nn.CrossEntropyLoss()

  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  
  model = model.to(device)
  criterion = criterion.to(device)

  best_f1_macro = 0

  for epoch_num in range(epochs):

    epoch_loss, epoch_acc, train_f1_macro = train(model, train_dataset, train_dataloader, optimizer, criterion, clip)
    dev_loss, dev_acc, dev_f1_macro = evaluate(model, dev_dataset, dev_dataloader, criterion)

    if dev_f1_macro > best_f1_macro:
      best_f1_macro = dev_f1_macro
      torch.save(model.state_dict(), 'best_'+model_name+'.pt')

    print(f'Train = Epochs: {epoch_num + 1} | Loss: {epoch_loss} | Accuracy: {epoch_acc} | f1_macro : {train_f1_macro}')
    print(f'Dev = Epochs: {epoch_num + 1} | Loss: {dev_loss} | Accuracy: {dev_acc} | f1_macro : {dev_f1_macro}')    

  model.load_state_dict(torch.load('best_'+model_name+'.pt'))

  dev_loss, dev_acc, dev_f1_macro = evaluate(model, dev_dataset, dev_dataloader, criterion)

  print(f'Best Dev = Loss: {dev_loss} | Accuracy: {dev_acc} | f1_macro : {dev_f1_macro}')

In [435]:
INPUT_DIM = vocab_size
OUTPUT_DIM = 3
EMB_DIM = 50 #256
HID_DIM = 50gi #512
DROPOUT = 0.5

stance_model = LSTM(INPUT_DIM, EMB_DIM, HID_DIM, OUTPUT_DIM, DROPOUT).to(device)

def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
stance_model.apply(init_weights)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(stance_model):,} trainable parameters')

The model has 1,623,453 trainable parameters




In [436]:
train_evaluate(stance_model, stance_train_dataset, stance_dev_dataset, 'stance_model')

100%|██████████| 14/14 [00:05<00:00,  2.38it/s]
100%|██████████| 2/2 [00:00<00:00, 19.47it/s]


Train = Epochs: 1 | Loss: 0.0015259748324751854 | Accuracy: 0.7925014495849609 | f1_macro : 0.29474692639310235
Dev = Epochs: 1 | Loss: 0.0012701037339866161 | Accuracy: 0.8040000200271606 | f1_macro : 0.29711751662971175


100%|██████████| 14/14 [00:05<00:00,  2.64it/s]
100%|██████████| 2/2 [00:00<00:00, 16.84it/s]


Train = Epochs: 2 | Loss: 0.0012544351629912853 | Accuracy: 0.7925014495849609 | f1_macro : 0.29474692639310235
Dev = Epochs: 2 | Loss: 0.001246238243766129 | Accuracy: 0.8040000200271606 | f1_macro : 0.29711751662971175


100%|██████████| 14/14 [00:04<00:00,  2.85it/s]
100%|██████████| 2/2 [00:00<00:00, 23.87it/s]


Train = Epochs: 3 | Loss: 0.0011045760475099087 | Accuracy: 0.7945048809051514 | f1_macro : 0.4897222392394179
Dev = Epochs: 3 | Loss: 0.0014661073219031096 | Accuracy: 0.7839999794960022 | f1_macro : 0.31420734948216483


100%|██████████| 14/14 [00:04<00:00,  2.82it/s]
100%|██████████| 2/2 [00:00<00:00, 18.92it/s]


Train = Epochs: 4 | Loss: 0.0007243914296850562 | Accuracy: 0.8626216650009155 | f1_macro : 0.5457956139634875
Dev = Epochs: 4 | Loss: 0.0017220575828105211 | Accuracy: 0.7609999775886536 | f1_macro : 0.3033629223560574


100%|██████████| 14/14 [00:04<00:00,  3.00it/s]
100%|██████████| 2/2 [00:00<00:00, 26.04it/s]


Train = Epochs: 5 | Loss: 0.0006392489885911345 | Accuracy: 0.879221498966217 | f1_macro : 0.5665444765405875
Dev = Epochs: 5 | Loss: 0.002097613178193569 | Accuracy: 0.7170000076293945 | f1_macro : 0.31152287768239817


100%|██████████| 14/14 [00:04<00:00,  2.89it/s]
100%|██████████| 2/2 [00:00<00:00, 19.47it/s]


Train = Epochs: 6 | Loss: 0.0004904825473204255 | Accuracy: 0.9035489559173584 | f1_macro : 0.6681330998445826
Dev = Epochs: 6 | Loss: 0.0021852769423276186 | Accuracy: 0.675000011920929 | f1_macro : 0.3255349794238683


100%|██████████| 14/14 [00:04<00:00,  2.90it/s]
100%|██████████| 2/2 [00:00<00:00, 21.77it/s]


Train = Epochs: 7 | Loss: 0.0004377341247163713 | Accuracy: 0.9159988760948181 | f1_macro : 0.8257415978678635
Dev = Epochs: 7 | Loss: 0.002176793059334159 | Accuracy: 0.753000020980835 | f1_macro : 0.31668197605701043


100%|██████████| 14/14 [00:04<00:00,  2.87it/s]
100%|██████████| 2/2 [00:00<00:00, 15.89it/s]


Train = Epochs: 8 | Loss: 0.00034524285001680255 | Accuracy: 0.9400400519371033 | f1_macro : 0.8622020009771952
Dev = Epochs: 8 | Loss: 0.0025902995839715004 | Accuracy: 0.5879999995231628 | f1_macro : 0.33001760669974883


100%|██████████| 14/14 [00:05<00:00,  2.80it/s]
100%|██████████| 2/2 [00:00<00:00, 16.44it/s]


Train = Epochs: 9 | Loss: 0.00030241796048358083 | Accuracy: 0.9502003192901611 | f1_macro : 0.8856243274159352
Dev = Epochs: 9 | Loss: 0.0023979563266038895 | Accuracy: 0.628000020980835 | f1_macro : 0.321278928762374


100%|██████████| 14/14 [00:05<00:00,  2.69it/s]
100%|██████████| 2/2 [00:00<00:00, 15.67it/s]


Train = Epochs: 10 | Loss: 0.0002805905824061483 | Accuracy: 0.9527761936187744 | f1_macro : 0.9037462447264178
Dev = Epochs: 10 | Loss: 0.0023425607942044735 | Accuracy: 0.699999988079071 | f1_macro : 0.3143207704367774


100%|██████████| 2/2 [00:00<00:00, 20.89it/s]

Best Dev = Loss: 0.0025902995839715004 | Accuracy: 0.5879999995231628 | f1_macro : 0.33001760669974883





In [437]:
INPUT_DIM = vocab_size
OUTPUT_DIM = 10
EMB_DIM = 50 #256
HID_DIM = 50 #512
DROPOUT = 0.5

category_model = LSTM(INPUT_DIM, EMB_DIM, HID_DIM, OUTPUT_DIM, DROPOUT).to(device)

def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
category_model.apply(init_weights)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(category_model):,} trainable parameters')

The model has 1,623,810 trainable parameters




In [438]:
train_evaluate(category_model, category_train_dataset, category_dev_dataset, 'category_model')

100%|██████████| 14/14 [00:04<00:00,  2.94it/s]
100%|██████████| 2/2 [00:00<00:00, 17.72it/s]


Train = Epochs: 1 | Loss: 0.003572741523385048 | Accuracy: 0.5176016092300415 | f1_macro : 0.06820067898906072
Dev = Epochs: 1 | Loss: 0.003028520615771413 | Accuracy: 0.5450000166893005 | f1_macro : 0.07055016181229774


100%|██████████| 14/14 [00:04<00:00,  2.94it/s]
100%|██████████| 2/2 [00:00<00:00, 21.11it/s]


Train = Epochs: 2 | Loss: 0.003051271429285407 | Accuracy: 0.5174584984779358 | f1_macro : 0.06820067898906072
Dev = Epochs: 2 | Loss: 0.0029543552082031965 | Accuracy: 0.5450000166893005 | f1_macro : 0.07055016181229774


100%|██████████| 14/14 [00:05<00:00,  2.77it/s]
100%|██████████| 2/2 [00:00<00:00, 24.71it/s]


Train = Epochs: 3 | Loss: 0.0029514392372220755 | Accuracy: 0.5174584984779358 | f1_macro : 0.07226870089498469
Dev = Epochs: 3 | Loss: 0.0030211356934159994 | Accuracy: 0.5450000166893005 | f1_macro : 0.07055016181229774


100%|██████████| 14/14 [00:04<00:00,  2.95it/s]
100%|██████████| 2/2 [00:00<00:00, 18.69it/s]


Train = Epochs: 4 | Loss: 0.0026292894035577774 | Accuracy: 0.5506582856178284 | f1_macro : 0.14401527347146087
Dev = Epochs: 4 | Loss: 0.0038733079563826323 | Accuracy: 0.3449999988079071 | f1_macro : 0.07923308780462594


100%|██████████| 14/14 [00:04<00:00,  2.98it/s]
100%|██████████| 2/2 [00:00<00:00, 15.19it/s]


Train = Epochs: 5 | Loss: 0.002257042797282338 | Accuracy: 0.6070406436920166 | f1_macro : 0.21299145296600175
Dev = Epochs: 5 | Loss: 0.003835872747004032 | Accuracy: 0.38499999046325684 | f1_macro : 0.08352933178450725


100%|██████████| 14/14 [00:05<00:00,  2.61it/s]
100%|██████████| 2/2 [00:00<00:00, 18.89it/s]


Train = Epochs: 6 | Loss: 0.001975931227207184 | Accuracy: 0.6838866472244263 | f1_macro : 0.29044308710970984
Dev = Epochs: 6 | Loss: 0.004376186057925224 | Accuracy: 0.2549999952316284 | f1_macro : 0.08103512854948726


100%|██████████| 14/14 [00:05<00:00,  2.70it/s]
100%|██████████| 2/2 [00:00<00:00, 20.21it/s]


Train = Epochs: 7 | Loss: 0.0018139061285182834 | Accuracy: 0.7095019817352295 | f1_macro : 0.24032411612872254
Dev = Epochs: 7 | Loss: 0.004342125728726387 | Accuracy: 0.4880000054836273 | f1_macro : 0.0734114689977542


100%|██████████| 14/14 [00:04<00:00,  2.87it/s]
100%|██████████| 2/2 [00:00<00:00, 17.44it/s]


Train = Epochs: 8 | Loss: 0.0017062234692275524 | Accuracy: 0.7179450392723083 | f1_macro : 0.338381067408751
Dev = Epochs: 8 | Loss: 0.004277134779840708 | Accuracy: 0.4259999990463257 | f1_macro : 0.09368972547027611


100%|██████████| 14/14 [00:04<00:00,  2.88it/s]
100%|██████████| 2/2 [00:00<00:00, 22.01it/s]


Train = Epochs: 9 | Loss: 0.001338838366791606 | Accuracy: 0.7821980714797974 | f1_macro : 0.3988431998715768
Dev = Epochs: 9 | Loss: 0.00476084602996707 | Accuracy: 0.31299999356269836 | f1_macro : 0.08739615405833108


100%|██████████| 14/14 [00:05<00:00,  2.67it/s]
100%|██████████| 2/2 [00:00<00:00, 15.99it/s]


Train = Epochs: 10 | Loss: 0.0012460948200896382 | Accuracy: 0.8038065433502197 | f1_macro : 0.3657707794817454
Dev = Epochs: 10 | Loss: 0.005554269999265671 | Accuracy: 0.18400000035762787 | f1_macro : 0.07299179951013351


100%|██████████| 2/2 [00:00<00:00, 15.41it/s]


Best Dev = Loss: 0.004277134779840708 | Accuracy: 0.4259999990463257 | f1_macro : 0.09368972547027611


## Dumped

In [439]:
evaluate(stance_model, stance_dev_dataset)

TypeError: evaluate() missing 2 required positional arguments: 'test_dataloader' and 'criterion'

In [None]:
evaluate(category_model, category_dev_dataset)

In [None]:
def train(model, train_dataset, batch_size=512, epochs=10, learning_rate=0.01):
  
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)

  # criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)
  criterion = nn.CrossEntropyLoss()

  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  
  model = model.to(device)
  criterion = criterion.to(device)

  for epoch_num in range(epochs):
    total_acc_train = 0.0
    total_loss_train = 0.0

    for train_input, train_label, src_len in tqdm(train_dataloader):

      train_input = train_input.to(device).permute(1, 0)
      # print(train_input.shape)
      train_label = train_label.to(device)

      output = model(train_input, src_len)

      # print(output.shape, train_label.shape)
      
      batch_loss = criterion(output.view(-1, model.output_dim), train_label.view(-1))

      total_loss_train += batch_loss
      
      acc = torch.sum(torch.argmax(output, -1) == train_label) 
      total_acc_train += acc

      optimizer.zero_grad()

      batch_loss.backward()

      optimizer.step()
      
    epoch_loss = total_loss_train / len(train_dataset)

    epoch_acc = total_acc_train / len(train_dataset)

    print(
        f'Epochs: {epoch_num + 1} | Train Loss: {epoch_loss} \
        | Train Accuracy: {epoch_acc}\n')


In [None]:
def evaluate(model, test_dataset, batch_size=512):

  test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  model = model.to(device)

  total_acc_test = 0.0
  
  with torch.no_grad():

    for test_input, test_label, src_len in tqdm(test_dataloader):

      test_input = test_input.to(device).permute(1, 0)
      test_label = test_label.to(device)


      output = model(test_input, src_len)

      acc = torch.sum(torch.argmax(output, -1)==test_label)
      total_acc_test += acc
    
    total_acc_test /= len(test_dataset)
  
  print(f'\nDev Accuracy: {total_acc_test}')