In [1]:
import torch, csv, emoji
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Sigmoid
import torch.optim as optim
from torchtext.data.metrics import bleu_score
import numpy as np
from transformers import AutoModel, AutoTokenizer 
from sklearn import model_selection
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MultiLabelBinarizer

from Bert_NN import read_csv, vectorize_sentences, convert_1d_str, convert_2d_str, emojis_accuracy, emojiDataset, toEmoji
from utils import DATAPATH

Using cpu device


In [2]:
torch.cuda.is_available()

False

In [3]:
features, labels, total, sentences, emojis, labels_classes = vectorize_sentences(DATAPATH)

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaModel: ['lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.



-----------------------Begining Data Loading-----------------------

Loading data for file '01'.
Loaded 240 labelled sentence and emoji data.

Loading data for file '02'.
Loaded 227 labelled sentence and emoji data.

Loading data for file '03'.
Loaded 247 labelled sentence and emoji data.

Loading data for file '04'.
Loaded 248 labelled sentence and emoji data.

Loading data for file '05'.
Loaded 205 labelled sentence and emoji data.

Loaded total 1167 labelled sentence and emoji data in 0.0 minutes.

-----------------------Finished Data Loading-----------------------


-----------------------Begining Feature Extraction-----------------------
Extracted total 1167 feature and label data in 0.9 minutes.

-----------------------Finished Feature Extraction-----------------------



In [4]:
loss_func = nn.BCELoss()
S = Sigmoid()
epochs = 10
num_folds = 10
cross_validation = model_selection.KFold(n_splits=num_folds, random_state=None, shuffle=True)

bleu_max_n = 1
bleu_weights = [1]

best_to_emoji = None
best_score = -1

In [5]:
log = open('log.txt', 'w', encoding='utf-8')

In [6]:
for index, (train_index, test_index) in enumerate(cross_validation.split(features)):
    print(f'\nFold: {index+1}/{num_folds}')
    features_train, features_test = features[train_index], features[test_index]
    labels_train, labels_test = labels[train_index], labels[test_index]
    train_dataset = emojiDataset(features_train, labels_train)
    data_loader = DataLoader(dataset=train_dataset, batch_size=32)

    toemoji = toEmoji()
    learn_rate = optim.Adam(toemoji.parameters(), lr=0.005)

    for i in range(epochs):
        # print(f'\tEpoch: {i+1}/{epochs}')
        for data in data_loader:
            cur_train_features, cur_train_labels = data
            toemoji.zero_grad()
            results = toemoji(cur_train_features.float())
            S_results = S(results)
            loss = loss_func(S_results, cur_train_labels)
            loss.backward()
            learn_rate.step()
        # print('\t\tloss is: {:.4f}'.format(loss))
    
    test_dataset = emojiDataset(features_test, labels_test)
    data_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=True)
    total_BLEU = 0
    num_sample = 0
    total_accuracy = 0
    for data in data_loader:
        cur_test_features, cur_test_labels = data
        toemoji.zero_grad()
        results = toemoji(cur_test_features.float())

        # result_labels = []
        str_labels = convert_2d_str(cur_test_labels)
        for result_index, result in enumerate(results):
            cur_result_label = ['1' if elem.item()>0 else '0' for elem in result]
            total_accuracy += emojis_accuracy(cur_result_label, str_labels[result_index])
            # print(cur_result_label)
            # print(str_labels[result_index])
            total_BLEU += bleu_score(cur_result_label, str_labels[result_index], max_n=bleu_max_n, weights=bleu_weights)
            num_sample += 1
    average_BLUE = total_BLEU/num_sample
    print(f'\nAverage BLEU Score : {average_BLUE} And Accuracy: {total_accuracy/num_sample}')
    log.write('\hline\n')
    log.write('{} & {:.5f} & {:.5f} \\ \n'.format(index+1,1-(total_accuracy/num_sample),average_BLUE))
    if average_BLUE > best_score: best_to_emoji = toemoji


Fold: 1/10

Average BLEU Score : 0.9350427328011929 And Accuracy: 0.12920227920227922

Fold: 2/10

Average BLEU Score : 0.9259829027021033 And Accuracy: 0.15978835978835979

Fold: 3/10

Average BLEU Score : 0.9297435854235266 And Accuracy: 0.12435897435897436

Fold: 4/10

Average BLEU Score : 0.9264957222164186 And Accuracy: 0.09643874643874645

Fold: 5/10

Average BLEU Score : 0.9336752117189586 And Accuracy: 0.1643874643874644

Fold: 6/10

Average BLEU Score : 0.9340170881687067 And Accuracy: 0.11096866096866097

Fold: 7/10

Average BLEU Score : 0.9270085458062652 And Accuracy: 0.14002849002849005

Fold: 8/10

Average BLEU Score : 0.9291379292463434 And Accuracy: 0.11795977011494252

Fold: 9/10

Average BLEU Score : 0.9291379287325102 And Accuracy: 0.1553981937602627

Fold: 10/10

Average BLEU Score : 0.9336206871887733 And Accuracy: 0.1360632183908046


In [7]:
from collections import defaultdict

collection = defaultdict(int)
predicted = defaultdict(int)

total_dataset = emojiDataset(features, labels)

overall_accuracy = 0
overall_BLEU = 0

with torch.no_grad():
    for i, data in enumerate(total_dataset) :
        feature, label = data
        result = toemoji(feature.float())

        cur_result_label = ['1' if elem.item()>0 else '0' for elem in result]
        cur_real_label = convert_1d_str(label)
        cur_accu = emojis_accuracy(cur_result_label, cur_real_label)
        cur_BLEU = bleu_score(cur_result_label, cur_real_label, max_n=bleu_max_n, weights=bleu_weights)
        overall_accuracy += cur_accu
        overall_BLEU += cur_BLEU

        result = [index for index, elem in enumerate(result) if elem.item()>0]
        original_label_index = [index for index, elem in enumerate(label) if elem!=0]
        # print(original_label_index)
        for label_index in result:
            predicted[label_index] += 1
        for label_index in original_label_index:
            collection[label_index] += 1
        
        real_labels = [labels_classes[label_index] for label_index in result]
        log.write(f'Sentence: {sentences[i]}\nWith Bleu score: {cur_BLEU} And Accuracy: {cur_accu}\nOriginal labels: {emojis[i]}, output labels: {real_labels}\n')
        print(f'Sentence: {sentences[i]}\nWith Bleu score: {cur_BLEU} And Accuracy: {cur_accu}\nOriginal labels: {emojis[i]}, output labels: {real_labels}\n') 

for index, label in enumerate(labels_classes):
    log.write(f'Emoji: {label} has count: {collection[index]} predicted times: {predicted[index]}\n')
    print(f'Emoji: {label} has count: {collection[index]} predicted times: {predicted[index]}')

log.write(f'Overall Average BLEU Score: {overall_BLEU/len(total_dataset)} And Accuracy: {overall_accuracy/len(total_dataset)}')
print(f'Overall Average BLEU Score: {overall_BLEU/len(total_dataset)} And Accuracy: {overall_accuracy/len(total_dataset)}')

Sentence: eat then, idiot.
With Bleu score: 1.0 And Accuracy: 1.0
Original labels: ['🍳', '😡'], output labels: ['🍳', '😡']

Sentence: Loving you and having you  is  the most successful achievement in life ♥️✨
With Bleu score: 0.9800000190734863 And Accuracy: 0.75
Original labels: ['❤', '👨', '👉', '👨', '🎉'], output labels: ['❤', '🎉', '👨']

Sentence: Imagine not being arab 💀💀😭
With Bleu score: 0.9599999785423279 And Accuracy: 0.6666666666666666
Original labels: ['🤔', '❌', '👨'], output labels: ['❌', '🏃', '🤔']

Sentence: I’m going to a concert tonight and I’m so nervous🤩
With Bleu score: 0.8999999761581421 And Accuracy: 0.3333333333333333
Original labels: ['🎵', '🌛', '😍'], output labels: ['🌛', '🎉', '😄', '😣']

Sentence: gatekeeping painting…..
With Bleu score: 0.9599999785423279 And Accuracy: 0.0
Original labels: ['👀', '⛪'], output labels: []

Sentence: Not if I see you in water
With Bleu score: 0.9599999785423279 And Accuracy: 0.5
Original labels: ['❌', '👀', '👨', '💦'], output labels: ['👀', '👨'