In [7]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import numpy as np
import torch
import transformers
import inspect
import time
import logging
import tmunlp as nlp

from tqdm import trange, tqdm, tqdm_notebook, tqdm_pandas, tqdm_gui
from datetime import datetime
from tqdm import tqdm
from transformers import BertConfig, BertModel, BertTokenizer, BertForSequenceClassification, AdamW, BertPreTrainedModel
from transformers import get_constant_schedule_with_warmup
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report, accuracy_score, f1_score
from torch import nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)


'GeForce RTX 2080 Ti'

# 1. Load data & pre-processing

In [3]:
def preprocessing(df):
    """
    Preprocessing step
    As above dataframe heads, there is a lot of <br /> character 
    """
    df.sentence = df.sentence.str.replace('<br />','')
    df.sentence = df.sentence.str.lower()
    return df

train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')

train = preprocessing(train)
test = preprocessing(test)

print('Train data:\n{}\n\nTest data:\n{}'.format(train.head(5), test.head(5)))

Train data:
                                            sentence  sentiment  polarity
0  this is a very bland and inert production of o...          2         0
1  i've seen this film in avant-premiere at imagi...          7         1
2  revolt of the zombies (2 outta 5 stars) no, th...          4         0
3  may contain minor spoilers.dressed to kill, ha...          7         1
4  (spoilers)i shoulda figured. the dvd didn't ev...          2         0

Test data:
                                            sentence  sentiment  polarity
0  i loved this movie so much. i'm a big fan of a...         10         1
1  the stark, cold landscape of big sky country, ...          9         1
2  this cheapo exploitation flick is some genuine...          2         0
3  this movie has been promoting in everywhere in...          1         0
4  this is a great off-the-wall romantic comedy a...          8         1


# 2. Build model

In [4]:
class BertModelBonz():
    def __init__(self, load_model=None, load_config=None, model='bert-base-uncased', max_len=512, batch_size=6):
        self.pre_trained_model = model
        self.max_len = max_len
        self.batch_size = batch_size
        
        self.tokenizer = BertTokenizer.from_pretrained(self.pre_trained_model)
        self.tokenizer.max_len = max_len
        
        self.config = load_config
        self.model = BertForSequenceClassification.from_pretrained(self.pre_trained_model, config=self.config)
        self.optimizer = AdamW(params = self.model.parameters(), lr=1e-5)
    
    def create_ids(self, sentences):
        logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR) #Disable tokenizer logs, it's really annoy
        input_ids = []
        for sen in tqdm_notebook(sentences, desc="Create Ids"):
            tmp = self.tokenizer.encode(sen)
            input_ids.append(tmp)
        input_ids = pad_sequences(input_ids, 
                                  maxlen=self.max_len, 
                                  dtype='int64', 
                                  truncating='post', 
                                  padding='post')
        return input_ids
    
    def prepare_data(self, input_ids, input_labels=None):
        input_ids = torch.tensor(self.create_ids(input_ids))
        if input_labels is None:
            return DataLoader(TensorDataset(input_ids), 
                              batch_size=self.batch_size)
        else:
            input_labels = torch.tensor(input_labels)
            return DataLoader(TensorDataset(input_ids, input_labels), 
                              batch_size=self.batch_size)
        
    def flat_accuracy(self, preds, labels):
        pred_flat = np.argmax(preds, axis=1).flatten()
        labels_flat = labels.flatten()
        return np.sum(pred_flat == labels_flat) / len(labels_flat)
    
    def train(self, dataloader, epochs=4):
        self.train_loss_set =[]
        for i in trange(epochs, desc="Epoch"):
            # Training model
            self.model.to(device)
            self.model.train()
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for input_ids, input_labels in tqdm_notebook(dataloader):
                self.optimizer.zero_grad()
                loss = self.model(input_ids=input_ids.cuda(), labels=input_labels.cuda())[0]
                self.train_loss_set.append(loss)    
                loss.backward()
                self.optimizer.step()
                
                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
            print("Train loss: {}".format(tr_loss/nb_tr_steps))

            # Evaluation
            self.model.eval()
            eval_loss, eval_accuracy = 0, 0
            nb_eval_steps, nb_eval_examples = 0, 0
            for input_ids, input_labels in dataloader:
                with torch.no_grad():
                    logits = self.model(input_ids.cuda())[0]
                logits = logits.detach().cpu().numpy()
                label_ids = input_labels.to('cpu').numpy()
                tmp_eval_accuracy = self.flat_accuracy(logits, label_ids)
                eval_accuracy += tmp_eval_accuracy
                nb_eval_steps += 1
            print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))
            
            '''
            #Save model for each epoch:
            filename = 'bert_512_epoch'+str(i)+'.sd'
            filepath = './model/bert/'+filename
            torch.save(self.model.state_dict(), filepath)
            '''
    def generate_cls_vectors(self, dataloader):
        self.model.to('cuda')
        self.model.eval()
        cls_vectors = []
        for input_ids, input_labels in tqdm_notebook(dataloader):
            with torch.no_grad():
                outputs = self.model(input_ids.cuda())
                last_hidden_layer = outputs[1][12]
                cls_vector = last_hidden_layer[:,0,:]
            cls_vector = cls_vector.detach().cpu().numpy()
            cls_vectors.extend(cls_vector)
        return cls_vectors
                
    
    def predict(self, test_data, test_labels):
        test_ids = self.create_ids(test_data)
        test_inputs = torch.tensor(test_ids)
        test_dataloader = DataLoader(test_inputs, batch_size=self.batch_size)
        
        # Preditcion
        self.model.to(device)
        self.model.eval()
        self.predictions = []
        for input_ids in tqdm_notebook(test_dataloader, desc="Predicting"):
            with torch.no_grad():
                logits = self.model(input_ids.to(device))[0] #This is for generate predict only
            logits = logits.detach().cpu().numpy()
            self.predictions.append(logits)
        return self.predictions
        self.predictions = [j for i in self.predictions for j in i]
        self.predictions = np.argmax(self.predictions, axis=1)
        print(classification_report(self.predictions, test_labels, digits=4))

        
        
#Create model
config = BertConfig(output_hidden_states = True)
bert_model = BertModelBonz(load_config=config, batch_size=6)
bert_model.model.config.output_hidden_states

True

## 2.1. Create train & test dataloader

In [5]:
NUM_DATA = 25000

train_dataloader = bert_model.prepare_data(input_ids=train.sentence[:NUM_DATA], input_labels=train.polarity[:NUM_DATA])
test_dataloader = bert_model.prepare_data(input_ids=test.sentence[:NUM_DATA], input_labels=test.polarity[:NUM_DATA])

HBox(children=(IntProgress(value=0, description='Create Ids', max=25000, style=ProgressStyle(description_width…




HBox(children=(IntProgress(value=0, description='Create Ids', max=25000, style=ProgressStyle(description_width…




## 2.2. Train model

In [6]:
#Train model

bert_model.train(train_dataloader)
#torch.save(bert_model.model, 'bert_eb1024_1e5_e4.pth')




Epoch:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

HBox(children=(IntProgress(value=0, max=4167), HTML(value='')))


Train loss: 0.2517798744246653
Validation Accuracy: 0.9592832573394203


Epoch:  25%|██████████████████▎                                                      | 1/4 [30:47<1:32:23, 1847.78s/it]

HBox(children=(IntProgress(value=0, max=4167), HTML(value='')))


Train loss: 0.12717088015052955
Validation Accuracy: 0.9837612990960756


Epoch:  50%|███████████████████████████████████▌                                   | 2/4 [1:01:39<1:01:38, 1849.09s/it]

HBox(children=(IntProgress(value=0, max=4167), HTML(value='')))


Train loss: 0.06726809934183979
Validation Accuracy: 0.9918006559475256


Epoch:  75%|██████████████████████████████████████████████████████▊                  | 3/4 [1:32:27<30:48, 1848.60s/it]

HBox(children=(IntProgress(value=0, max=4167), HTML(value='')))


Train loss: 0.045846033360617044
Validation Accuracy: 0.9831213502919807


Epoch: 100%|█████████████████████████████████████████████████████████████████████████| 4/4 [2:03:21<00:00, 1850.34s/it]


## 2.3. Predict test data

In [8]:
for i in range(4):
    filename = 'bert_512_epoch'+str(i)+'.sd'
    filepath = './model/bert/'+filename
    bert_model.model.load_state_dict(torch.load(filepath))
    bert_model.model.eval()
    print('Model ',filename,':')
    bert_model.predict(test_data=test.sentence, test_labels=test.polarity)

Model  bert_512_epoch0.sd :


HBox(children=(IntProgress(value=0, description='Create Ids', max=25000, style=ProgressStyle(description_width…




HBox(children=(IntProgress(value=0, description='Predicting', max=4167, style=ProgressStyle(description_width=…


              precision    recall  f1-score   support

           0     0.9084    0.9448    0.9262     12019
           1     0.9469    0.9118    0.9290     12981

   micro avg     0.9276    0.9276    0.9276     25000
   macro avg     0.9276    0.9283    0.9276     25000
weighted avg     0.9284    0.9276    0.9277     25000

Model  bert_512_epoch1.sd :


HBox(children=(IntProgress(value=0, description='Create Ids', max=25000, style=ProgressStyle(description_width…




HBox(children=(IntProgress(value=0, description='Predicting', max=4167, style=ProgressStyle(description_width=…


              precision    recall  f1-score   support

           0     0.9376    0.9228    0.9301     12701
           1     0.9215    0.9366    0.9290     12299

   micro avg     0.9296    0.9296    0.9296     25000
   macro avg     0.9296    0.9297    0.9296     25000
weighted avg     0.9297    0.9296    0.9296     25000

Model  bert_512_epoch2.sd :


HBox(children=(IntProgress(value=0, description='Create Ids', max=25000, style=ProgressStyle(description_width…




HBox(children=(IntProgress(value=0, description='Predicting', max=4167, style=ProgressStyle(description_width=…


              precision    recall  f1-score   support

           0     0.9002    0.9581    0.9282     11744
           1     0.9606    0.9059    0.9324     13256

   micro avg     0.9304    0.9304    0.9304     25000
   macro avg     0.9304    0.9320    0.9303     25000
weighted avg     0.9322    0.9304    0.9305     25000

Model  bert_512_epoch3.sd :


HBox(children=(IntProgress(value=0, description='Create Ids', max=25000, style=ProgressStyle(description_width…




HBox(children=(IntProgress(value=0, description='Predicting', max=4167, style=ProgressStyle(description_width=…


              precision    recall  f1-score   support

           0     0.9211    0.9444    0.9326     12192
           1     0.9458    0.9230    0.9343     12808

   micro avg     0.9334    0.9334    0.9334     25000
   macro avg     0.9334    0.9337    0.9334     25000
weighted avg     0.9337    0.9334    0.9335     25000



## 2.4. Generate CLS vectors

In [50]:
# Load state dict from trained model
filepath = './model/bert/bert_512_epoch2.sd'
bert_model.model.load_state_dict(torch.load(filepath))

cls_vectors = bert_model.generate_cls_vectors(train_dataloader)
cls_vectors_test = bert_model.generate_cls_vectors(test_dataloader)

HBox(children=(IntProgress(value=0, max=4167), HTML(value='')))




HBox(children=(IntProgress(value=0, max=4167), HTML(value='')))




# 3. Feature Extraction with TMUNLP

## 3.1. Build negative & positive vectors

In [51]:
result = nlp.get_label_term_weighting('tmunlp_file.txt', ['0','1'])
VECTOR_LEN = 50

#Create negative word list
negative_list =  nlp.get_keyword('0', result, VECTOR_LEN)
temp_max = negative_list[list(negative_list.keys())[0]]
for i in negative_list.keys():
    negative_list[i] = negative_list[i] #/ temp_max


#Create postive word list
positive_list =  nlp.get_keyword('1', result, VECTOR_LEN)
temp_max = positive_list[list(positive_list.keys())[0]]
for i in positive_list.keys():
    positive_list[i] = positive_list[i] #/ temp_max



def embedding_1hot(df, words):
    arr = []
    for i in range(df.shape[0]):
        temp = [words[word] if word in df.sentence[i] else 0 for word in words.keys()]
        arr.append(temp)
    return arr

train['negative_embedding'] = embedding_1hot(train, negative_list)
train['positive_embedding'] = embedding_1hot(train, positive_list)

test['negative_embedding'] = embedding_1hot(test, negative_list)
test['positive_embedding'] = embedding_1hot(test, positive_list)


## 3.2. Build bert_llr model

In [52]:
class BertLLR(nn.Module):
    def __init__(self, VECTOR_LEN):
        super(BertLLR, self).__init__()
        self.bert = nn.Linear(768,768)
        self.bert_activation = nn.Tanh()
        
        self.llr = nn.Linear(2*VECTOR_LEN, 2*VECTOR_LEN)
        self.llr_activation = nn.Tanh()
        
        self.dropout = nn.Dropout(0.1)
        
        self.classifier = nn.Linear(768+2*VECTOR_LEN, 2)
    
    def forward(self, cls_vectors=None, neg_embed=None, pos_embed=None, labels=None):
        tanh_cls_vectors = self.bert_activation(self.bert(cls_vectors))
        
        llr_vectors = torch.cat([neg_embed, pos_embed], dim=1)
        tanh_llr_vectors = self.llr_activation(self.llr(llr_vectors))
        
        concat_vectors = torch.cat([tanh_cls_vectors, tanh_llr_vectors], dim=1)
        concat_vectors = self.dropout(concat_vectors)
        
        logits = self.classifier(concat_vectors)
        
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, 2), labels.view(-1))
            outputs = (logits, loss)
        else:
            outputs = (logits,)
        
        return outputs
    
bert_llr_model = BertLLR(VECTOR_LEN)
bert_llr_optimizer = torch.optim.Adam(params = bert_llr_model.parameters(), 
                                      lr = 1e-5
                                     )


bert_llr_model   

BertLLR(
  (bert): Linear(in_features=768, out_features=768, bias=True)
  (bert_activation): Tanh()
  (llr): Linear(in_features=100, out_features=100, bias=True)
  (llr_activation): Tanh()
  (dropout): Dropout(p=0.1, inplace=False)
  (classifier): Linear(in_features=868, out_features=2, bias=True)
)

## 3.3. Train bert_llr model

In [53]:
dataloader = DataLoader(TensorDataset(torch.tensor(cls_vectors, dtype=torch.float),
                                      torch.tensor(train.negative_embedding, dtype=torch.float),
                                      torch.tensor(train.positive_embedding, dtype=torch.float),
                                      torch.tensor(train.polarity)
                                     ),
                        batch_size=16
                       )

dataloader_test = DataLoader(TensorDataset(torch.tensor(cls_vectors_test, dtype=torch.float),
                                          torch.tensor(test.negative_embedding, dtype=torch.float),
                                          torch.tensor(test.positive_embedding, dtype=torch.float),
                                          torch.tensor(test.polarity)
                                         ),
                            batch_size=16
                           )


bert_llr_model.to('cuda')
train_loss = []
train_accuracy = []
test_f1 = []
for _ in range(10):
    # Training
    bert_llr_model.train()
    tr_loss = []
    for cls_vector, neg_embed, pos_embed, labels in tqdm(dataloader, desc='Training'):
        #print(labels)
        bert_llr_optimizer.zero_grad()
        logits, loss = bert_llr_model(cls_vectors = cls_vector.cuda(), 
                                      neg_embed = neg_embed.cuda(), 
                                      pos_embed = pos_embed.cuda(), 
                                      labels = labels.cuda())
        loss.backward()
        bert_llr_optimizer.step()
        tr_loss.append(loss.item())
    loss_score = sum(tr_loss)/len(tr_loss)
    train_loss.append(loss_score)
    
    # Evaluation
    bert_llr_model.eval()
    predictions = []
    for cls_vector, neg_embed, pos_embed, labels in tqdm(dataloader, desc='Predicting'):
        with torch.no_grad():
            logits = bert_llr_model(cls_vectors = cls_vector.cuda(),
                                    neg_embed = neg_embed.cuda(),
                                    pos_embed = pos_embed.cuda())[0] #This is for generate predict only
        logits = logits.detach().cpu().numpy()
        predictions.append(logits)
    predictions = [j for i in predictions for j in i]
    predictions = np.argmax(predictions, axis=1)
    acc_score = accuracy_score(predictions, train.polarity)
    train_accuracy.append(acc_score)
    
    # Predict data
    predictions = []
    for cls_vector, neg_embed, pos_embed, labels in tqdm(dataloader_test, desc='Predicting'):
        with torch.no_grad():
            logits = bert_llr_model(cls_vectors = cls_vector.cuda(),
                                    neg_embed = neg_embed.cuda(),
                                    pos_embed = pos_embed.cuda())[0] #This is for generate predict only
        logits = logits.detach().cpu().numpy()
        predictions.append(logits)
    preds = [j for i in predictions for j in i]
    preds = np.argmax(preds, axis=1)
    test_f1.append(f1_score(preds, test.polarity, average='macro'))
    
    # Print result
    print('Train loss: ', loss_score)
    print('Train accuracy: ',acc_score)
    print(classification_report(preds, test.polarity, digits=4))
        

Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 210.35it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 957.54it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 989.83it/s]


Train loss:  0.04289432667477041
Train accuracy:  0.99132
              precision    recall  f1-score   support

           0     0.9275    0.9405    0.9340     12327
           1     0.9414    0.9285    0.9349     12673

   micro avg     0.9344    0.9344    0.9344     25000
   macro avg     0.9344    0.9345    0.9344     25000
weighted avg     0.9345    0.9344    0.9344     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 200.78it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 914.56it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 976.08it/s]


Train loss:  0.028587201890259772
Train accuracy:  0.99184
              precision    recall  f1-score   support

           0     0.9296    0.9404    0.9350     12356
           1     0.9411    0.9304    0.9357     12644

   micro avg     0.9354    0.9354    0.9354     25000
   macro avg     0.9354    0.9354    0.9354     25000
weighted avg     0.9354    0.9354    0.9354     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 209.78it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 939.67it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 985.03it/s]


Train loss:  0.027634426325759822
Train accuracy:  0.99212
              precision    recall  f1-score   support

           0     0.9297    0.9407    0.9352     12353
           1     0.9414    0.9305    0.9359     12647

   micro avg     0.9356    0.9356    0.9356     25000
   macro avg     0.9356    0.9356    0.9356     25000
weighted avg     0.9356    0.9356    0.9356     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 209.71it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 933.99it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 892.38it/s]


Train loss:  0.026827286918166732
Train accuracy:  0.99236
              precision    recall  f1-score   support

           0     0.9303    0.9406    0.9354     12364
           1     0.9412    0.9311    0.9361     12636

   micro avg     0.9358    0.9358    0.9358     25000
   macro avg     0.9358    0.9358    0.9358     25000
weighted avg     0.9358    0.9358    0.9358     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 208.33it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 917.53it/s]
Predicting: 100%|████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 1020.61it/s]


Train loss:  0.026376711717083983
Train accuracy:  0.99248
              precision    recall  f1-score   support

           0     0.9308    0.9410    0.9359     12365
           1     0.9416    0.9315    0.9365     12635

   micro avg     0.9362    0.9362    0.9362     25000
   macro avg     0.9362    0.9363    0.9362     25000
weighted avg     0.9363    0.9362    0.9362     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 210.99it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 913.17it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 969.81it/s]


Train loss:  0.025785924838194463
Train accuracy:  0.9926
              precision    recall  f1-score   support

           0     0.9308    0.9414    0.9361     12359
           1     0.9421    0.9316    0.9368     12641

   micro avg     0.9364    0.9364    0.9364     25000
   macro avg     0.9364    0.9365    0.9364     25000
weighted avg     0.9365    0.9364    0.9364     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 211.18it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 955.13it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 947.64it/s]


Train loss:  0.02529535801772574
Train accuracy:  0.9928
              precision    recall  f1-score   support

           0     0.9314    0.9415    0.9365     12366
           1     0.9422    0.9322    0.9371     12634

   micro avg     0.9368    0.9368    0.9368     25000
   macro avg     0.9368    0.9369    0.9368     25000
weighted avg     0.9369    0.9368    0.9368     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 212.35it/s]
Predicting: 100%|████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 1002.78it/s]
Predicting: 100%|████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 1016.57it/s]


Train loss:  0.02507446512939815
Train accuracy:  0.9928
              precision    recall  f1-score   support

           0     0.9313    0.9418    0.9365     12360
           1     0.9425    0.9320    0.9372     12640

   micro avg     0.9369    0.9369    0.9369     25000
   macro avg     0.9369    0.9369    0.9369     25000
weighted avg     0.9369    0.9369    0.9369     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 215.80it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 967.02it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 989.45it/s]


Train loss:  0.024683403498323316
Train accuracy:  0.99304
              precision    recall  f1-score   support

           0     0.9311    0.9417    0.9364     12360
           1     0.9423    0.9319    0.9371     12640

   micro avg     0.9367    0.9367    0.9367     25000
   macro avg     0.9367    0.9368    0.9367     25000
weighted avg     0.9368    0.9367    0.9367     25000



Training: 100%|███████████████████████████████████████████████████████████████████| 1563/1563 [00:07<00:00, 215.07it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 995.13it/s]
Predicting: 100%|█████████████████████████████████████████████████████████████████| 1563/1563 [00:01<00:00, 948.09it/s]


Train loss:  0.024361303694646327
Train accuracy:  0.99304
              precision    recall  f1-score   support

           0     0.9316    0.9416    0.9366     12367
           1     0.9422    0.9323    0.9373     12633

   micro avg     0.9369    0.9369    0.9369     25000
   macro avg     0.9369    0.9370    0.9369     25000
weighted avg     0.9370    0.9369    0.9369     25000



In [54]:
print(train_loss)
print(train_accuracy)
print(test_f1)

[0.04289432667477041, 0.028587201890259772, 0.027634426325759822, 0.026827286918166732, 0.026376711717083983, 0.025785924838194463, 0.02529535801772574, 0.02507446512939815, 0.024683403498323316, 0.024361303694646327]
[0.99132, 0.99184, 0.99212, 0.99236, 0.99248, 0.9926, 0.9928, 0.9928, 0.99304, 0.99304]
[0.9344368604172726, 0.9353578553287809, 0.9355577719486305, 0.9357580988508742, 0.936198139537749, 0.9364379781175087, 0.9367981842371138, 0.9368780204947227, 0.9367180154769654, 0.9369182146340778]


## 3.4. Predict test data

In [70]:
dataloader = DataLoader(TensorDataset(torch.tensor(cls_vectors_test, dtype=torch.float),
                                      torch.tensor(test.negative_embedding, dtype=torch.float),
                                      torch.tensor(test.positive_embedding, dtype=torch.float),
                                      torch.tensor(test.polarity)
                                     ),
                        batch_size=16
                       )

bert_llr_model.to('cuda')
bert_llr_model.eval()

predictions = []
for cls_vector, neg_embed, pos_embed, labels in tqdm_notebook(dataloader, desc='Predicting'):
    with torch.no_grad():
        logits = bert_llr_model(cls_vectors = cls_vector.cuda(),
                                neg_embed = neg_embed.cuda(),
                                pos_embed = pos_embed.cuda())[0] #This is for generate predict only
    logits = logits.detach().cpu().numpy()
    predictions.append(logits)


preds = [j for i in predictions for j in i]
preds = np.argmax(preds, axis=1)
print(classification_report(preds, test.polarity, digits=4))



HBox(children=(IntProgress(value=0, description='Predicting', max=1563, style=ProgressStyle(description_width=…


              precision    recall  f1-score   support

           0     0.9366    0.9342    0.9354     12533
           1     0.9340    0.9365    0.9352     12467

   micro avg     0.9353    0.9353    0.9353     25000
   macro avg     0.9353    0.9353    0.9353     25000
weighted avg     0.9353    0.9353    0.9353     25000



In [22]:
#predictions = np.argmax(predictions, axis=1)
predictions

[array([[-4.2193394,  3.7351823],
        [-3.181842 ,  2.335875 ],
        [ 4.662401 , -4.356788 ],
        [ 4.6635222, -4.4361706],
        [-3.9928486,  3.5073686],
        [-3.7379181,  3.2141714],
        [-4.302698 ,  3.4984376],
        [ 4.206358 , -3.9277744],
        [-3.7600272,  3.2077901],
        [-2.8281696,  2.710023 ],
        [ 4.7665157, -4.459503 ],
        [ 4.373512 , -4.0410233],
        [ 4.2152157, -4.059046 ],
        [ 4.562008 , -4.4501204],
        [-3.343133 ,  2.8380086],
        [-2.024254 ,  1.4490646]], dtype=float32),
 array([[-3.910925  ,  3.0488214 ],
        [-4.0014453 ,  3.632685  ],
        [ 2.039426  , -2.2094398 ],
        [-4.0950427 ,  3.298696  ],
        [-4.1797204 ,  3.6013682 ],
        [-0.98534167,  0.49423742],
        [-4.3870306 ,  3.630054  ],
        [ 4.8673215 , -4.5039706 ],
        [-3.2610476 ,  2.8844736 ],
        [-3.725003  ,  3.0005918 ],
        [ 4.9135327 , -4.568438  ],
        [ 4.5231524 , -4.507228  ],
       

In [26]:
bert_llr_model.state_dict()

OrderedDict([('bert.weight',
              tensor([[ 0.0269,  0.0220,  0.0104,  ..., -0.0083, -0.0231,  0.0219],
                      [-0.0120,  0.0266,  0.0331,  ...,  0.0040, -0.0069, -0.0142],
                      [-0.0344,  0.0099,  0.0104,  ..., -0.0116, -0.0247,  0.0287],
                      ...,
                      [ 0.0363, -0.0238,  0.0232,  ..., -0.0255, -0.0226,  0.0181],
                      [ 0.0081,  0.0146,  0.0363,  ...,  0.0259, -0.0111, -0.0153],
                      [ 0.0218,  0.0104, -0.0342,  ..., -0.0129,  0.0072, -0.0209]],
                     device='cuda:0')),
             ('bert.bias',
              tensor([ 5.0368e-03,  3.1403e-02,  1.6484e-02, -3.2630e-02, -3.2012e-02,
                      -2.1757e-02,  1.7187e-02, -7.9305e-03,  1.8291e-02,  2.6600e-02,
                       1.0693e-02,  9.7253e-04,  1.7721e-02, -2.3223e-02, -1.9580e-02,
                      -2.1894e-02,  6.1231e-03, -3.4022e-02, -1.6071e-04, -3.5636e-02,
                       2

In [6]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
input_ids = torch.tensor([tokenizer.encode("Hello, my dog is cute", add_special_tokens=True),tokenizer.encode("Hello, my cat is good", add_special_tokens=True)])  # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0]
last_hidden_states[]

tensor([[[-1.1437e-01,  1.9371e-01,  1.2496e-01,  ..., -3.8269e-01,
           2.1066e-01,  5.4071e-01],
         [ 5.3082e-01,  3.2075e-01,  3.6646e-01,  ..., -3.6060e-03,
           7.5786e-01,  3.8844e-02],
         [-4.8765e-01,  8.8492e-01,  4.2556e-01,  ..., -6.9762e-01,
           4.4583e-01,  1.2309e-01],
         ...,
         [-7.0028e-01, -1.8151e-01,  3.2970e-01,  ..., -4.8379e-01,
           6.8023e-02,  8.9008e-01],
         [-1.0355e+00, -2.5668e-01, -3.1652e-02,  ...,  3.1974e-01,
           3.9990e-01,  1.7955e-01],
         [ 6.0799e-01,  2.6097e-01, -3.1307e-01,  ...,  3.1100e-02,
          -6.2827e-01, -1.9943e-01]],

        [[-1.6035e-01,  1.8238e-01,  2.0050e-01,  ..., -2.1920e-01,
           2.9376e-01,  4.0928e-01],
         [ 5.8648e-01,  2.7657e-01,  3.1651e-01,  ...,  7.8693e-02,
           8.6146e-01,  2.3258e-01],
         [-3.1646e-01,  7.9635e-01,  3.2656e-01,  ..., -5.9959e-01,
           4.2568e-01, -7.1605e-04],
         ...,
         [-6.9300e-01, -4

In [13]:
last_hidden_states[:,0,:]

tensor(-7.3864, grad_fn=<SelectBackward>)

In [49]:
a = torch.tensor([[0.0,1.0],[0.0,1.0]])
a

tensor([[0., 1.],
        [0., 1.]])

In [50]:
loss_fct = nn.CrossEntropyLoss()
loss_fct(a.view(-1,2), torch.tensor([1,1]).view(-1))

tensor(0.3133)