In [23]:
# Mount Google Drive at /content/drive; the saved models used later in this
# notebook are loaded from Drive paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import pandas as pd

# Normalized Affective Text test set, read directly from GitHub.
# Alternative source kept for reference:
# url = "https://raw.githubusercontent.com/jordanchtan/EvaluationData/master/AffectiveTextPre/affectivetext_test_normalized.csv"
url = (
    "https://raw.githubusercontent.com/jordanchtan/EvaluationData/master/"
    "AffectiveText/affectivetext_test_normalized.csv"
)
df = pd.read_csv(url, encoding='utf8')

# 'message' holds the input text and 'reaction' the label, both as NumPy arrays.
sentences, labels = df['message'].values, df['reaction'].values

In [25]:
# Install the Hugging Face transformers package into the runtime.
# NOTE(review): the version is not pinned, so a fresh run may pull a release
# whose tokenizer/model API differs from the one this notebook was written for.
!pip install transformers



In [26]:
import torch

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB


In [0]:
import keras.backend as K
import tensorflow as tf

from scipy.spatial.distance import jensenshannon
from numpy import asarray

# Single Keras KL-divergence loss object reused by js_divergence.
kl_div = tf.keras.losses.KLDivergence()


def js_divergence(p, q):
    """Jensen-Shannon divergence between the distributions ``p`` and ``q``.

    Computed as the average of KL(p || m) and KL(q || m), where m is the
    element-wise midpoint of ``p`` and ``q``.

    NOTE(review): ``kl_div`` is built with its default reduction, so the
    returned value is presumably already reduced over the batch - confirm.
    """
    midpoint = 0.5 * (p + q)
    kl_p_to_mid = kl_div(p, midpoint)
    kl_q_to_mid = kl_div(q, midpoint)
    return 0.5 * kl_p_to_mid + 0.5 * kl_q_to_mid

def js_distance(y_true, y_pred):
    """Jensen-Shannon distance: the square root of the JS divergence.

    Takes ``(y_true, y_pred)`` in the order Keras passes to losses/metrics.
    """
    divergence = js_divergence(y_true, y_pred)
    return K.sqrt(divergence)


In [0]:
from transformers import DistilBertPreTrainedModel, DistilBertModel
import torch.nn as nn
from transformers import BertTokenizer, DistilBertTokenizer


class MyDistilBertForSequenceClassificationReact(DistilBertPreTrainedModel):
    """DistilBERT encoder topped with a fixed 5-output head.

    The hidden state of the first token is passed through a linear layer,
    ReLU and dropout, projected to ``OUTPUT_DIM`` values, and finally clamped
    to be non-negative with a second ReLU.
    """

    # Width of the output head. It is fixed here and deliberately NOT taken
    # from ``config.num_labels``; ``num_labels`` only selects the loss used in
    # ``forward`` (1 -> MSE, anything else -> cross-entropy).
    # NOTE(review): presumably one output per reaction type - confirm.
    OUTPUT_DIM = 5

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.distilbert = DistilBertModel(config)
        self.pre_classifier = nn.Linear(config.dim, config.dim)
        self.classifier = nn.Linear(config.dim, self.OUTPUT_DIM)
        self.dropout = nn.Dropout(config.seq_classif_dropout)

        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None, labels=None):
        """Run the encoder and head, optionally computing a loss.

        Args:
            input_ids, attention_mask, head_mask, inputs_embeds: forwarded
                unchanged to the wrapped ``DistilBertModel``.
            labels: optional targets. With ``num_labels == 1`` they are
                reshaped to ``(-1, OUTPUT_DIM)`` and compared with MSE;
                otherwise they are flattened and used as class indices for
                cross-entropy.

        Returns:
            Tuple ``(loss), logits, (hidden_states), (attentions)`` where the
            loss is present only when ``labels`` is given.
        """
        distilbert_output = self.distilbert(
            input_ids=input_ids, attention_mask=attention_mask, head_mask=head_mask, inputs_embeds=inputs_embeds
        )
        hidden_state = distilbert_output[0]  # (bs, seq_len, dim)
        pooled_output = hidden_state[:, 0]  # (bs, dim): first-token representation
        pooled_output = self.pre_classifier(pooled_output)  # (bs, dim)
        pooled_output = nn.functional.relu(pooled_output)  # (bs, dim)
        pooled_output = self.dropout(pooled_output)  # (bs, dim)
        logits = self.classifier(pooled_output)  # (bs, OUTPUT_DIM)
        # Extra ReLU on the head output keeps every predicted value non-negative.
        logits = nn.functional.relu(logits)
        outputs = (logits,) + distilbert_output[1:]
        if labels is not None:
            if self.num_labels == 1:
                loss_fct = nn.MSELoss()
                loss = loss_fct(logits.view(-1, self.OUTPUT_DIM), labels.view(-1, self.OUTPUT_DIM))
            else:
                # Reshape with the real head width: the head always emits
                # OUTPUT_DIM values, so reshaping with num_labels would
                # scramble or reject the batch whenever the two differ.
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.OUTPUT_DIM), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)

class MyDistilBertForSequenceClassificationExpress(DistilBertPreTrainedModel):
    """DistilBERT encoder topped with a fixed 3-output head.

    The hidden state of the first token is passed through a linear layer,
    ReLU and dropout, projected to ``OUTPUT_DIM`` values, and finally clamped
    to be non-negative with a second ReLU.
    """

    # Width of the output head. It is fixed here and deliberately NOT taken
    # from ``config.num_labels``; ``num_labels`` only selects the loss used in
    # ``forward`` (1 -> MSE, anything else -> cross-entropy).
    OUTPUT_DIM = 3

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.distilbert = DistilBertModel(config)
        self.pre_classifier = nn.Linear(config.dim, config.dim)
        self.classifier = nn.Linear(config.dim, self.OUTPUT_DIM)
        self.dropout = nn.Dropout(config.seq_classif_dropout)

        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None, labels=None):
        """Run the encoder and head, optionally computing a loss.

        Args:
            input_ids, attention_mask, head_mask, inputs_embeds: forwarded
                unchanged to the wrapped ``DistilBertModel``.
            labels: optional targets. With ``num_labels == 1`` they are
                reshaped to ``(-1, OUTPUT_DIM)`` and compared with MSE;
                otherwise they are flattened and used as class indices for
                cross-entropy.

        Returns:
            Tuple ``(loss), logits, (hidden_states), (attentions)`` where the
            loss is present only when ``labels`` is given.
        """
        distilbert_output = self.distilbert(
            input_ids=input_ids, attention_mask=attention_mask, head_mask=head_mask, inputs_embeds=inputs_embeds
        )
        hidden_state = distilbert_output[0]  # (bs, seq_len, dim)
        pooled_output = hidden_state[:, 0]  # (bs, dim): first-token representation
        pooled_output = self.pre_classifier(pooled_output)  # (bs, dim)
        pooled_output = nn.functional.relu(pooled_output)  # (bs, dim)
        pooled_output = self.dropout(pooled_output)  # (bs, dim)
        logits = self.classifier(pooled_output)  # (bs, OUTPUT_DIM)
        # Extra ReLU on the head output keeps every predicted value non-negative.
        logits = nn.functional.relu(logits)
        outputs = (logits,) + distilbert_output[1:]
        if labels is not None:
            if self.num_labels == 1:
                loss_fct = nn.MSELoss()
                loss = loss_fct(logits.view(-1, self.OUTPUT_DIM), labels.view(-1, self.OUTPUT_DIM))
            else:
                # Reshape with the real head width: the head always emits
                # OUTPUT_DIM values, so reshaping with num_labels would
                # scramble or reject the batch whenever the two differ.
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.OUTPUT_DIM), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)



In [0]:
import pandas as pd
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

def _model_device(model):
    """Return the device holding ``model``'s parameters (CPU if it has none)."""
    try:
        return next(model.parameters()).device
    except (AttributeError, StopIteration):
        return torch.device("cpu")


def predict_sentences(model, tokenizer, sentences, max_len=36, batch_size=32):
    """Run ``model`` over ``sentences`` and collect its first output per batch.

    Args:
        model: callable as ``model(input_ids, attention_mask=...)`` returning a
            sequence whose first element is the prediction tensor; must also
            provide ``eval()``.
        tokenizer: object exposing ``encode_plus`` in the transformers style.
        sentences: iterable of strings to score.
        max_len: length every sentence is padded to (and limited to) when
            tokenizing. Defaults to the previously hard-coded 36.
        batch_size: number of sentences per forward pass. Defaults to the
            previously hard-coded 32.

    Returns:
        A list with one NumPy array per batch, in input order. An empty
        ``sentences`` yields an empty list instead of failing in ``torch.cat``.
    """
    input_ids = []
    attention_masks = []

    for sent in sentences:
        # NOTE(review): `pad_to_max_length` is kept as-is because the installed
        # transformers version is unknown; newer releases may prefer different
        # padding/truncation arguments - confirm against the pinned version.
        encoded_dict = tokenizer.encode_plus(
            sent,                          # Sentence to encode.
            add_special_tokens=True,       # Add '[CLS]' and '[SEP]'.
            max_length=max_len,            # Pad & truncate all sentences.
            pad_to_max_length=True,
            return_attention_mask=True,    # Construct attention masks.
            return_tensors='pt',           # Return PyTorch tensors.
        )
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    print('Predicting labels for {:,} test sentences...'.format(len(input_ids)))
    if not input_ids:
        return []

    prediction_data = TensorDataset(
        torch.cat(input_ids, dim=0),
        torch.cat(attention_masks, dim=0),
    )
    # Sequential sampling keeps predictions aligned with the input order.
    prediction_dataloader = DataLoader(
        prediction_data,
        sampler=SequentialSampler(prediction_data),
        batch_size=batch_size,
    )

    # Evaluation mode disables dropout so predictions are deterministic.
    model.eval()
    # Send each batch to wherever the model actually lives rather than to a
    # notebook-level global that may point at a different device.
    target_device = _model_device(model)

    predictions = []
    for batch in prediction_dataloader:
        b_input_ids, b_input_mask = (t.to(target_device) for t in batch)

        with torch.no_grad():
            outputs = model(b_input_ids, attention_mask=b_input_mask)

        predictions.append(outputs[0].detach().cpu().numpy())
    return predictions

In [0]:
# import pandas as pd
# import numpy as np

# # Load the dataset into a pandas dataframe.
# sentences_holdout = np.load("./drive/My Drive/Colab Notebooks/MainModels/sentences_holdout.npy",allow_pickle=True)
# labels_holdout = np.load("./drive/My Drive/Colab Notebooks/MainModels/labels_holdout.npy",allow_pickle=True)

In [0]:
# print(sentences_holdout.shape)
# print(labels_holdout.shape)

Load express base model

In [0]:
# Load the fine-tuned express base model and its tokenizer from Drive.
output_dir = "./drive/My Drive/Colab Notebooks/MainModels/express_base_model"
model = MyDistilBertForSequenceClassificationExpress.from_pretrained(
    output_dir, # Directory containing the saved model.
    num_labels = 1, # 1 selects the MSE loss branch in the model's forward();
                    # the width of the output head is fixed by the class itself.
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)
# Use the device chosen earlier in the notebook so the CPU fallback keeps
# working; an unconditional .cuda() fails on a machine without a GPU.
model.to(device)
tokenizer = DistilBertTokenizer.from_pretrained(output_dir,do_lower_case=True)


Express base model predictions

In [33]:
# Score every test sentence with the express base model; the result is a
# list holding one array of model outputs per batch.
express_predictions = predict_sentences(model, tokenizer, sentences)

Predicting labels for 840 test sentences...


In [34]:
# Sanity check: the number of batches returned, then the raw outputs of the
# first batch.
print(len(express_predictions))
print(express_predictions[0])

27
[[3.2002535 3.0154235 3.1236317]
 [2.4798014 3.1894133 3.0464993]
 [3.1983883 2.9973903 3.0309594]
 [3.274206  3.1051667 3.1556964]
 [2.2643116 3.202304  2.86701  ]
 [3.151666  3.1228383 3.0967882]
 [3.0537024 2.9844048 3.1118345]
 [2.9904726 2.9365482 3.1166155]
 [2.7489235 3.0442307 2.9661047]
 [2.9462383 2.9446058 3.0823064]
 [3.1305778 3.0579677 3.0316634]
 [2.985901  3.072596  3.191962 ]
 [3.1953058 3.183691  3.1172404]
 [2.9267178 2.9186056 3.0405316]
 [3.4037054 3.26381   3.2429748]
 [2.9579036 3.1233056 3.195844 ]
 [3.0429404 3.0380344 3.1899276]
 [2.1788538 3.1942031 2.8508978]
 [3.3466992 3.2805762 3.1490922]
 [2.9278162 2.8355424 3.0145154]
 [2.8877037 3.158996  3.242948 ]
 [2.5110693 3.1970377 3.173129 ]
 [2.915011  2.9948637 3.1189013]
 [3.3704855 3.144134  3.1737647]
 [3.2053916 2.9606693 3.1103935]
 [3.2126148 3.0696578 3.077311 ]
 [2.9808347 3.146453  3.097745 ]
 [2.4824417 3.2525227 3.115121 ]
 [3.0910156 2.927628  3.079396 ]
 [3.0222206 2.9433074 3.0787675]
 [2.944

In [0]:
# np.save('./express_predictions.npy', express_predictions,allow_pickle=True)


Load react base model


In [0]:
# Load the fine-tuned react base model and its tokenizer from Drive.
output_dir = "./drive/My Drive/Colab Notebooks/MainModels/react_base_model"
model = MyDistilBertForSequenceClassificationReact.from_pretrained(
    output_dir, # Directory containing the saved model.
    num_labels = 1, # 1 selects the MSE loss branch in the model's forward();
                    # the width of the output head is fixed by the class itself.
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)
# Use the device chosen earlier in the notebook so the CPU fallback keeps
# working; an unconditional .cuda() fails on a machine without a GPU.
model.to(device)
tokenizer = DistilBertTokenizer.from_pretrained(output_dir,do_lower_case=True)


React base model predictions

In [37]:
# Score every test sentence with the react base model; the result is a
# list holding one array of model outputs per batch.
react_predictions = predict_sentences(model, tokenizer, sentences)

Predicting labels for 840 test sentences...


Combine

In [38]:
import numpy as np

# Collapse each model's per-batch arrays into a single 2-D matrix with one
# row per sentence.
react_predictions, express_predictions = (
    np.vstack(batches) for batches in (react_predictions, express_predictions)
)
print(react_predictions.shape, express_predictions.shape, sep="\n")

# Place the two models' outputs side by side to form the meta-learner input.
data = np.concatenate([react_predictions, express_predictions], axis=1)
print(data.shape)

(840, 5)
(840, 3)
(840, 8)


Meta Learner

In [39]:
from keras.models import load_model

output_dir = "./drive/My Drive/Colab Notebooks/MainModels/"
# The saved model references js_distance by name, so it has to be supplied
# as a custom object when deserialising.
dependencies = dict(js_distance=js_distance)
model = load_model(output_dir + 'ensemble_meta_learner.h5', custom_objects=dependencies)

# Meta-learner output for the stacked base-model predictions.
predictions = model.predict(data)

def map_affect_reaction_to_index(reaction):
    """Return the class index of an affect label.

    "joy" -> 0, "surprise" -> 1, "sadness" -> 2, "anger" -> 3.
    Any other label raises ``KeyError``.
    """
    ordered_reactions = ("joy", "surprise", "sadness", "anger")
    index_by_reaction = {name: position for position, name in enumerate(ordered_reactions)}
    return index_by_reaction[reaction]

def map_fb_idx_2_affect_idx(idx):
    """Translate a Facebook-reaction index into an affect class index.

    Input order is love_count, wow_count, haha_count, sad_count, angry_count.
    love (0) and haha (2) both map to joy (0); wow (1) maps to surprise (1);
    sad (3) maps to sadness (2); angry (4) maps to anger (3).
    Any other index returns ``None``.
    """
    # Position = Facebook reaction index, value = affect class index.
    affect_by_fb_idx = (0, 1, 0, 2, 3)
    for fb_idx, affect_idx in enumerate(affect_by_fb_idx):
        if idx == fb_idx:
            return affect_idx
    return None


from sklearn import metrics

names = ["joy", "surprise", "sadness", "anger"]

# Gold labels as affect class indices.
labels_idx = [map_affect_reaction_to_index(react) for react in labels]
# Strongest meta-learner output per sentence, then translated from the
# Facebook-reaction index space to the affect class space.
predictions_argmax = [np.argmax(pred) for pred in predictions]
predictions_idx = [map_fb_idx_2_affect_idx(idx) for idx in predictions_argmax]

# Spot-check a single example before printing the full report.
print(sentences[800])
print(names)
print(labels_idx[800])
print(predictions_idx[800])

print(metrics.classification_report(labels_idx, predictions_idx, target_names=names))
# ex border patrol agent beaten in prison
# ['joy', 'surprise', 'sadness', 'anger']
# 2
# 2
#               precision    recall  f1-score   support

#          joy       0.60      0.87      0.71       362
#     surprise       0.34      0.16      0.22       184
#      sadness       0.81      0.50      0.61       202
#        anger       0.46      0.55      0.50        92

#     accuracy                           0.59       840
#    macro avg       0.55      0.52      0.51       840
# weighted avg       0.58      0.59      0.56       840

Ex-Border Patrol agent beaten in prison
['joy', 'surprise', 'sadness', 'anger']
2
3
              precision    recall  f1-score   support

         joy       0.59      0.80      0.68       362
    surprise       0.37      0.27      0.31       184
     sadness       0.76      0.54      0.63       202
       anger       0.48      0.34      0.39        92

    accuracy                           0.57       840
   macro avg       0.55      0.49      0.50       840
weighted avg       0.57      0.57      0.56       840



Manual test

In [0]:
# Run the meta learner (the current `model`) on the stacked base-model outputs
# again and keep the raw predictions.
affective_predictions = model.predict(data)