In [1]:
import os
import sys
import json
import re
import itertools

import numpy as np
import os.path as osp

import time
import torch
import csv
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

from torch.autograd import Variable

from torch import nn
import torch.nn.functional as F
from tqdm import tqdm
from random import shuffle
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix

from keras.preprocessing.sequence import pad_sequences
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

from transformers import BertModel, BertTokenizer, DistilBertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig

In [2]:
import dgl
from dgl import DGLGraph
from dgl.data import MiniGCDataset

# Load Pytorch as backend
dgl.load_backend('pytorch')

Using backend: pytorch
Using backend: pytorch


In [3]:
from dgl.nn.pytorch import conv as dgl_conv
from dgl.data import citegrh

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Dataset Configs

In [5]:
EDGE_TYPE = {
    'bidirectional': 1,
    'input2COMET': 2,
    'COMET2input': 3
}

In [6]:
configs = {
    'news_headline' : {
        'dataset_path': 'data/NewsHeadline_comet_autocomplete.jsonl',
        'model_name': 'distilbert',
        'model_save_point': 'kaggle-news',
        'save_point': 'kaggle-news-gcn',
        'epochs': 5,
        'test_size': 0.5,
        'edge_type': EDGE_TYPE['bidirectional']
    },
    
    'semeval' : {
        'dataset_path': 'data/SemEval_comet_autocomplete.jsonl',
        'model_name': 'distilbert',
        'model_save_point': 'semeval',
        'save_point': 'semeval-gcn',
        'epochs': 10,
        'test_size': 0.2,
        'edge_type': EDGE_TYPE['COMET2input']
    },
    
    'figlang' : {
        'dataset_path': 'data/FigLang_comet_autocomplete.jsonl',
        'model_name': 'distilbert',
        'model_save_point': 'figlang',
        'save_point': 'figlang-gcn',
        'epochs': 10,
        'test_size': 0.2,
        'edge_type': EDGE_TYPE['bidirectional']
    },
}

In [7]:
config = configs['figlang']

### Dataset Load

In [8]:
def load_dataset(filename):
    data = []
    with open(filename) as f:
        for line in f:
            entry = {}
            entry['sentences'] = []
            
            line = line.strip()
            d = json.loads(line)
            
            entry['sentences'].append(d['sentence'])
            entry['label'] = int(d['label'])
            
            for k in d['common_sense'].keys():
                if k == 'xWant' or k == 'xEffect':
                    entry['sentences'].append(d['common_sense'][k])
            data.append(entry)
                
    return data    

In [9]:
dataset = load_dataset(config['dataset_path'])

### Pre-trained BERT model

In [10]:
def get_attn(input_ids):
    attention_masks = []

    for sent in input_ids:
        att_mask = [int(token_id > 0) for token_id in sent]
        attention_masks.append(att_mask)
    return attention_masks

In [11]:
# Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top. 
tokenizer = DistilBertTokenizer.from_pretrained(config['model_name'] + '-base-uncased', do_lower_case=True)

bert_model = torch.load('model/distilbert-' + config['model_save_point'] + '.pb')
bert_model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

## Load dataset from pickle dump

In [12]:
def get_pickle_file(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)

In [13]:
MAX_LEN = 128

def create_dataset_cached(dataset):
    all_data = []
    
    for data, label in tqdm(dataset):
        input_ids = []
        
        input_ids.append(data['sentence'])
        
        for s in data['support']:
            input_ids.append(s)
            
        input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
        att_mask = torch.tensor(get_attn(input_ids))
        input_ids = torch.tensor(input_ids)
        

        loss, hidden_state = bert_model(input_ids.to(device), attention_mask=att_mask.to(device), output_hidden_states=True)
        
        output = hidden_state[-1][:, 0, :].detach().to('cpu')
        torch.cuda.empty_cache()

        if config['edge_type'] == EDGE_TYPE['bidirectional']:
            graph = dgl.graph((torch.tensor([0, 0, 1, 2]), 
                               torch.tensor([1, 2, 0, 0])))
        elif config['edge_type'] == EDGE_TYPE['input2COMET']:
            graph = dgl.graph((torch.tensor([0, 0]), 
                               torch.tensor([1, 2])))
        elif config['edge_type'] == EDGE_TYPE['COMET2input']:
            graph = dgl.graph((torch.tensor([1, 2]), 
                               torch.tensor([0, 0])))
            
        graph.ndata['x'] = torch.tensor(output)
        all_data.append((graph, label, data['raw_sentence']))
    return all_data

In [14]:
trainset_, validationset_ = get_pickle_file('model/trainset-' + config['model_save_point'] +'.data'), get_pickle_file('model/validationset-' + config['model_save_point'] +'.data')

In [15]:
trainset, validationset = create_dataset_cached(trainset_), create_dataset_cached(validationset_)

  graph.ndata['x'] = torch.tensor(output)
100%|██████████| 3517/3517 [00:21<00:00, 165.71it/s]
100%|██████████| 880/880 [00:05<00:00, 168.14it/s]


### Training Data generation

In [16]:
def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
    graphs, labels, raw_sentence = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.tensor([labels]), raw_sentence

In [17]:
BATCH_SIZE = 16

In [18]:
train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate)
test_loader = DataLoader(validationset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate)

## Basic SAGE Model

In [19]:
class GraphSAGEModel(nn.Module):
    def __init__(self,
                 in_feats,
                 n_hidden,
                 out_dim,
                 n_layers,
                 activation,
                 dropout,
                 aggregator_type):
        super(GraphSAGEModel, self).__init__()
        self.node_count = 3
        self.layers = nn.ModuleList()
        self.index = torch.tensor([1, 2]).to(device)

        # input layer
        self.layers.append(dgl_conv.SAGEConv(768, n_hidden, aggregator_type,
                                         feat_drop=dropout, activation=None))
        self.lin1 = nn.Linear(64, 5)
        self.lin2 = nn.Linear(5 * self.node_count, 2)

    def forward(self, g, features):
        h = features
        for layer in self.layers:
            h = layer(g, h)  
            
        x = self.lin1(h.view(-1, self.node_count, 64))
        x = self.lin2(x.view(-1, 5 * self.node_count))
        
        return F.log_softmax(x.view(-1, 2), dim=-1)

In [20]:
# Hyperparameters
n_hidden = 64
n_layers = 2
dropout = 0.5
aggregator_type = 'gcn'
n_classes = 2
in_feats = trainset[0][0].ndata['x'].shape[1]

def reset_model():
    weight_decay = 5e-4
    lr = 2e-3
    neg_sample_size = 100

    model = GraphSAGEModel(in_feats,
                             n_hidden,
                             n_classes,
                             n_layers,
                             F.relu,
                             dropout,
                             aggregator_type)

    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    return model, optimizer

In [21]:
# use optimizer
criteria = nn.NLLLoss()

### Evaluation

In [22]:
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [23]:
def train(model, optimizer, epoch):
    model.train()
    
    
    eval_accuracy = 0
    nb_eval_steps = 0
    
    for batch, (g, label, _) in enumerate(train_loader):
        g = g.to(device)
        output = model(g, g.ndata['x'])
        loss = criteria(output, label.view(-1).to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 40 == 0:
            print("Epoch {:d} |Batch {:d} | Loss {:.4f} ".format(epoch, batch, loss.item()))
        eval_accuracy += loss.item()
        nb_eval_steps += 1
            
    
    print("Average Training Loss: {0:.4f}".format(eval_accuracy/nb_eval_steps)) 

In [24]:
def test(model, test_loader):   
    print("")
    print("Running Validation...")
    
    eval_accuracy = 0
    nb_eval_steps = 0
    
    model.eval()
    
    all_predictions = []
    all_labels = []
    
    for batch, (g, label, _) in enumerate(test_loader):
        g = g.to(device)
        
        with torch.no_grad():
            output = model(g, g.ndata['x']) 
        

        logits = output.detach().cpu().numpy()
        label_ids = label.to('cpu').numpy()
        
        prediction = list(np.argmax(logits, axis=1).flatten())
        all_predictions.extend(prediction)
        all_labels.extend(label_ids.flatten())
        
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)
        
        eval_accuracy += tmp_eval_accuracy
        nb_eval_steps += 1
        
    
    accuracy = eval_accuracy/nb_eval_steps
    
    print("   Accuracy: {0:.4f}".format(accuracy))
    
    f1 = f1_score(all_predictions, all_labels, average = 'macro')
    precision = precision_score(all_predictions, all_labels, average = 'macro')
    recall = recall_score(all_predictions, all_labels, average = 'macro')
        
    print("  F1-score: {0:.4f}".format(f1))
    print("  Precision: {0:.4f}".format(precision))
    print("  Recall: {0:.4f}".format(recall))
    print()

    matrix = confusion_matrix(all_predictions, all_labels)

    tp = matrix[0][0]
    fp = matrix[0][1]
    fn = matrix[1][0]
    tn = matrix[1][1]
    
    sarcastic_precision = tp/ (tp + fp)
    sarcastic_f1 = 2*tp / (2*tp + fn + fp)
    sarcastic_recall = tp / (tp + fn)
    
    print("  Sarcastic Precision: {0:.4f}".format(sarcastic_precision))
    print("  Sarcastic F1-score: {0:.4f}".format(sarcastic_f1))
    print("  Sarcastic Recall: {0:.4f}".format(sarcastic_recall)) 
    
    print()
    
    
    nonsarcastic_precision = tn / (tn + fn)
    nonsarcastic_f1 = 2*tn / (2*tn + fn + fp)
    nonsarcastic_recall = tn / (tn + fp)
    
    print("  Non-sarcastic Precision: {0:.4f}".format(nonsarcastic_precision))
    print("  Non-Sarcastic F1-score: {0:.4f}".format(nonsarcastic_f1))
    print("  Non-sarcasm Recall: {0:.4f}".format(nonsarcastic_recall))

    return [f1, precision, recall, sarcastic_f1, sarcastic_precision, sarcastic_recall, nonsarcastic_f1, nonsarcastic_precision, nonsarcastic_recall]

In [25]:
def save_model(filename = 'model/distilbert-'+ config['save_point'] +'.pb'):
    print('Saving model...')
    torch.save(model, filename)

In [26]:
def runner(model, optimizer):
    best_model = model
    best_results = [0, 0, 0, 0, 0, 0, 0, 0, 0]

    for epoch in range(config['epochs']):
        print('======== Epoch {:} / {:} ========'.format(epoch + 1, config['epochs']))
        print('Training...')
        train(model, optimizer, epoch)
        results = test(model, test_loader)
        if results[0] > best_results[0]:
            print('Saving ...')
            best_results = results
            save_model()
            print('Done!')

    print("")
    print("Training complete!")
    return best_results

In [27]:
all_results = []
t0 = time.time()

total_iterations = 5
for iteration in range(total_iterations):
    print("  ==========Iteration: {:d}".format(iteration))
    model, optimizer = reset_model()

    result = runner(model, optimizer)
    all_results.append(result)

final_results = [sum(value)/3 for value in zip(all_results[0], all_results[1], all_results[2])]

print("  F1-score: {0:.4f}".format(final_results[0]))
print("  Precision: {0:.4f}".format(final_results[1]))
print("  Recall: {0:.4f}".format(final_results[2]))
print()

print("  Sarcastic Precision: {0:.4f}".format(final_results[3]))
print("  Sarcastic F1-score: {0:.4f}".format(final_results[4]))
print("  Sarcastic Recall: {0:.4f}".format(final_results[5])) 

print()

print("  Non-sarcastic Precision: {0:.4f}".format(final_results[6]))
print("  Non-Sarcastic F1-score: {0:.4f}".format(final_results[7]))
print("  Non-sarcasm Recall: {0:.4f}".format(final_results[8]))

Training...
Epoch 0 |Batch 0 | Loss 0.8774 
Epoch 0 |Batch 40 | Loss 0.4599 
Epoch 0 |Batch 80 | Loss 0.5754 
Epoch 0 |Batch 120 | Loss 0.7321 
Epoch 0 |Batch 160 | Loss 0.6820 
Epoch 0 |Batch 200 | Loss 0.3441 
Average Training Loss: 0.5600

Running Validation...
   Accuracy: 0.6614
  F1-score: 0.6603
  Precision: 0.6614
  Recall: 0.6635

  Sarcastic Precision: 0.6449
  Sarcastic F1-score: 0.6796
  Sarcastic Recall: 0.7182

  Non-sarcastic Precision: 0.6821
  Non-Sarcastic F1-score: 0.6410
  Non-sarcasm Recall: 0.6045
Saving ...
Saving model...
Done!
Training...
Epoch 1 |Batch 0 | Loss 0.6500 
Epoch 1 |Batch 40 | Loss 0.5393 
Epoch 1 |Batch 80 | Loss 0.5049 
Epoch 1 |Batch 120 | Loss 0.7063 
Epoch 1 |Batch 160 | Loss 0.3607 
Epoch 1 |Batch 200 | Loss 0.5180 
Average Training Loss: 0.5310

Running Validation...
   Accuracy: 0.6636
  F1-score: 0.6617
  Precision: 0.6636
  Recall: 0.6674

  Sarcastic Precision: 0.6423
  Sarcastic F1-score: 0.6871
  Sarcastic Recall: 0.7386

  Non-sarcast

Epoch 4 |Batch 120 | Loss 0.4931 
Epoch 4 |Batch 160 | Loss 0.4669 
Epoch 4 |Batch 200 | Loss 0.9055 
Average Training Loss: 0.5265

Running Validation...
   Accuracy: 0.6705
  F1-score: 0.6704
  Precision: 0.6705
  Recall: 0.6705

  Sarcastic Precision: 0.6736
  Sarcastic F1-score: 0.6674
  Sarcastic Recall: 0.6614

  Non-sarcastic Precision: 0.6674
  Non-Sarcastic F1-score: 0.6734
  Non-sarcasm Recall: 0.6795
Saving ...
Saving model...
Done!
Training...
Epoch 5 |Batch 0 | Loss 0.5582 
Epoch 5 |Batch 40 | Loss 0.4946 
Epoch 5 |Batch 80 | Loss 0.4281 
Epoch 5 |Batch 120 | Loss 0.4294 
Epoch 5 |Batch 160 | Loss 0.6366 
Epoch 5 |Batch 200 | Loss 0.4252 
Average Training Loss: 0.5241

Running Validation...
   Accuracy: 0.6716
  F1-score: 0.6716
  Precision: 0.6716
  Recall: 0.6716

  Sarcastic Precision: 0.6697
  Sarcastic F1-score: 0.6734
  Sarcastic Recall: 0.6773

  Non-sarcastic Precision: 0.6736
  Non-Sarcastic F1-score: 0.6697
  Non-sarcasm Recall: 0.6659
Saving ...
Saving model...


Epoch 9 |Batch 40 | Loss 0.4930 
Epoch 9 |Batch 80 | Loss 0.3268 
Epoch 9 |Batch 120 | Loss 0.4098 
Epoch 9 |Batch 160 | Loss 0.4690 
Epoch 9 |Batch 200 | Loss 0.4737 
Average Training Loss: 0.5131

Running Validation...
   Accuracy: 0.6670
  F1-score: 0.6670
  Precision: 0.6670
  Recall: 0.6671

  Sarcastic Precision: 0.6682
  Sarcastic F1-score: 0.6659
  Sarcastic Recall: 0.6636

  Non-sarcastic Precision: 0.6659
  Non-Sarcastic F1-score: 0.6682
  Non-sarcasm Recall: 0.6705

Training complete!
Training...
Epoch 0 |Batch 0 | Loss 0.7703 
Epoch 0 |Batch 40 | Loss 0.3547 
Epoch 0 |Batch 80 | Loss 0.6561 
Epoch 0 |Batch 120 | Loss 0.5586 
Epoch 0 |Batch 160 | Loss 0.4479 
Epoch 0 |Batch 200 | Loss 0.5932 
Average Training Loss: 0.5473

Running Validation...
   Accuracy: 0.6534
  F1-score: 0.6532
  Precision: 0.6534
  Recall: 0.6538

  Sarcastic Precision: 0.6611
  Sarcastic F1-score: 0.6449
  Sarcastic Recall: 0.6295

  Non-sarcastic Precision: 0.6464
  Non-Sarcastic F1-score: 0.6615
  N

Epoch 4 |Batch 40 | Loss 0.7303 
Epoch 4 |Batch 80 | Loss 0.3754 
Epoch 4 |Batch 120 | Loss 0.4681 
Epoch 4 |Batch 160 | Loss 0.4032 
Epoch 4 |Batch 200 | Loss 0.5454 
Average Training Loss: 0.5264

Running Validation...
   Accuracy: 0.6739
  F1-score: 0.6732
  Precision: 0.6739
  Recall: 0.6752

  Sarcastic Precision: 0.6597
  Sarcastic F1-score: 0.6877
  Sarcastic Recall: 0.7182

  Non-sarcastic Precision: 0.6908
  Non-Sarcastic F1-score: 0.6587
  Non-sarcasm Recall: 0.6295
Saving ...
Saving model...
Done!
Training...
Epoch 5 |Batch 0 | Loss 0.6379 
Epoch 5 |Batch 40 | Loss 0.2624 
Epoch 5 |Batch 80 | Loss 0.5571 
Epoch 5 |Batch 120 | Loss 0.5046 
Epoch 5 |Batch 160 | Loss 0.4874 
Epoch 5 |Batch 200 | Loss 0.5412 
Average Training Loss: 0.5151

Running Validation...
   Accuracy: 0.6716
  F1-score: 0.6709
  Precision: 0.6716
  Recall: 0.6729

  Sarcastic Precision: 0.6576
  Sarcastic F1-score: 0.6855
  Sarcastic Recall: 0.7159

  Non-sarcastic Precision: 0.6883
  Non-Sarcastic F1-scor