Sub Task A results: https://arxiv.org/pdf/1903.08983.pdf

Data from: https://github.com/sandro272/SemEval2019-OffensEval/tree/51dde8c38b512d5fb536fd74b2afd3dc7ed73831/train_data

https://github.com/sandro272/SemEval2019-OffensEval/tree/51dde8c38b512d5fb536fd74b2afd3dc7ed73831/test_data

pre-processing: https://github.com/sandro272/SemEval2019-OffensEval/blob/51dde8c38b512d5fb536fd74b2afd3dc7ed73831/code/demo.py#L9



In [None]:
!pip install transformers

In [None]:
import torch   
from torchtext import data, datasets 
from torchtext.data import TabularDataset 
import pandas as pd
from torchtext.vocab import Vectors
from torch.nn import init
import torch.nn as nn
from torchtext.vocab import Vectors
import matplotlib.pyplot as plt
from transformers import get_linear_schedule_with_warmup,get_cosine_schedule_with_warmup,get_constant_schedule_with_warmup,get_cosine_with_hard_restarts_schedule_with_warmup
from transformers import BertTokenizer
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification, AdamW, BertConfig,AdamWeightDecay
from sklearn.metrics import f1_score
import numpy as np
import random

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive/ so the CSV files
# read in later cells are reachable.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
!ls "/content/drive/My Drive/576-project"

In [None]:
# Seed PyTorch's random number generator and set the cuDNN "deterministic" flag
# before any data splitting or model construction happens.
torch.manual_seed(2020)
torch.backends.cudnn.deterministic = True

optimizer & learning rate scheduler

**hyperparameters**


In [None]:
# Number of passes over the training set.
epochs = 5
# Learning rate passed to the AdamW optimizer.
learning_rate = 2e-5
# Epsilon passed to the AdamW optimizer.
eps = 1e-8
# Batch size shared by the train, validation and test DataLoaders.
batch_size = 64
# Pre-trained checkpoint name used for both the tokenizer and the model.
bert_type = 'bert-base-uncased'

read traindata and testdata

In [None]:
# Load the training and test splits from the mounted Drive folder.
# Later cells read the `tweet` and `label` columns of both frames.
traindata = pd.read_csv('/content/drive/My Drive/576-project/traindata.csv')
testdata = pd.read_csv('/content/drive/My Drive/576-project/testdata.csv')

In [None]:
# Peek at the first rows of each frame.
# NOTE(review): a notebook cell presumably renders only its last expression, so
# the traindata preview is likely not shown — confirm, and wrap both calls in
# display()/print() if both previews are wanted.
traindata.head()
testdata.head()

In [None]:
# Pull the tweet texts and their labels out of the training frame as arrays.
train_sentences = traindata.tweet.values
train_labels = traindata.label.values

tokenization & input formatting
1. bert tokenizer

In [None]:
# Load the tokenizer that belongs to the `bert_type` checkpoint, with
# lower-casing enabled.
tokenizer = BertTokenizer.from_pretrained(bert_type, do_lower_case=True)

In [None]:
# Length, in token ids with `[CLS]` and `[SEP]` included, of the longest
# training sentence; stays 0 when there are no sentences.
max_len = max(
    (len(tokenizer.encode(text, add_special_tokens=True)) for text in train_sentences),
    default=0,
)

print('Max sentence length: ', max_len)

Since the longest sentence here is 115 tokens, set the maximum length to 128.

In [None]:
# Tokenize every training sentence and map its tokens to their word IDs,
# producing fixed-length (128) id and attention-mask tensors.
encoded_sentences = [
    tokenizer.encode_plus(
        text,                          # Sentence to encode.
        add_special_tokens=True,       # Add '[CLS]' and '[SEP]'.
        max_length=128,                # Pad & truncate all sentences.
        pad_to_max_length=True,
        return_attention_mask=True,    # Mask separates real tokens from padding.
        return_tensors='pt',           # Return PyTorch tensors.
    )
    for text in train_sentences
]

# Stack the per-sentence tensors along the first (batch) dimension.
input_ids = torch.cat([enc['input_ids'] for enc in encoded_sentences], dim=0)
attention_masks = torch.cat([enc['attention_mask'] for enc in encoded_sentences], dim=0)
labels = torch.tensor(train_labels)

create train and validation set (the proportion is 8:2)

In [None]:

# Bundle ids, masks and labels so every sample is one (ids, mask, label) triple.
dataset = TensorDataset(input_ids, attention_masks, labels)

# 80% of the samples go to training; whatever is left is held out for validation.
n_samples = len(dataset)
train_size = int(0.8 * n_samples)
val_size = n_samples - train_size

# Random, non-overlapping split with exactly those sizes.
split_sizes = [train_size, val_size]
train_dataset, val_dataset = random_split(dataset, split_sizes)

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))

In [None]:
# Training batches are drawn in random order; validation batches are read in
# dataset order. Both use the shared `batch_size`.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=RandomSampler(train_dataset),
)
validation_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    sampler=SequentialSampler(val_dataset),
)

 Use BertForSequenceClassification

 refer: https://huggingface.co/transformers/v2.2.0/model_doc/bert.html

In [None]:

# Pre-trained BERT encoder (`bert_type`) with a 2-class classification head;
# attention maps and hidden states are not returned.
model = BertForSequenceClassification.from_pretrained(
    bert_type,
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False,
)

# Put the model on the same device the batches are moved to. `device` is "cuda"
# or "cpu", so this also works on CPU-only runtimes where a bare model.cuda()
# raises.
model.to(device)

train the classification model

optimizer

In [None]:
# AdamW over all model parameters, using the `learning_rate` and `eps` values
# from the hyperparameter cell.
optimizer = AdamW(model.parameters(),lr = learning_rate,eps=eps)

In [None]:
# Total number of training steps is [number of batches] x [number of epochs].
total_steps = len(train_dataloader) * epochs

# Cosine learning-rate schedule with one warm-up step, spanning `total_steps`
# optimizer steps, with num_cycles set to 0.6.
scheduler =  get_cosine_schedule_with_warmup(optimizer,num_warmup_steps = 1,num_training_steps = total_steps,num_cycles=0.6 )
# Alternative kept for reference: constant schedule with no warm-up.
#scheduler =  get_constant_schedule_with_warmup(optimizer,num_warmup_steps = 0)

training loop

 This training code is based on the `run_glue.py` script here:
https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128


In [None]:
# Re-seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) with
# the same value right before training.
seed_val = 576

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [None]:
# The training loop appends one dict of averaged metrics to this list per epoch.
training_stats = []

In [None]:
def flat_accuracy(preds, labels):
    """Return the fraction of samples whose arg-max prediction equals the label.

    Args:
        preds: 2-D array of per-class scores, one row per sample.
        labels: array of integer class labels, one per sample.

    Returns:
        Accuracy as a float in [0, 1].
    """
    predicted = np.argmax(preds, axis=1).ravel()
    expected = labels.ravel()
    n_correct = (predicted == expected).sum()
    return n_correct / expected.size


In [None]:
def f1score(preds, labels):
    """Return the F1 score of the positive class (label 1).

    Args:
        preds: 2-D array of per-class scores, one row per sample; the predicted
            class is the arg-max of each row.
        labels: array of 0/1 class labels, one per sample.

    Returns:
        F1 of class 1 as a float. When there are no true positives the score is
        0.0 rather than NaN, so one batch without correctly predicted positives
        cannot turn a running sum of batch scores into NaN.
    """
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    tp = np.sum((labels_flat == 1) & (pred_flat == 1))
    fp = np.sum((labels_flat == 0) & (pred_flat == 1))
    fn = np.sum((labels_flat == 1) & (pred_flat == 0))

    # With zero true positives, precision and/or recall are 0/0 or 0, and the
    # harmonic mean is undefined; report 0.0 for that case.
    if tp == 0:
        return 0.0

    # tp > 0 guarantees both denominators below are non-zero.
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return float(2 * precision * recall / (precision + recall))

In [None]:
def f1_score_macro(preds, labels):
    """Return the macro-averaged F1 score computed by `f1_score`.

    Args:
        preds: 2-D array of per-class scores, one row per sample; the predicted
            class is the arg-max of each row.
        labels: array of integer class labels, one per sample.

    Returns:
        The macro-F1 value.
    """
    predicted = np.argmax(preds, axis=1).ravel()
    macro_f1 = f1_score(labels.ravel(), predicted, average='macro')
    return np.sum(macro_f1)

In [None]:
# Fine-tune for `epochs` epochs. After the training part of each epoch the model
# is evaluated on the validation split, and the averaged metrics of both parts
# are appended to `training_stats`.
for epoch_i in range(0, epochs):

    # ========================================
    #               Training
    # ========================================
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Running sums over the batches of this epoch.
    total_train_loss = 0
    total_train_accuracy = 0
    total_train_f1 = 0
    total_train_f1_macro = 0
    # Batches actually processed. This is smaller than len(train_dataloader)
    # when the early exit below fires, so the averages must divide by it.
    train_steps = 0

    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):
        if step % 20 == 0 and not step == 0:
            # Report progress every 20 batches.
            print('  Batch {:>5,}  of  {:>5,}.   '.format(step, len(train_dataloader)))
            print("  batch loss: {0:.4f}".format(total_train_loss/step))
            # Cut the epoch short once the running mean loss drops below 0.41.
            if total_train_loss/step < 0.41:
                break

        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        model.zero_grad()
        # Index the outputs rather than tuple-unpacking them: indexing works
        # whether the model returns a plain tuple or a dict-like output object
        # (unpacking the latter would yield its keys instead of its values).
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss, logits = outputs[0], outputs[1]

        total_train_loss += loss.item()
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        total_train_accuracy += flat_accuracy(logits, label_ids)
        total_train_f1 += f1score(logits, label_ids)
        total_train_f1_macro += f1_score_macro(logits, label_ids)

        loss.backward()
        # Clip the gradient norm to 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()
        train_steps += 1

    # Average over the batches that were actually run (at least 1, to avoid a
    # division by zero on an empty loader).
    n_train_batches = max(train_steps, 1)
    avg_train_loss = total_train_loss / n_train_batches
    avg_train_accuracy = total_train_accuracy / n_train_batches

    print("")
    print("  Average training loss: {0:.4f}".format(avg_train_loss))

    avg_train_f1 = total_train_f1 / n_train_batches
    print("  Average training F1: {0:.4f}".format(avg_train_f1))

    avg_train_f1_macro = total_train_f1_macro / n_train_batches
    print("  Average training macro-F1: {0:.4f}".format(avg_train_f1_macro))

    # ========================================
    #               Validation
    # ========================================
    print("")
    print("Running Validation...")
    model.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    total_eval_f1 = 0
    total_eval_f1_macro = 0

    for batch in validation_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        # No gradients are needed for evaluation.
        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss, logits = outputs[0], outputs[1]
        total_eval_loss += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        total_eval_accuracy += flat_accuracy(logits, label_ids)
        total_eval_f1 += f1score(logits, label_ids)
        total_eval_f1_macro += f1_score_macro(logits, label_ids)

    # Validation metrics are per-batch values averaged over all batches.
    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print("  Accuracy: {0:.4f}".format(avg_val_accuracy))

    avg_val_loss = total_eval_loss / len(validation_dataloader)
    print("  Validation Loss: {0:.4f}".format(avg_val_loss))

    avg_val_f1 = total_eval_f1 / len(validation_dataloader)
    print("  Validation F1: {0:.4f}".format(avg_val_f1))

    avg_val_f1_macro = total_eval_f1_macro / len(validation_dataloader)
    print("  Validation macro-F1: {0:.4f}".format(avg_val_f1_macro))

    # Record this epoch's statistics.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Train. Accur.': avg_train_accuracy,
            'Valid. Accur.': avg_val_accuracy,
            'Train F1.': avg_train_f1,
            'Valid F1.': avg_val_f1,
            'Train macro-F1':avg_train_f1_macro,
            'Valid macro-F1':avg_val_f1_macro,
        }
    )
print("")

In [None]:

# Turn the per-epoch metric dicts into a DataFrame (one row per epoch) and
# display it as the cell output.
df_stats = pd.DataFrame(data=training_stats)
df_stats

In [None]:
# Plot loss, accuracy and macro-F1 per epoch (training vs. validation) in three
# side-by-side panels.
fig = plt.figure()
ax1 = fig.add_subplot(1, 3, 1)
ax2 = fig.add_subplot(1, 3, 2)
ax3 = fig.add_subplot(1, 3, 3)

# Use the recorded epoch numbers as x values. Plotting against the default
# 0-based DataFrame index would shift every point one tick to the left of its
# epoch label, and hard-coded ticks would break when `epochs` changes.
epoch_axis = df_stats['epoch']

ax1.plot(epoch_axis, df_stats['Valid. Loss'], label='validation loss')
ax1.plot(epoch_axis, df_stats['Training Loss'], label='training loss')

ax2.plot(epoch_axis, df_stats['Train. Accur.'], label='training accuracy')
ax2.plot(epoch_axis, df_stats['Valid. Accur.'], label='validation accuracy')

ax3.plot(epoch_axis, df_stats['Train macro-F1'], label='training macro-F1-score.')
ax3.plot(epoch_axis, df_stats['Valid macro-F1'], label='validation macro-F1-score.')

# Identical axis decoration for all three panels; only the metric name differs.
for panel, metric_name in ((ax1, 'Loss'), (ax2, 'Accuracy'), (ax3, 'macro-F1')):
    panel.set_xlabel('Epoch (s)')
    panel.set_ylabel(metric_name)
    panel.set_title(metric_name)
    panel.set_xticks(list(epoch_axis))
    panel.legend()

plt.show()

test dataset

In [None]:
# Pull the tweet texts and labels out of the test frame as arrays.
sentences = testdata.tweet.values
labels = testdata.label.values

# Accumulators for the token-id and attention-mask tensors; they are filled by
# the tokenization loop in the following cell.
input_ids = []
attention_masks = []

In [None]:
# Encode each test sentence with the same settings as the training data
# (special tokens added, padded/truncated to 128, PyTorch tensors).
test_encodings = [
    tokenizer.encode_plus(
        text,                          # Sentence to encode.
        add_special_tokens=True,       # Add '[CLS]' and '[SEP]'.
        max_length=128,                # Pad & truncate all sentences.
        pad_to_max_length=True,
        return_attention_mask=True,    # Construct attention masks.
        return_tensors='pt',           # Return PyTorch tensors.
    )
    for text in sentences
]
input_ids.extend(enc['input_ids'] for enc in test_encodings)
attention_masks.extend(enc['attention_mask'] for enc in test_encodings)

# Stack the per-sentence tensors along the first (batch) dimension.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

# Test batches are read in dataset order.
prediction_data = TensorDataset(input_ids, attention_masks, labels)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(
    prediction_data,
    sampler=prediction_sampler,
    batch_size=batch_size,
)

In [None]:
from sklearn.metrics import confusion_matrix,plot_confusion_matrix

def TP(preds, labels):
    """Count true positives: samples labelled 1 whose arg-max prediction is 1."""
    predicted = np.argmax(preds, axis=1).ravel()
    actual = labels.ravel()
    return ((actual == 1) & (predicted == 1)).sum()
def FP(preds, labels):
    """Count false positives: samples labelled 0 whose arg-max prediction is 1."""
    predicted = np.argmax(preds, axis=1).ravel()
    actual = labels.ravel()
    return ((actual == 0) & (predicted == 1)).sum()
def FN(preds, labels):
    """Count false negatives: samples labelled 1 whose arg-max prediction is 0."""
    predicted = np.argmax(preds, axis=1).ravel()
    actual = labels.ravel()
    return ((actual == 1) & (predicted == 0)).sum()
def TN(preds, labels):
    """Count true negatives: samples labelled 0 whose arg-max prediction is 0."""
    predicted = np.argmax(preds, axis=1).ravel()
    actual = labels.ravel()
    return ((actual == 0) & (predicted == 0)).sum()

In [None]:
# Evaluate the fine-tuned model on the test set: accumulate per-batch accuracy,
# F1 and macro-F1, sum the confusion-matrix counts, and keep the raw logits and
# labels of every batch.
model.eval()
predictions , true_labels = [], []

total_test_accuracy = 0
total_test_f1 = 0
total_test_f1_macro = 0
tp = 0
fp = 0
fn = 0
tn = 0

for batch in prediction_dataloader:
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch

    # No labels are passed, so the first output holds the logits.
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask)
    logits = outputs[0]
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    total_test_accuracy += flat_accuracy(logits, label_ids)

    # Compute each per-batch score once and reuse it for the running sum.
    test_f1 = f1score(logits, label_ids)
    total_test_f1 += test_f1

    test_f1_macro = f1_score_macro(logits, label_ids)
    total_test_f1_macro += test_f1_macro

    tp += TP(logits, label_ids)
    fp += FP(logits, label_ids)
    fn += FN(logits, label_ids)
    tn += TN(logits, label_ids)

    predictions.append(logits)
    true_labels.append(label_ids)

# Average the per-batch metrics once, after the loop. The max(..., 1) guard
# keeps the averages defined (as 0) when the loader yields no batches.
n_test_batches = max(len(prediction_dataloader), 1)
avg_test_accuracy = total_test_accuracy / n_test_batches
avg_test_f1 = total_test_f1 / n_test_batches
avg_test_f1_macro = total_test_f1_macro / n_test_batches

print(" test Accuracy: {0:.4f}".format(avg_test_accuracy))
print(" test f1: {0:.4f}".format(avg_test_f1))
print(" test f1_macro: {0:.4f}".format(avg_test_f1_macro))


In [None]:
# Confusion-matrix counts summed over all test batches, with label 1 as the
# positive class: true positives, false positives, false negatives, true negatives.
print(tp,fp,fn,tn)

In [None]:
# Confusion matrix for plotting, built from the counters accumulated during test
# evaluation instead of numbers pasted from an earlier run.
# Rows are the true class and columns the predicted class, both in the order
# [positive (label 1), negative (label 0)], which matches the "True label" /
# "Predicted label" axes drawn by plot_confusion_matrix.
# NOTE(review): assumes label 1 encodes OFFENSIVE — confirm against the CSV.
pred1 = np.array([[tp, fn], [fp, tn]])
pred1

In [None]:
def plot_confusion_matrix(cm, classes,cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as a colour-mapped image.

    Args:
        cm: 2-D array of counts; rows are drawn on the "True label" axis and
            columns on the "Predicted label" axis.
        classes: tick labels, used for both axes.
        cmap: matplotlib colormap for the image.

    Returns:
        The matplotlib Axes holding the plot.
    """
    print(cm)

    fig, ax = plt.subplots()
    image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(image, ax=ax)

    # One tick per matrix row/column, labelled with the class names.
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    ax.set_xticks(np.arange(n_cols))
    ax.set_yticks(np.arange(n_rows))
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')

    # Slant the x tick labels and anchor them at their right end.
    x_labels = ax.get_xticklabels()
    plt.setp(x_labels, rotation=45, ha="right", rotation_mode="anchor")

    fig.tight_layout()
    plt.show()
    return ax


In [None]:
# Draw the confusion matrix; the two class names label both axes.
plot_confusion_matrix(pred1,classes = ["OFFENSIVE","NOT"])