In [None]:
# Mount the user's Google Drive into the Colab VM at /content/gdrive.
# NOTE(review): later cells read data through the relative path "./gdrive/...", which only
# resolves to this mount point when the working directory is /content — presumably the
# Colab default; confirm before running elsewhere.
from google.colab import drive

drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip3 install torch torchvision pandas transformers scikit-learn tensorflow numpy seaborn matplotlib textwrap3 sentencepiece

In [None]:
import torch

# Use the GPU when CUDA is available; otherwise run everything on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cpu')

In [None]:
import transformers
from transformers import XLNetModel, XLNetTokenizer, XLNetForSequenceClassification, BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
from torchvision import transforms
import torchvision
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# Pretrained checkpoint identifier. The same name is passed to XLNetModel.from_pretrained
# (text encoder) and XLNetTokenizer.from_pretrained (tokenizer) further down the notebook.
PRE_TRAINED_MODEL_NAME = 'xlnet-base-cased'

def clean_text(text):
    """Normalise a raw tweet before tokenisation.

    Steps, in order:
      1. replace @mentions with a space,
      2. replace http/https URLs with a space,
      3. replace every character that is not an ASCII letter, digit or one of . , ! ? '
         with a space (this also covers tabs and newlines),
      4. collapse runs of spaces into a single space.

    Args:
        text: the tweet as a ``str``.

    Returns:
        The cleaned string. Leading/trailing single spaces are not stripped.
    """
    text = re.sub(r"@[A-Za-z0-9_]+", ' ', text)
    text = re.sub(r"https?://[A-Za-z0-9./]+", ' ', text)
    # The range must be A-Z: the previous "A-z" also matched [ \ ] ^ _ and the backtick,
    # so those symbols leaked through the filter.
    text = re.sub(r"[^a-zA-Z.,!?'0-9]", ' ', text)
    text = re.sub(r" +", ' ', text)
    return text

def label_to_target(text):
  """Map a label string to a binary target: 1 for exactly "informative", 0 for anything else."""
  return 1 if text == "informative" else 0

def _load_split(tsv_path):
  """Read one TSV split and prepare it for the dataset class.

  Keeps the image path, tweet text and label columns, shuffles the rows with a fixed
  seed (24), cleans the tweet text with ``clean_text`` and converts the label string to
  a 0/1 target with ``label_to_target``.
  """
  frame = pd.read_csv(tsv_path, sep='\t')
  frame = frame[['image', 'tweet_text', 'label_text']]
  frame = frame.sample(frac=1, random_state = 24).reset_index(drop=True)
  frame['tweet_text'] = frame['tweet_text'].apply(clean_text)
  frame['label_text'] = frame['label_text'].apply(label_to_target)
  return frame


# The three splits go through exactly the same preparation pipeline.
df_train = _load_split("./gdrive/MyDrive/FYP/task_informative_text_img_agreed_lab_train.tsv")
df_val = _load_split("./gdrive/MyDrive/FYP/task_informative_text_img_agreed_lab_dev.tsv")
df_test = _load_split("./gdrive/MyDrive/FYP/task_informative_text_img_agreed_lab_test.tsv")


In [None]:
# Root folder that the relative image paths stored in the TSV files are joined onto.
data_dir = "./gdrive/MyDrive/FYP/"
class DisasterTweetDataset(Dataset):
  """Paired tweet-text / tweet-image dataset.

  Each item bundles the tokenised tweet, the preprocessed image tensor and the target.

  Args:
    tweets: indexable collection of tweet texts.
    targets: indexable collection of numeric labels, aligned with ``tweets``.
    paths: indexable collection of image paths relative to ``data_dir``.
    tokenizer: tokenizer exposing ``encode_plus``.
    max_len: length every tweet is padded / truncated to.
  """

  def __init__(self, tweets, targets, paths, tokenizer, max_len):
    self.tweets = tweets
    self.targets = targets
    self.tokenizer = tokenizer
    self.max_len = max_len
    self.paths = paths
    # Resize -> centre crop to 224x224 -> tensor -> per-channel normalisation.
    # The mean/std values are presumably the ImageNet statistics expected by the
    # pretrained image backbone — confirm.
    self.transform = transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

  def __len__(self):
    """Number of samples (one per tweet)."""
    return len(self.tweets)

  def __getitem__(self, item):
    """Return a dict with the raw tweet, token ids, attention mask, target and image tensor."""
    tweet = str(self.tweets[item])
    target = self.targets[item]
    path = str(self.paths[item])

    # Open inside a context manager so the underlying file handle is closed as soon as
    # the pixels have been decoded; convert() returns an independent in-memory image.
    with Image.open(data_dir + path) as raw_img:
      img = raw_img.convert('RGB')
    img = self.transform(img)

    encoding = self.tokenizer.encode_plus(
      tweet,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding='max_length',
      return_attention_mask=True,
      return_tensors='pt',
      truncation = True
    )

    return {
      'tweet_text': tweet,
      # encode_plus returns tensors with a leading batch axis of 1; flatten drops it.
      'input_ids': encoding['input_ids'].flatten(),
      'attention_mask': encoding['attention_mask'].flatten(),
      'targets': torch.tensor(target, dtype=torch.long),
      'tweet_image': img
    }

def create_data_loader(df, tokenizer, max_len, batch_size):
  """Wrap a prepared split DataFrame in a ``DataLoader``.

  Args:
    df: frame with ``tweet_text``, ``label_text`` and ``image`` columns.
    tokenizer: tokenizer handed through to ``DisasterTweetDataset``.
    max_len: token length every tweet is padded / truncated to.
    batch_size: number of samples per batch.

  Returns:
    A ``DataLoader`` over the split that uses two worker processes and iterates in
    the frame's row order (no shuffling is requested).
  """
  dataset_kwargs = {
    'tweets': df.tweet_text.to_numpy(),
    'targets': df.label_text.to_numpy(),
    'paths': df.image.to_numpy(),
    'tokenizer': tokenizer,
    'max_len': max_len,
  }
  dataset = DisasterTweetDataset(**dataset_kwargs)
  return DataLoader(dataset, batch_size=batch_size, num_workers=2)


class TweetClassifier(nn.Module):
  """Binary tweet classifier that fuses frozen text and image features.

  A frozen XLNet encoder and a frozen ResNeXt-50 produce features that are concatenated,
  batch-normalised and passed through a five-layer MLP head ending in a sigmoid, so the
  output is a probability per sample.

  NOTE: the text encoder lives in ``self.bert`` although it is an XLNet model. The
  attribute name is kept because it is part of the ``state_dict`` keys of saved checkpoints.
  """

  def __init__(self):
    super(TweetClassifier, self).__init__()
    self.bert = XLNetModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    for param in self.bert.parameters():
      param.requires_grad = False

    self.resnet = torchvision.models.resnext50_32x4d(pretrained=True)
    for param in self.resnet.parameters():
      param.requires_grad = False

    # Width of the fused vector: text hidden size + size of the image backbone's final
    # fully connected output (1000 for the stock ResNeXt-50 head).
    fused_dim = self.bert.config.hidden_size + self.resnet.fc.out_features

    self.bn = nn.BatchNorm1d(fused_dim)

    self.linear1 = nn.Linear(fused_dim, 1000)
    self.relu1    = nn.ReLU()
    self.dropout1 = nn.Dropout(p=0.4)

    self.linear2 = nn.Linear(1000, 500)
    self.relu2    = nn.ReLU()
    self.dropout2 = nn.Dropout(p=0.2)

    self.linear3 = nn.Linear(500, 250)
    self.relu3    = nn.ReLU()
    self.dropout3 = nn.Dropout(p=0.1)

    self.linear4 = nn.Linear(250, 125)
    self.relu4    = nn.ReLU()
    self.dropout4 = nn.Dropout(p=0.02)

    self.linear5 = nn.Linear(125, 1)
    self.sigmoid = nn.Sigmoid()

  def train(self, mode=True):
    """Switch the trainable head between train/eval mode; keep the frozen backbones in eval.

    Freezing weights with ``requires_grad = False`` does not stop dropout from firing or
    BatchNorm running statistics from being updated while a module is in training mode,
    so the backbones are forced back to eval mode after every mode switch.
    """
    super(TweetClassifier, self).train(mode)
    self.bert.eval()
    self.resnet.eval()
    return self

  def forward(self, input_ids, attention_mask, tweet_img):
    """Return the sigmoid probability for each sample in the batch.

    Args:
      input_ids: token ids for the text encoder.
      attention_mask: attention mask matching ``input_ids``.
      tweet_img: image batch for the ResNeXt backbone.

    Returns:
      Tensor of probabilities with one column per sample (output of ``Linear(125, 1)``
      followed by a sigmoid).
    """
    text_output = self.bert(
      input_ids=input_ids,
      attention_mask=attention_mask,
      return_dict=False
    )
    image_output = self.resnet(tweet_img)

    # Pool the first encoder output by averaging over dimension 1 (assumed to be the token
    # axis — confirm).
    # NOTE(review): the mean is unmasked, so padding positions contribute to the pooled vector.
    pooled_text = torch.mean(text_output[0], 1)
    merged_output = torch.cat((pooled_text, image_output), dim=1)
    bn_output = self.bn(merged_output)

    linear1_output = self.linear1(bn_output)
    relu1_output = self.relu1(linear1_output)
    dropout1_output = self.dropout1(relu1_output)

    linear2_output = self.linear2(dropout1_output)
    relu2_output = self.relu2(linear2_output)
    dropout2_output = self.dropout2(relu2_output)

    linear3_output = self.linear3(dropout2_output)
    relu3_output = self.relu3(linear3_output)
    dropout3_output = self.dropout3(relu3_output)

    linear4_output = self.linear4(dropout3_output)
    relu4_output = self.relu4(linear4_output)
    dropout4_output = self.dropout4(relu4_output)

    linear5_output = self.linear5(dropout4_output)

    probas = self.sigmoid(linear5_output)
    return probas


def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
  """Run one optimisation pass over ``data_loader``.

  For every batch: forward pass, loss, backward pass, gradient-norm clipping at 1.0,
  one optimizer step, one scheduler step, then the gradients are cleared.

  Args:
    model: module called with ``input_ids``, ``attention_mask`` and ``tweet_img``.
    data_loader: iterable of batch dicts with keys ``tweet_image``, ``input_ids``,
      ``attention_mask`` and ``targets``.
    loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
    optimizer: optimizer stepped once per batch.
    device: device the batch tensors are moved to.
    scheduler: learning-rate scheduler stepped once per batch.
    n_examples: denominator used for the accuracy.

  Returns:
    ``(accuracy, mean_loss)``: accuracy as a double tensor (rounded outputs that equal
    the targets, divided by ``n_examples``) and the mean of the per-batch losses.
  """
  model.train()

  batch_losses = []
  n_correct = 0

  for batch in data_loader:
    # Targets are reshaped to one column of floats to match the model's output layout.
    labels = batch["targets"].reshape(-1, 1).float().to(device)

    probs = model(
      input_ids=batch["input_ids"].to(device),
      attention_mask=batch["attention_mask"].to(device),
      tweet_img=batch["tweet_image"].to(device),
    )
    batch_loss = loss_fn(probs, labels)

    n_correct += (torch.round(probs) == labels).sum()
    batch_losses.append(batch_loss.item())

    batch_loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()

  accuracy = n_correct.double() / n_examples
  return accuracy, np.mean(batch_losses)

def eval_model(model, data_loader, loss_fn, device, n_examples):
  """Evaluate ``model`` on ``data_loader`` without tracking gradients.

  Args:
    model: module called with ``input_ids``, ``attention_mask`` and ``tweet_img``.
    data_loader: iterable of batch dicts with keys ``tweet_image``, ``input_ids``,
      ``attention_mask`` and ``targets``.
    loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
    device: device the batch tensors are moved to.
    n_examples: denominator used for the accuracy.

  Returns:
    ``(accuracy, mean_loss)``: accuracy as a double tensor (rounded outputs that equal
    the targets, divided by ``n_examples``) and the mean of the per-batch losses.
  """
  model.eval()

  batch_losses = []
  n_correct = 0

  with torch.no_grad():
    for batch in data_loader:
      # Targets are reshaped to one column of floats to match the model's output layout.
      labels = batch["targets"].reshape(-1, 1).float().to(device)

      probs = model(
        input_ids=batch["input_ids"].to(device),
        attention_mask=batch["attention_mask"].to(device),
        tweet_img=batch["tweet_image"].to(device),
      )

      batch_losses.append(loss_fn(probs, labels).item())
      n_correct += (torch.round(probs) == labels).sum()

  accuracy = n_correct.double() / n_examples
  return accuracy, np.mean(batch_losses)

In [None]:
# Samples per batch for all three loaders.
BATCH_SIZE = 512
# Token length every tweet is padded / truncated to by the dataset's tokenizer call.
MAX_LEN = 150

tokenizer = XLNetTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

# One loader per split; all share the tokenizer, sequence length and batch size.
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)


model = TweetClassifier()
model = model.to(device)

# Number of passes over the training loader made by the training cell.
EPOCHS = 50

# All model parameters are handed to the optimizer, including the backbone parameters that
# TweetClassifier marks with requires_grad = False.
# NOTE(review): correct_bias=False presumably disables Adam's bias correction in the
# transformers AdamW implementation — confirm against the installed version.
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
# train_epoch steps the scheduler once per batch, so the schedule length is
# (batches per epoch) * (epochs).
total_steps = len(train_data_loader) * EPOCHS

scheduler = get_linear_schedule_with_warmup(
  optimizer,
  num_warmup_steps=0,
  num_training_steps=total_steps
)

# Binary cross-entropy on probabilities: the model already applies a sigmoid to its output.
loss_fn = nn.BCELoss().to(device)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=798011.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1382015.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=760.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=467042463.0, style=ProgressStyle(descri…




Downloading: "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth" to /root/.cache/torch/hub/checkpoints/resnext50_32x4d-7cdf4587.pth


HBox(children=(FloatProgress(value=0.0, max=100441675.0), HTML(value='')))




In [None]:
# Restore the model / optimizer state and the bookkeeping values written by the training
# cell. Tensors are mapped onto the CPU while loading.
checkpoint = torch.load(
  "./gdrive/MyDrive/FYP/XlnetResNeXt-checkpoint.t7",
  map_location=torch.device('cpu'),
)
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch, best_accuracy = checkpoint['epoch'], checkpoint['best_accuracy']

print(start_epoch, best_accuracy, sep='\n')

29
tensor(0.8468, dtype=torch.float64)


In [None]:
# Per-epoch metrics, keyed by 'train_acc', 'train_loss', 'val_acc' and 'val_loss'.
history = defaultdict(list)
# Defaults for a fresh run. To resume instead, load a checkpoint into `model` and
# `optimizer` and take these two values from it rather than resetting them here.
start_epoch = 0
best_accuracy = -1
# The best model so far (by validation accuracy) is always written to this single file.
savepath= "./gdrive/MyDrive/FYP/XlnetResNeXt-checkpoint.t7"


for epoch in range(EPOCHS):

  print(f'Epoch {start_epoch + epoch + 1}/{start_epoch + EPOCHS}')
  print('-' * 10)

  train_acc, train_loss = train_epoch(
    model,
    train_data_loader,
    loss_fn,
    optimizer,
    device,
    scheduler,
    len(df_train)
  )
  # train_epoch returns the accuracy as a 0-dim tensor on the training device; store a
  # plain float so the history can be plotted and the checkpoint holds no device tensor.
  train_acc = float(train_acc)

  print(f'Train loss {train_loss} accuracy {train_acc}')

  val_acc, val_loss = eval_model(
    model,
    val_data_loader,
    loss_fn,
    device,
    len(df_val)
  )
  val_acc = float(val_acc)

  print(f'Val   loss {val_loss} accuracy {val_acc}')
  print()

  history['train_acc'].append(train_acc)
  history['train_loss'].append(train_loss)
  history['val_acc'].append(val_acc)
  history['val_loss'].append(val_loss)

  # Checkpoint only when validation accuracy strictly improves.
  if val_acc > best_accuracy:
    state = {
            'best_accuracy': val_acc,
            'epoch': start_epoch+epoch+1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
    }
    torch.save(state,savepath)
    best_accuracy = val_acc

Epoch 1/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.6174275216303373 accuracy 0.6608686595146339
Val   loss 0.517289012670517 accuracy 0.6713286713286714

Epoch 2/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.48985101988441065 accuracy 0.7508592854910947
Val   loss 0.45694825053215027 accuracy 0.7997457088366179

Epoch 3/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.4000147879123688 accuracy 0.8376210811373815
Val   loss 0.4223952442407608 accuracy 0.8111888111888113

Epoch 4/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.3454629245557283 accuracy 0.8524112071659202
Val   loss 0.43520487844944 accuracy 0.813731722822632

Epoch 5/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.32566360737148087 accuracy 0.8619935423393397
Val   loss 0.4344615116715431 accuracy 0.8181818181818182

Epoch 6/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.3078166779718901 accuracy 0.8706384751588376
Val   loss 0.4300250932574272 accuracy 0.8207247298156389

Epoch 7/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.30514628949918243 accuracy 0.8724091240495782
Val   loss 0.42493997514247894 accuracy 0.82453909726637

Epoch 8/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.28989928098101364 accuracy 0.8801166545151546
Val   loss 0.41851554811000824 accuracy 0.8270820089001907

Epoch 9/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.28818509139512716 accuracy 0.8799083428809499
Val   loss 0.4030940309166908 accuracy 0.8340750158931978

Epoch 10/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2788267057192953 accuracy 0.8838662639308406
Val   loss 0.4039871469140053 accuracy 0.8366179275270185

Epoch 11/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.273712138596334 accuracy 0.8877200291636288
Val   loss 0.4007994383573532 accuracy 0.8404322949777495

Epoch 12/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2706366722521029 accuracy 0.8879283407978336
Val   loss 0.39104317873716354 accuracy 0.8417037507946599

Epoch 13/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.26873414453707245 accuracy 0.8918862618477241
Val   loss 0.3878622129559517 accuracy 0.8410680228862047

Epoch 14/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2603033561455576 accuracy 0.8965732736173315
Val   loss 0.38385408371686935 accuracy 0.8417037507946599

Epoch 15/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.25514320950759084 accuracy 0.8928236642016456
Val   loss 0.39359813928604126 accuracy 0.8404322949777495

Epoch 16/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.258559461486967 accuracy 0.892927820018748
Val   loss 0.3819297105073929 accuracy 0.8436109345200254

Epoch 17/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.25161077474292953 accuracy 0.8941776898239766
Val   loss 0.37787624448537827 accuracy 0.8436109345200254

Epoch 18/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.24603986347976484 accuracy 0.9001145713988126
Val   loss 0.3782297596335411 accuracy 0.8429752066115702

Epoch 19/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2439024330754029 accuracy 0.9011561295698365
Val   loss 0.3856223523616791 accuracy 0.8410680228862047

Epoch 20/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2446115338488629 accuracy 0.8987605457764816
Val   loss 0.3860945850610733 accuracy 0.8429752066115702

Epoch 21/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.24021799313394646 accuracy 0.904280804082908
Val   loss 0.3908577337861061 accuracy 0.8404322949777495

Epoch 22/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.23651834381254097 accuracy 0.9032392459118842
Val   loss 0.3891975060105324 accuracy 0.8423394787031151

Epoch 23/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.24083683286842547 accuracy 0.8991771690448911
Val   loss 0.39008256047964096 accuracy 0.8417037507946599

Epoch 24/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.235783955768535 accuracy 0.9033434017289865
Val   loss 0.3904232680797577 accuracy 0.8423394787031151

Epoch 25/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.22973058098240903 accuracy 0.9089678158525153
Val   loss 0.38608764857053757 accuracy 0.8429752066115702

Epoch 26/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.22767365135644613 accuracy 0.9065722320591605
Val   loss 0.39156804233789444 accuracy 0.8410680228862047

Epoch 27/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.235876852744504 accuracy 0.902093531923758
Val   loss 0.38576650619506836 accuracy 0.8461538461538461

Epoch 28/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2274542411691264 accuracy 0.909696906572232
Val   loss 0.389716699719429 accuracy 0.8448823903369358

Epoch 29/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.22330063267758019 accuracy 0.9093844391209249
Val   loss 0.3876388445496559 accuracy 0.8467895740623014

Epoch 30/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2250657630594153 accuracy 0.9080304134985938
Val   loss 0.3931454047560692 accuracy 0.8442466624284807

Epoch 31/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.22207061475829074 accuracy 0.9089678158525153
Val   loss 0.39090824872255325 accuracy 0.8442466624284807

Epoch 32/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.22293542325496674 accuracy 0.9098010623893344
Val   loss 0.39399299025535583 accuracy 0.8429752066115702

Epoch 33/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.22351814963315664 accuracy 0.9114675554629725
Val   loss 0.38968174159526825 accuracy 0.8442466624284807

Epoch 34/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.21733566491227402 accuracy 0.9123008019997917
Val   loss 0.39499572664499283 accuracy 0.8423394787031151

Epoch 35/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.21455484314968712 accuracy 0.9163628788667847
Val   loss 0.3885865956544876 accuracy 0.845518118245391

Epoch 36/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2164141661242435 accuracy 0.9160504114154775
Val   loss 0.39121294766664505 accuracy 0.8448823903369358

Epoch 37/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2132737111104162 accuracy 0.9170919695865014
Val   loss 0.39342670887708664 accuracy 0.845518118245391

Epoch 38/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.21886407466311203 accuracy 0.910530153109051
Val   loss 0.38754069805145264 accuracy 0.8448823903369358

Epoch 39/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.21583695788132518 accuracy 0.9118841787313822
Val   loss 0.3874114528298378 accuracy 0.8448823903369358

Epoch 40/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.21481207091557353 accuracy 0.9130298927195084
Val   loss 0.3874918892979622 accuracy 0.8429752066115702

Epoch 41/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2137193240617451 accuracy 0.9131340485366107
Val   loss 0.3841201290488243 accuracy 0.8442466624284807

Epoch 42/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.20859976191269725 accuracy 0.9159462555983752
Val   loss 0.3847603350877762 accuracy 0.8442466624284807

Epoch 43/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.2116543627098987 accuracy 0.9165711905009895
Val   loss 0.3853360116481781 accuracy 0.8448823903369358

Epoch 44/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.21136495157292015 accuracy 0.915633788147068
Val   loss 0.3850027024745941 accuracy 0.845518118245391

Epoch 45/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.209483487041373 accuracy 0.9174044370378085
Val   loss 0.3859716057777405 accuracy 0.845518118245391

Epoch 46/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.20749654816953758 accuracy 0.9163628788667847
Val   loss 0.3874707594513893 accuracy 0.845518118245391

Epoch 47/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.20998692747793699 accuracy 0.9166753463180919
Val   loss 0.38749896734952927 accuracy 0.8461538461538461

Epoch 48/50
----------


  "Palette images with Transparency expressed in bytes should be "


Train loss 0.21037562113059194 accuracy 0.9159462555983752
Val   loss 0.38627149909734726 accuracy 0.845518118245391

Epoch 49/50
----------


  "Palette images with Transparency expressed in bytes should be "


In [None]:
# Snapshot of the model and optimizer after the last epoch, stored next to the
# best-accuracy checkpoint with the final epoch number in the file name.
final_epoch = start_epoch + EPOCHS
savepath = "./gdrive/MyDrive/FYP/XlnetResNeXt-checkpoint-{}.t7".format(final_epoch)
state = dict(
  epoch=final_epoch,
  state_dict=model.state_dict(),
  optimizer=optimizer.state_dict(),
)
torch.save(state, savepath)

In [None]:
# Accuracy curves for the run recorded in `history` (filled by the training cell, which
# must have been executed in this kernel session).
# float() makes the values plottable even when they were stored as 0-dim (possibly GPU)
# tensors, which matplotlib cannot convert on its own.
fig, ax = plt.subplots()
ax.plot([float(acc) for acc in history['train_acc']], label='train accuracy')
ax.plot([float(acc) for acc in history['val_acc']], label='validation accuracy')

ax.set_title('Training history')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend()
ax.set_ylim([0, 1]);

NameError: ignored

In [None]:
def get_predictions(model, data_loader, device=None):
  """Collect rounded predictions and targets for every sample in ``data_loader``.

  Args:
    model: module called with ``input_ids``, ``attention_mask`` and ``tweet_img`` that
      returns one probability per sample.
    data_loader: iterable of batch dicts with keys ``tweet_image``, ``input_ids``,
      ``attention_mask`` and ``targets``.
    device: device the batch tensors are moved to. When omitted, the device of the
      model's first parameter is used (CPU if the model has no parameters), so the
      function no longer depends on a notebook-level ``device`` variable.

  Returns:
    ``(predictions, real_values)``: two CPU float tensors with one row per sample and a
    single column. Both have shape ``(0, 1)`` when the loader yields no batches.
  """
  model = model.eval()

  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

  predictions = []
  real_values = []

  with torch.no_grad():
    for d in data_loader:

      input_ids = d["input_ids"].to(device)
      tweet_imgs = d["tweet_image"].to(device)
      attention_mask = d["attention_mask"].to(device)
      targets = d["targets"].reshape(-1, 1).float()
      targets = targets.to(device)

      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        tweet_img = tweet_imgs
      )
      preds = torch.round(outputs)

      # Keep whole batches and concatenate once at the end instead of splitting every
      # batch into per-row tensors.
      predictions.append(preds)
      real_values.append(targets)

  if not predictions:
    return torch.empty((0, 1)), torch.empty((0, 1))

  predictions = torch.cat(predictions).cpu()
  real_values = torch.cat(real_values).cpu()
  return predictions, real_values

# Score the current model on the held-out test split and print per-class metrics.
y_pred, y_test = get_predictions(model, test_data_loader)

test_report = classification_report(
  y_test,
  y_pred,
  target_names=['Not Informative', 'Informative'],
  digits=4,
)
print(test_report)

  "Palette images with Transparency expressed in bytes should be "


                 precision    recall  f1-score   support

Not Informative     0.8864    0.6349    0.7399       504
    Informative     0.8431    0.9602    0.8979      1030

       accuracy                         0.8533      1534
      macro avg     0.8648    0.7976    0.8189      1534
   weighted avg     0.8574    0.8533    0.8460      1534

