In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used below live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import os.path as path

import pandas as pd
import torch
import torch.nn as nn
import torchvision.models as models
from PIL import Image
from torch.utils.data import random_split, DataLoader, Dataset, TensorDataset
from torchvision import datasets, transforms

In [None]:
# Root of the CrisisMMD v2.0 copy on Google Drive; image paths in the TSV files
# are joined onto `image_path`, which currently points at the same folder.
project_folder = '/content/drive/MyDrive/CrisisMMD_v2.0'
image_path = '/content/drive/MyDrive/CrisisMMD_v2.0'


def load_split(split_name):
    """Read one split of the 'informative' task into a DataFrame.

    Args:
        split_name: Suffix of the TSV file to read ('train', 'test' or 'dev').

    Returns:
        pandas.DataFrame parsed from the tab-separated split file.

    Raises:
        FileNotFoundError: If the split file is missing (e.g. Drive not mounted).
    """
    split_file = path.join(
        project_folder,
        'crisismmd_datasplit_all/task_informative_text_img_{}.tsv'.format(split_name))
    return pd.read_csv(split_file, sep='\t')


train_data = load_split('train')
test_data = load_split('test')
dev_data = load_split('dev')

# Last expression of the cell -> rendered with the rich DataFrame display.
train_data.head()

FileNotFoundError: ignored

In [None]:
# String label -> integer class id used by all three models.
labels_dict = {'informative': 1, 'not_informative':0}

def convert_label(string):
    """Map a raw label to its integer class id.

    Args:
        string: A label string ('informative' / 'not_informative'), or a value
            that has already been encoded as 0 / 1.

    Returns:
        int: 1 or 0 for known labels, -1 for anything unrecognised.
    """
    # Already-encoded ids pass through unchanged, so re-running a conversion
    # cell on the same column does not collapse every label to -1.
    if not isinstance(string, str) and string in labels_dict.values():
        return int(string)
    return labels_dict.get(string, -1)  # return -1 for unknown labels

# Encode the image-level label column of every split as integer class ids.
for split_frame in (train_data, test_data, dev_data):
    split_frame['label_image'] = split_frame['label_image'].apply(convert_label)

NameError: ignored

In [None]:
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224

# Per-channel mean / std used to normalise RGB inputs; these are the values
# torchvision documents for its ImageNet-pretrained models.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# define the transformations to be applied to the dataset
transform = transforms.Compose([
    # torchvision's Resize takes (height, width); the original passed the
    # height twice, which only worked because both constants are equal.
    transforms.Resize((IMAGE_HEIGHT, IMAGE_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
])

# Image model

## Hyper parameters

In [None]:
# Default hyper-parameters for the image model.
# NOTE(review): BATCH_SIZE, LEARNING_RATE and NUM_EPOCH are assigned again in the
# image training cell further down, so these values look superseded — confirm
# which set is intended and keep only one.
BATCH_SIZE = 32
LEARNING_RATE = 0.001
NUM_EPOCH = 4


## Data loader

In [None]:
class ImageDataset(Dataset):
    """Image-only dataset over one split DataFrame.

    Each item is ``(image, label_tensor)`` where the image is loaded from
    ``image_dir / row['image']`` and the label comes from ``row['label_image']``.

    Args:
        data_set: DataFrame with at least the columns 'image' and 'label_image'.
        image_dir: Folder that the relative paths in the 'image' column are
            joined onto.
        transform: Optional callable applied to the RGB PIL image. When None,
            the resized PIL image is returned as-is.
    """

    def __init__(self, data_set, image_dir = image_path, transform = None):
        self.image_dir = image_dir
        self.data = data_set
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, label_tensor)`` pair stored at row ``index``."""
        row = self.data.iloc[index]
        image_file = path.join(self.image_dir, row['image'])

        # Image.open is lazy and keeps the file open; resize() forces the pixel
        # data to load and returns a new image, so the handle can be released.
        with Image.open(image_file) as raw_image:
            image = raw_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT)).convert('RGB')

        # The constructor allows transform=None, so calling it unconditionally
        # raised "TypeError: 'NoneType' object is not callable".
        if self.transform is not None:
            image = self.transform(image)

        label_tensor = torch.tensor(row['label_image'])
        return (image, label_tensor)

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.data)


# Wrap each split in an ImageDataset that applies the shared `transform`.
# NOTE(review): this rebinds train_data / test_data / dev_data from DataFrames to
# Dataset objects. The original frames stay reachable through each dataset's
# `.data` attribute, but any later code that indexes these names like a
# DataFrame will fail unless the splits are reloaded first.
train_data = ImageDataset(train_data, image_dir = image_path, transform = transform)
test_data = ImageDataset(test_data, image_dir = image_path, transform = transform)
dev_data = ImageDataset(dev_data, image_dir = image_path, transform = transform)


## Image model definition

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [None]:
class ResNetWithFC(nn.Module):
    """ResNet-50 backbone followed by two linear layers and a softmax.

    The backbone's own ``fc`` layer is bypassed: ``forward`` runs the
    convolutional stages up to ``avgpool``, flattens the pooled features and
    feeds them through ``fc1`` -> ``fc2`` -> softmax.

    Args:
        num_classes: Size of the output distribution.
        fc_hidden_dim: Width of the layer between the backbone and the output.

    Note:
        ``forward`` returns probabilities (softmax already applied), not logits.
    """

    # Backbone sub-modules applied in order before the custom head.
    _BACKBONE_STAGES = (
        'conv1', 'bn1', 'relu', 'maxpool',
        'layer1', 'layer2', 'layer3', 'layer4',
        'avgpool',
    )

    def __init__(self, num_classes=1000, fc_hidden_dim=512):
        super(ResNetWithFC, self).__init__()
        self.resnet = models.resnet50(pretrained=True)
        self.fc1 = nn.Linear(2048, fc_hidden_dim)
        self.fc2 = nn.Linear(fc_hidden_dim, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities for a batch of images ``x``."""
        features = x
        for stage_name in self._BACKBONE_STAGES:
            features = getattr(self.resnet, stage_name)(features)

        # Keep the batch dimension, collapse everything else.
        flat_features = torch.flatten(features, 1)
        hidden = self.fc1(flat_features)
        scores = self.fc2(hidden)
        return self.softmax(scores)

In [None]:
# Hyper-parameters actually used for the image model.
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
NUM_EPOCH = 5

IMAGE_CHECKPOINT = path.join(project_folder, 'models', 'image.pth')

resnet = ResNetWithFC(2, 512)

# load_state_dict() needs the deserialised state dict, not the file path, so
# the checkpoint is read with torch.load first. When no checkpoint exists yet
# the model is fine-tuned below instead of being loaded.
fine_tune_image_model = not path.exists(IMAGE_CHECKPOINT)
if not fine_tune_image_model:
    resnet.load_state_dict(torch.load(IMAGE_CHECKPOINT, map_location=device))
resnet.to(device)

# Shuffle the training split only; evaluation order stays deterministic.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=2, pin_memory = True)

test_loader = DataLoader(test_data, batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=2, pin_memory = True)


# Define loss function and optimizer.
# ResNetWithFC.forward already ends in a softmax, and nn.CrossEntropyLoss
# expects raw logits (it applies log-softmax itself). The matching loss for
# probabilities is the negative log-likelihood of their logarithm.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(resnet.parameters(), lr=LEARNING_RATE)


def run_image_epoch(loader, train=False):
    """Run one pass of `resnet` over `loader`.

    Args:
        loader: DataLoader yielding ``(images, labels)`` batches.
        train: When True the model is put in train mode and the optimizer is
            stepped on every batch; otherwise the pass is evaluation-only and
            runs without gradient tracking.

    Returns:
        (mean_loss, accuracy) as Python floats, both averaged per sample.
    """
    resnet.train(train)
    loss_sum, correct = 0.0, 0
    with torch.set_grad_enabled(train):
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device, dtype=torch.long)
            if train:
                optimizer.zero_grad()
            probabilities = resnet(images)
            # clamp avoids log(0) when a probability underflows to zero
            loss = criterion(torch.log(probabilities.clamp_min(1e-12)), labels)
            if train:
                loss.backward()
                optimizer.step()
            # loss is a batch mean; weight it by batch size for a per-sample average
            loss_sum += loss.item() * images.size(0)
            correct += (probabilities.argmax(dim=1) == labels).sum().item()
    sample_count = len(loader.dataset)
    return loss_sum / sample_count, correct / sample_count


if fine_tune_image_model:
    for epoch in range(NUM_EPOCH):
        # Train the model for one epoch
        print("___________Training_______________")
        train_loss, train_acc = run_image_epoch(train_loader, train=True)

        # Evaluate the model on the test set
        print("___________Eval_____________")
        test_loss, test_acc = run_image_epoch(test_loader)

        # Print the training and test accuracy and loss for this epoch
        print('Epoch {}/{} - Train Loss: {:.4f}, Train Acc: {:.4f}, Test Loss: {:.4f}, Test Acc: {:.4f}'
          .format(epoch + 1, NUM_EPOCH, train_loss, train_acc, test_loss, test_acc))
else:
    # Weights came from the checkpoint: a single evaluation pass is enough
    # (repeating it NUM_EPOCH times would only reproduce the same numbers).
    print("___________Eval_____________")
    test_loss, test_acc = run_image_epoch(test_loader)
    print('Test Loss: {:.4f}, Test Acc: {:.4f}'.format(test_loss, test_acc))

print(resnet.fc2)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 346MB/s]


___________Training_______________




___________Eval_____________




Epoch 1/5 - Train Loss: 0.5631, Train Acc: 0.7357, Test Loss: 0.4937, Test Acc: 0.8122
___________Training_______________




___________Eval_____________




Epoch 2/5 - Train Loss: 0.5126, Train Acc: 0.7928, Test Loss: 0.5194, Test Acc: 0.7827
___________Training_______________




___________Eval_____________




Epoch 3/5 - Train Loss: 0.4895, Train Acc: 0.8169, Test Loss: 0.5016, Test Acc: 0.8082
___________Training_______________




___________Eval_____________




Epoch 4/5 - Train Loss: 0.4724, Train Acc: 0.8359, Test Loss: 0.5037, Test Acc: 0.8006
___________Training_______________




___________Eval_____________




Epoch 5/5 - Train Loss: 0.4594, Train Acc: 0.8498, Test Loss: 0.5337, Test Acc: 0.7702
Linear(in_features=512, out_features=2, bias=True)


In [None]:
# Saving the trained model
model_path = path.join(project_folder, 'models', 'image.pth')
# torch.save does not create missing parent folders, so make sure 'models/'
# exists before writing; otherwise the first save fails with FileNotFoundError.
os.makedirs(path.dirname(model_path), exist_ok=True)
torch.save(resnet.state_dict(), model_path)




# Text model

In [None]:
#!pip install transformers
from transformers import AdamW, BertConfig
import torch
import math
import transformers
from transformers import BertForSequenceClassification
import numpy as np

# Name of the pretrained checkpoint shared by the tokenizer and the classifier.
BERT_CHECKPOINT = 'bert-base-uncased'

# Lower-casing tokenizer matching the uncased checkpoint.
tokenizer = transformers.BertTokenizer.from_pretrained(
    BERT_CHECKPOINT,
    do_lower_case=True,
)

# BERT encoder with a 2-way classification head; attention maps and hidden
# states are not returned.
model_bert = BertForSequenceClassification.from_pretrained(
    BERT_CHECKPOINT,
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
def _as_frame(split):
    """Return the DataFrame behind `split`.

    The image section rebinds the split names to dataset objects that keep
    their DataFrame in a `.data` attribute; unwrap those so this section works
    when the notebook is run top to bottom.
    """
    return split if isinstance(split, pd.DataFrame) else split.data


# Work on plain DataFrames again from here on.
train_data = _as_frame(train_data)
test_data = _as_frame(test_data)
dev_data = _as_frame(dev_data)

# Encode the text-level label column of every split as integer class ids.
for split_frame in (train_data, test_data, dev_data):
    split_frame['label_text'] = split_frame['label_text'].apply(convert_label)

tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased',
                                                       do_lower_case=True)
def get_tensor_dataset(dataset):
  """Tokenize the tweets of one split and pack them into a TensorDataset.

  Args:
    dataset: DataFrame with the columns 'tweet_text' and 'label_text'
      (labels already encoded as integers).

  Returns:
    TensorDataset of (input_ids, attention_mask, labels); the first two have
    one row of 64 token positions per tweet.
  """
  sentences = dataset.tweet_text.values
  labels = dataset.label_text.values

  # One batched tokenizer call replaces the per-sentence loop + torch.cat.
  # `padding='max_length'` is the current spelling of `pad_to_max_length=True`,
  # and truncation is requested explicitly instead of relying on the implicit
  # fallback the library warns about.
  encoded = tokenizer(list(sentences), add_special_tokens=True,
                      max_length=64, padding='max_length', truncation=True,
                      return_attention_mask=True,
                      return_tensors='pt')
  labels = torch.tensor(labels)
  data_set = TensorDataset(encoded['input_ids'], encoded['attention_mask'], labels)
  return data_set

train_set = get_tensor_dataset(train_data)
test_set = get_tensor_dataset(test_data)
dev_set = get_tensor_dataset(dev_data)


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [None]:
from  transformers import get_linear_schedule_with_warmup
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

# Fine-tuning schedule for the BERT classifier.
epochs = 4
batch_size = 32

# Training batches are drawn in random order, validation batches in file order.
train_sampler = RandomSampler(train_set)
train_dataloader = DataLoader(
    train_set,
    sampler=train_sampler,
    batch_size=batch_size,
)

val_sampler = SequentialSampler(dev_set)
val_dataloader = DataLoader(
    dev_set,
    sampler=val_sampler,
    batch_size=batch_size,
)

optimizer = AdamW(model_bert.parameters(), lr=5e-5)

# One scheduler step per optimizer step, so the schedule spans every batch of
# every epoch; the learning rate decays linearly with no warm-up phase.
total_steps = epochs * len(train_dataloader)

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

def flat_accuracy(preds,labels):
    """Fraction of rows whose highest-scoring class equals the label.

    Args:
        preds: 2-D array of per-class scores, one row per example.
        labels: Array of integer class ids, one per example.

    Returns:
        Accuracy in [0, 1].
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    hits = predicted_classes == true_classes
    return hits.sum() / true_classes.size



In [None]:
# Move the BERT classifier to the GPU; the training loop below also sends its batches there with .cuda().
model_bert.cuda()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [None]:

# Fine-tune the BERT classifier and record per-epoch statistics.
training_stats=[]
for epoch_i in range(0,epochs):
    print(" \n Epoch {:}/{:} \n Training....".format(epoch_i+1,epochs))
    total_train_loss=0
    model_bert.train()

    for batch in train_dataloader:
        b_input_ids = batch[0].cuda()
        b_input_mask = batch[1].cuda()
        b_labels = batch[2].cuda()

        model_bert.zero_grad()

        # Passing `labels` makes the model return its own classification loss.
        result = model_bert(b_input_ids,token_type_ids=None,
                            attention_mask=b_input_mask,labels=b_labels,
                            return_dict=True)
        loss=result.loss

        total_train_loss += loss.item()

        loss.backward()

        # Clip the gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model_bert.parameters(),1.0)

        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss/len(train_dataloader)
    print('Average Training Loss = {0:.2f}'.format(avg_train_loss))
    print('Running Evaluation....')


    model_bert.eval()
    tot_eval_acc,tot_eval_loss = 0,0

    with torch.no_grad():
        for batch in val_dataloader:
            b_input_ids = batch[0].cuda()
            b_input_mask = batch[1].cuda()
            b_labels = batch[2].cuda()

            result = model_bert(b_input_ids,token_type_ids=None,
                                attention_mask=b_input_mask,labels=b_labels,
                                return_dict=True)

            tot_eval_loss += result.loss.item()

            logits = result.logits.cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            tot_eval_acc += flat_accuracy(logits,label_ids)

    avg_val_acc = tot_eval_acc/len(val_dataloader)
    print("Accuracy : {0:.2f}".format(avg_val_acc))
    # tot_eval_loss is a sum of per-batch mean losses, so it is averaged over
    # the number of batches. The original stored a value divided by the number
    # of samples and only computed the correct one after recording the stats.
    avg_val_loss = tot_eval_loss/len(val_dataloader)

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training loss': avg_train_loss,
            'Valid loss': avg_val_loss,
            'Valid_acc': avg_val_acc,
        }
    )

 
 Epoch 1/4 
 Training....
Average Training Loss = 0.42
Running Evaluation....
Accuracy : 0.82
 
 Epoch 2/4 
 Training....
Average Training Loss = 0.28
Running Evaluation....
Accuracy : 0.82
 
 Epoch 3/4 
 Training....
Average Training Loss = 0.17
Running Evaluation....
Accuracy : 0.83
 
 Epoch 4/4 
 Training....
Average Training Loss = 0.09
Running Evaluation....
Accuracy : 0.82


In [None]:
# Persist the fine-tuned BERT classifier with the Hugging Face serializer.
# NOTE(review): save_pretrained() takes a directory, so 'text.pth' is presumably
# created as a folder rather than a single file — confirm. The multimodal model
# loads from this same path via from_pretrained().
model_path = path.join(project_folder, 'models', 'text.pth')
model_bert.save_pretrained(model_path)

# Multimodal

In [None]:
# Encode the combined (tweet-level) label column of every split as integer class ids.
for split_frame in (train_data, test_data, dev_data):
    split_frame['label'] = split_frame['label'].apply(convert_label)

In [None]:
# Preview the first rows of the training split after label encoding.
train_data.head()

Unnamed: 0,event_name,tweet_id,image_id,tweet_text,image,label,label_text,label_image,label_text_image
0,california_wildfires,917791291823591425,917791291823591425_0,RT @Cal_OES: PLS SHARE: Weâ€™re capturing wild...,data_image/california_wildfires/10_10_2017/917...,1,informative,1,Positive
1,california_wildfires,917791291823591425,917791291823591425_1,RT @Cal_OES: PLS SHARE: Weâ€™re capturing wild...,data_image/california_wildfires/10_10_2017/917...,0,informative,0,Negative
2,california_wildfires,917793137925459968,917793137925459968_0,RT @KAKEnews: California wildfires destroy mor...,data_image/california_wildfires/10_10_2017/917...,1,informative,1,Positive
3,california_wildfires,917793137925459968,917793137925459968_1,RT @KAKEnews: California wildfires destroy mor...,data_image/california_wildfires/10_10_2017/917...,1,informative,1,Positive
4,california_wildfires,917793137925459968,917793137925459968_2,RT @KAKEnews: California wildfires destroy mor...,data_image/california_wildfires/10_10_2017/917...,1,informative,1,Positive


In [None]:
from functools import reduce
class ImageTextDataset(Dataset):
    """Paired image + tweet dataset over one split DataFrame.

    Each item is ``(input_ids, attention_mask, image, label_tensor)``. The text
    tensors come from the module-level ``tokenizer`` and keep the leading
    singleton dimension the tokenizer returns, so callers squeeze it after
    batching.

    Args:
        data_set: DataFrame with at least the columns 'image', 'tweet_text'
            and 'label'.
        image_dir: Folder that the relative paths in the 'image' column are
            joined onto.
        transform: Optional callable applied to the RGB PIL image. When None,
            the resized PIL image is returned as-is.
    """

    def __init__(self, data_set, image_dir = image_path, transform = None):
        self.image_dir = image_dir
        self.data = data_set
        self.transform = transform

    def __getitem__(self, index):
        """Return the encoded tweet, image and label stored at row ``index``."""
        row = self.data.iloc[index]
        image_file = path.join(self.image_dir, row['image'])

        # Load Image. Image.open is lazy and keeps the file open; resize()
        # forces the pixel data to load and returns a new image, so the handle
        # can be released as soon as the block exits.
        with Image.open(image_file) as raw_image:
            image = raw_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT)).convert('RGB')

        # The constructor allows transform=None, so calling it unconditionally
        # raised "TypeError: 'NoneType' object is not callable".
        if self.transform is not None:
            image = self.transform(image)

        label_tensor = torch.tensor(row['label'])

        # Load text. `padding='max_length'` is the current spelling of
        # `pad_to_max_length=True`; truncation is requested explicitly so
        # tweets longer than 64 tokens are cut without a library warning.
        encoded_dict = tokenizer(
            row['tweet_text'], add_special_tokens=True,
            max_length=64, padding='max_length', truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        input_ids = encoded_dict['input_ids']
        attention_mask = encoded_dict['attention_mask']
        return (input_ids, attention_mask, image, label_tensor)

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.data)

# Wrap each split in an ImageTextDataset that applies the shared `transform`.
# NOTE(review): train_data and test_data are rebound from DataFrames to Dataset
# objects here, while the dev split is bound to the new name `val_data` and
# `dev_data` keeps pointing at the DataFrame. Re-running this cell without
# reloading the splits would wrap the already-wrapped datasets.
train_data = ImageTextDataset(train_data, image_dir = image_path, transform = transform)
test_data = ImageTextDataset(test_data, image_dir = image_path, transform = transform)
val_data = ImageTextDataset(dev_data, image_dir = image_path, transform = transform)

In [None]:
BATCH_SIZE = 32
# The shuffle flags were swapped: training batches should be drawn in random
# order, while evaluation order should stay deterministic.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=2, pin_memory = True)

test_loader = DataLoader(test_data, batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=2, pin_memory = True)

In [None]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m67.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m107.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1


In [None]:
from transformers import BertModel
import torch.nn.functional as F
import transformers

tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

class ImageTextModel(nn.Module):
  """Late-fusion classifier over a fine-tuned BERT and a frozen ResNetWithFC.

  The BERT pooled output is concatenated with the image model's 2-way
  probability vector and passed through three linear layers and a softmax.
  Both branches are loaded from the checkpoints saved earlier in this notebook.

  Note:
    ``forward`` returns probabilities (softmax already applied), not logits.
  """

  def __init__(self):
    super(ImageTextModel, self).__init__()
    self.bert = BertModel.from_pretrained(path.join(project_folder, 'models', 'text.pth'))
    self.resnet = ResNetWithFC(num_classes=2)
    # map_location='cpu' lets a checkpoint saved on a GPU load on any machine;
    # the caller moves the whole model to its device afterwards.
    self.resnet.load_state_dict(
        torch.load(path.join(project_folder, 'models', 'image.pth'), map_location='cpu'))

    # The image branch is only used under torch.no_grad() in forward(), i.e. as
    # a fixed feature extractor, so mark it frozen explicitly.
    self.resnet.requires_grad_(False)
    self.resnet.eval()

    # BERT pooled size + number of image-model outputs (768 + 2 = 770 for
    # bert-base with the 2-class image model).
    fused_dim = self.bert.config.hidden_size + self.resnet.fc2.out_features
    self.fc1 = nn.Linear(fused_dim, 256)
    self.fc2 = nn.Linear(256, 128)
    self.fc3 = nn.Linear(128, 2)
    self.softmax = nn.Softmax(dim=1)

  def train(self, mode=True):
    """Switch train/eval mode while keeping the frozen image branch in eval mode.

    Without this, model.train() would put the ResNet's BatchNorm layers back in
    training mode, so they would normalise with batch statistics and keep
    updating their running averages even though no gradients reach the branch.
    """
    super(ImageTextModel, self).train(mode)
    self.resnet.eval()
    return self

  def forward(self, input_ids, attention_masks, image_tensors):
    """Return class probabilities for a batch of (tweet, image) pairs.

    Args:
      input_ids: Token ids, one row per example.
      attention_masks: Attention mask matching ``input_ids``.
      image_tensors: Batch of preprocessed images for the ResNet branch.
    """
    # ResNet50 (frozen): 2-way probabilities per image
    with torch.no_grad():
      resnet_output = self.resnet(image_tensors)

    # BERT: index 1 of the output is the pooled sequence representation
    bert_output = self.bert(input_ids=input_ids, attention_mask=attention_masks)[1]

    # Concatenate the BERT and ResNet50 outputs
    combined_output = torch.cat((bert_output, resnet_output), dim=1)

    # Fully connected layers
    x = F.relu(self.fc1(combined_output))
    x = self.fc2(x)
    output = self.fc3(x)
    output = self.softmax(output)

    return output

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
from torch.utils.data import DataLoader
import torch.optim as optim


batch_size = 32
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

# Initialize the model
model = ImageTextModel()
model.to(device)
# Define the loss function and optimizer.
# ImageTextModel.forward already ends in a softmax, and nn.CrossEntropyLoss
# expects raw logits (it applies log-softmax itself). The matching loss for
# probabilities is the negative log-likelihood of their logarithm.
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)
stats=[]


def run_fusion_batch(batch):
    """Move one batch to `device` and run it through `model`.

    Args:
        batch: (input_ids, attention_masks, images, targets) as produced by the
            image-text DataLoaders.

    Returns:
        (probabilities, targets), both on `device`.
    """
    input_ids, attention_masks, image_data, targets = (t.to(device) for t in batch)
    # The dataset returns each encoding with a leading singleton dimension, so
    # the batched text tensors carry an extra axis at dim 1 that BERT must not see.
    input_ids = input_ids.squeeze(dim=1)
    attention_masks = attention_masks.squeeze(dim=1)
    return model(input_ids, attention_masks, image_data), targets


# Train the model for a specified number of epochs
num_epochs = 5
for epoch in range(num_epochs):
    print("\nEpoch {}/{}_____Training..........".format(epoch+1, num_epochs))

    total_train_loss=0
    # Set the model to train mode
    model.train()

    # Iterate over the batches in the training data
    for batch in train_dataloader:
        # Zero the gradients
        optimizer.zero_grad()

        # Compute the model's predictions
        outputs, targets = run_fusion_batch(batch)

        # Compute the loss; clamp avoids log(0) when a probability underflows
        loss = criterion(torch.log(outputs.clamp_min(1e-12)), targets)

        total_train_loss += loss.item()
        # Compute the gradients
        loss.backward()

        # Update the parameters
        optimizer.step()

    avg_train_loss = total_train_loss/len(train_dataloader)
    print('Average Training Loss = {0:.2f} '.format(avg_train_loss))
    print('Running Evaluation...')
    # Set the model to evaluation mode
    model.eval()

    # Compute the validation accuracy
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_dataloader:
            outputs, targets = run_fusion_batch(batch)
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    val_acc = 100 * correct / total

    # Print the loss and validation accuracy for each epoch
    print('Validation Accuracy: {:.2f}%'.format(val_acc))
    stats.append(
        {
            'epoch': epoch + 1,
            'training_loss': avg_train_loss,
            'val_acc': val_acc,
        }
    )



Some weights of the model checkpoint at path/to/fine_tuned_bert were not used when initializing BertModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Epoch 1/5_____Training..........
Average Training Loss = 1.87 
Running Evaluation...
Validation Accuracy: 96.06%
Epoch 2/5_____Training..........
Average Training Loss = 1.62 
Running Evaluation...
Validation Accuracy: 95.80%
Epoch 3/5_____Training..........
Average Training Loss = 1.37 
Running Evaluation...
Validation Accuracy: 95.54%
Epoch 4/5_____Training..........
Average Training Loss = 1.2