In [None]:
# Mount Google Drive at /content/drive (Colab-only helper); the dataset paths used
# in the following cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import os.path as path
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader


# Root folder of the CrisisMMD v2.0 copy on Drive; the relative `image_path`
# column is joined onto this root whenever an image is opened.
path_for_image = '/content/drive/MyDrive/CrisisMMD_v2.0'

# Annotation sheet holding the tweet text, the labels and the relative image paths.
data_set = pd.read_excel('/content/drive/MyDrive/CrisisMMD_v2.0/Tag_matched.xlsx')

# Preview the first rows as the cell output.
data_set.head()

Unnamed: 0,tweet_id,image_id,text_info,text_info_conf,image_info,image_info_conf,text_human,text_human_conf,image_human,image_human_conf,image_damage,image_damage_conf,tweet_text,image_url,image_path
0,918000000000000000,917791044158185473_0,informative,1.0,informative,0.6766,other_relevant_information,1.0,other_relevant_information,0.6766,,,RT @Gizmodo: Wildfires raging through Northern...,http://pbs.twimg.com/media/DLyi_WYVYAApwNg.jpg,data_image/california_wildfires/10_10_2017/917...
1,918000000000000000,917793137925459968_0,informative,1.0,informative,1.0,infrastructure_and_utility_damage,1.0,infrastructure_and_utility_damage,1.0,severe_damage,1.0,RT @KAKEnews: California wildfires destroy mor...,http://pbs.twimg.com/media/DLtgmEPXUAEo1LV.jpg,data_image/california_wildfires/10_10_2017/917...
2,918000000000000000,917793137925459968_1,informative,1.0,informative,0.6538,infrastructure_and_utility_damage,1.0,infrastructure_and_utility_damage,0.6538,severe_damage,1.0,RT @KAKEnews: California wildfires destroy mor...,http://pbs.twimg.com/media/DLtgmEPXkAAvOdi.jpg,data_image/california_wildfires/10_10_2017/917...
3,918000000000000000,917793137925459968_2,informative,1.0,informative,1.0,infrastructure_and_utility_damage,1.0,infrastructure_and_utility_damage,1.0,severe_damage,0.6434,RT @KAKEnews: California wildfires destroy mor...,http://pbs.twimg.com/media/DLtgmF9X0AASfbh.jpg,data_image/california_wildfires/10_10_2017/917...
4,918000000000000000,917793736918216706_0,not_informative,0.6535,not_informative,1.0,not_humanitarian,0.6535,not_humanitarian,1.0,,,California wildfire. Ã¡Â½Â¡4 https://t.co/a8oD...,http://pbs.twimg.com/media/DLyoiI0X0AAsw1h.jpg,data_image/california_wildfires/10_10_2017/917...


In [None]:
# Show the class distribution before filtering. As a bare expression that is not
# the last statement of the cell, the result used to be silently discarded.
print(data_set['image_human'].value_counts())

# Keep only rows whose image label is a humanitarian category and that have
# tweet text. .copy() hands later cells an independent frame whose columns can
# be overwritten without touching a slice of the original.
data_set = (
    data_set[data_set['image_human'] != 'not_humanitarian']
    .dropna(subset=['tweet_text'])
    .copy()
)

In [None]:
# Category name -> integer class id; the id is the category's position in the list.
labels_dict = {
    category: class_id
    for class_id, category in enumerate([
        'affected_individuals',
        'infrastructure_and_utility_damage',
        'injured_or_dead_people',
        'missing_or_found_people',
        'other_relevant_information',
        'rescue_volunteering_or_donation_effort',
        'vehicle_damage',
    ])
}


def convert_gender_label(gender_str):
    """Return the integer class id for a category name, or -1 if it is not in labels_dict.

    NOTE: despite its name, this helper is applied to the `image_human`
    category column, not to a gender field.
    """
    if gender_str in labels_dict:
        return labels_dict[gender_str]
    return -1  # unknown labels
# Encode every image_human entry as its integer class id (-1 when unmapped).
label_tuple = tuple(data_set['image_human'].values)
numerical_label = list(map(convert_gender_label, label_tuple))
numerical_label

# **Image model**

In [None]:
from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms

IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
# If more image preprocessing is required, write it as a separate function.

# Transformations applied to every image before it is fed to the network.
transform = transforms.Compose([
    # Pass (height, width) explicitly: Resize(224) with a single int only scales
    # the shorter edge, so a non-square input would produce tensors of varying
    # size that cannot be stacked into a batch.
    transforms.Resize((IMAGE_HEIGHT, IMAGE_WIDTH)),
    transforms.ToTensor(),
    # Per-channel mean / std (the ImageNet statistics used by torchvision's
    # pretrained models).
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

class ImageDataset(Dataset):
    """Dataset yielding ``(image, label_tensor)`` pairs from the annotation frame.

    Args:
        data_set: DataFrame with an ``image_path`` column (path relative to
            ``image_dir``) and an ``image_human`` column holding the label.
            The label must already be numeric because it is wrapped with
            ``torch.tensor``.
        image_dir: Root directory the relative image paths are joined onto.
        transform: Optional callable applied to the resized RGB PIL image.
            When ``None`` the PIL image is returned untransformed.
    """

    def __init__(self, data_set, image_dir = path_for_image, transform = None):
        self.image_dir = image_dir
        # Copy so later edits to the caller's frame cannot change this dataset.
        self.data = data_set.copy()
        self.transform = transform

    def __getitem__(self, index):
        """Load the sample at positional ``index`` and return ``(image, label_tensor)``."""
        row = self.data.iloc[index]
        # Honour the directory given to the constructor; the previous code
        # ignored ``image_dir`` and always read the module-level path_for_image.
        image_path = path.join(self.image_dir, row['image_path'])
        label = row['image_human']

        # Load Image. The context manager releases the file handle promptly;
        # resize() and convert() each return an independent in-memory image.
        with Image.open(image_path) as raw_image:
            image = raw_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
            image = image.convert('RGB')

        label_tensor = torch.tensor(label)
        # transform defaults to None; calling it unconditionally raised
        # "'NoneType' object is not callable".
        if self.transform is not None:
            image = self.transform(image)
        return (image, label_tensor)

    def __len__(self):
        """Number of rows (samples) in the dataset."""
        return len(self.data)


In [None]:
# Replace the string categories with their integer ids: ImageDataset wraps the
# label in torch.tensor, so it has to be numeric.
data_set['image_human'] = numerical_label
full_data = ImageDataset(data_set, transform = transform)

# 70% train / 20% test / remainder (~10%) validation.
# test_len used to be derived from 0.8 * train_len, which made test_len far too
# large and val_len negative for every dataset size.
total_len = len(full_data)
train_len = int(0.7 * total_len)
test_len = total_len - int(0.8 * total_len)
val_len = total_len - train_len - test_len
train_data, test_data, val_data = random_split(full_data, [train_len, test_len, val_len])


In [None]:
# Hyper-parameters of the image-only ResNet run.
# Samples per DataLoader batch.
BATCH_SIZE = 32
# Step size handed to the Adam optimizer.
LEARNING_RATE = 0.0001
# Number of passes over the training split.
NUM_EPOCH = 20

In [None]:
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [None]:

class ResNetWithFC(nn.Module):
    """Pretrained ResNet-50 backbone followed by a two-layer fully connected head.

    Args:
        num_classes: Size of the output layer.
        fc_hidden_dim: Width of the layer between the 2048-d pooled ResNet
            features and the output layer.
    """

    def __init__(self, num_classes=1000, fc_hidden_dim=512):
        super(ResNetWithFC, self).__init__()
        self.resnet = models.resnet50(pretrained=True)
        self.fc1 = nn.Linear(2048, fc_hidden_dim)
        self.fc2 = nn.Linear(fc_hidden_dim, num_classes)
        # Kept for callers that want probabilities (see predict_proba); it is
        # deliberately NOT applied inside forward().
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, num_classes).

        The scores are not passed through softmax: nn.CrossEntropyLoss applies
        log-softmax itself, so normalising here as well squashed the loss into
        a narrow range and weakened the gradients. The arg-max of the logits is
        the same as the arg-max of the probabilities.
        """
        # Run the backbone stage by stage, skipping its own 1000-way fc layer.
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)

        x = self.resnet.layer1(x)
        x = self.resnet.layer2(x)
        x = self.resnet.layer3(x)
        x = self.resnet.layer4(x)

        x = self.resnet.avgpool(x)
        x = torch.flatten(x, 1)
        # NOTE(review): fc1 and fc2 are stacked without a non-linearity in
        # between; left unchanged here.
        x = self.fc1(x)
        x = self.fc2(x)

        return x

    def predict_proba(self, x):
        """Return class probabilities: softmax over the logits of forward()."""
        return self.softmax(self(x))

# NOTE(review): 8 output units are requested here — confirm against the number
# of class ids actually present in the labels.
resnet = ResNetWithFC(8, 512)

resnet.to(device)

# Shuffle the training split every epoch; evaluation order does not matter.
# (The two shuffle flags used to be swapped.)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=2, pin_memory = True)

test_loader = DataLoader(test_data, batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=2, pin_memory = True)


# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCH):
    # Train the model for one epoch
    resnet.train()
    train_loss = 0.0
    train_correct = 0
    for images, labels in train_loader:
        images = images.to(device)
        # CrossEntropyLoss needs int64 class indices.
        labels = labels.to(device, dtype=torch.long)
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # loss is a batch mean; weight it by the batch size so the epoch
        # average below is a true per-sample mean.
        train_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        # Accumulate plain Python ints instead of mixing floats and tensors.
        train_correct += (preds == labels).sum().item()
    train_loss = train_loss / len(train_loader.dataset)
    train_acc = train_correct / len(train_loader.dataset)

    # Evaluate the model on the test set
    resnet.eval()
    test_loss = 0.0
    test_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device, dtype=torch.long)
            outputs = resnet(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            test_correct += (preds == labels).sum().item()
    test_loss = test_loss / len(test_loader.dataset)
    test_acc = test_correct / len(test_loader.dataset)

    # Print the training and test accuracy and loss for this epoch
    print('Epoch {}/{} - Train Loss: {:.4f}, Train Acc: {:.4f}, Test Loss: {:.4f}, Test Acc: {:.4f}'
      .format(epoch + 1, NUM_EPOCH, train_loss, train_acc, test_loss, test_acc))

print(resnet.fc2)



Epoch 1/20 - Train Loss: 1.5228, Train Acc: 0.7552, Test Loss: 1.4176, Test Acc: 0.8584
Epoch 2/20 - Train Loss: 1.4070, Train Acc: 0.8688, Test Loss: 1.4297, Test Acc: 0.8418
Epoch 3/20 - Train Loss: 1.3921, Train Acc: 0.8826, Test Loss: 1.4131, Test Acc: 0.8654
Epoch 4/20 - Train Loss: 1.3769, Train Acc: 0.8973, Test Loss: 1.4176, Test Acc: 0.8523
Epoch 5/20 - Train Loss: 1.3768, Train Acc: 0.8980, Test Loss: 1.4487, Test Acc: 0.8252
Epoch 6/20 - Train Loss: 1.3789, Train Acc: 0.8950, Test Loss: 1.4162, Test Acc: 0.8593
Epoch 7/20 - Train Loss: 1.3612, Train Acc: 0.9130, Test Loss: 1.4007, Test Acc: 0.8759
Epoch 8/20 - Train Loss: 1.3777, Train Acc: 0.8961, Test Loss: 1.4240, Test Acc: 0.8488
Epoch 9/20 - Train Loss: 1.3622, Train Acc: 0.9119, Test Loss: 1.4092, Test Acc: 0.8636
Epoch 10/20 - Train Loss: 1.3564, Train Acc: 0.9186, Test Loss: 1.4168, Test Acc: 0.8575
Epoch 11/20 - Train Loss: 1.3646, Train Acc: 0.9096, Test Loss: 1.4502, Test Acc: 0.8252
Epoch 12/20 - Train Loss: 1.35

# **Multi-modal**

## Text processing

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m51.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m84.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1


In [None]:
from transformers import AdamW, BertConfig
import torch
import math

In [None]:
import transformers
# Tokenizer for the 'bert-base-uncased' checkpoint; do_lower_case=True lower-cases the input text.
tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
from transformers import BertForSequenceClassification
# 'bert-base-uncased' with a sequence-classification head of 8 outputs; attention
# maps and per-layer hidden states are not returned by the forward pass.
# NOTE(review): num_labels=8 — confirm against the number of class ids actually used.
model_bert = BertForSequenceClassification.from_pretrained('bert-base-uncased',num_labels=8,
                                                      output_attentions=False,
                                                      output_hidden_states=False)

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
# Work on a copy so the text pipeline cannot alter data_set.
data = data_set.copy()
labels = data['image_human'].values
# A value that differs from itself is NaN; replace it with the placeholder 'NA'.
sentences = ['NA' if tweet != tweet else tweet for tweet in data['tweet_text'].values]
type(labels)

numpy.ndarray

In [None]:
label_tuple = tuple(labels)
# Same 7-way encoding as the image pipeline. 'not_humanitarian' rows are removed
# from data_set by the earlier filter cell, and giving that category an id here
# shifted every following class id away from the ids used for the image labels.
labels_dict = {'affected_individuals': 0,'infrastructure_and_utility_damage':1, 'injured_or_dead_people': 2,
               'missing_or_found_people':3, 'other_relevant_information': 4,'rescue_volunteering_or_donation_effort':5,
                 'vehicle_damage': 6}

def convert_gender_label(gender_str):
    """Return the integer class id for a label, or -1 when it is unknown.

    Accepts either a category name (looked up in labels_dict) or an id that is
    already encoded. The `image_human` column is overwritten with integer ids
    by the image pipeline, and mapping those through the name lookup turned
    every label into -1.

    NOTE: despite its name, this helper encodes `image_human` categories.
    """
    if isinstance(gender_str, str):
        return labels_dict.get(gender_str, -1)  # return -1 for unknown labels
    try:
        class_id = int(gender_str)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, inf, ... are not valid class ids.
        return -1
    # Accept only exact integers that are real class ids.
    if class_id == gender_str and class_id in labels_dict.values():
        return class_id
    return -1


numerical_label = [convert_gender_label(i) for i in label_tuple]

# Preview only: printing the full list floods the cell output.
print(numerical_label[:20])
unknown_count = numerical_label.count(-1)
if unknown_count:
    print('WARNING: {} of {} labels could not be encoded (-1)'.format(unknown_count, len(numerical_label)))

[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

In [None]:
# Load the tokenizer.
# NOTE(review): this repeats the BertTokenizer.from_pretrained call made earlier
# in the notebook with the same arguments.

tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

In [None]:
# Tokenize all tweets in one batched call. padding='max_length' replaces the
# deprecated pad_to_max_length flag and truncation=True makes the 64-token
# cut-off explicit instead of relying on the library's fallback warning.
# Both returned tensors have one row per sentence and 64 columns.
encoded = tokenizer(list(sentences), add_special_tokens=True,
                    max_length=64, padding='max_length', truncation=True,
                    return_attention_mask=True,
                    return_tensors='pt')
input_ids = encoded['input_ids']
attention_masks = encoded['attention_mask']
labels = torch.tensor(numerical_label)



In [None]:
# Display the encoded label tensor as the cell output.
labels

tensor([4, 1, 1,  ..., 4, 4, 4])

In [None]:
from torch.utils.data import TensorDataset, random_split

# Bundle token ids, attention masks and labels row by row.
dataset = TensorDataset(input_ids, attention_masks, labels)

# First cut: 90% kept for training, the remaining 10% becomes the validation set.
total_size = len(dataset)
first_cut = int(0.9 * total_size)
train_set, val_set = random_split(dataset, [first_cut, total_size - first_cut])

# Second cut: 10% of that training portion is set aside as the test set.
remaining_size = len(train_set)
second_cut = int(0.9 * remaining_size)
train_set, test_set = random_split(train_set, [second_cut, remaining_size - second_cut])


# Print the sizes of each set
for caption, split in (
    ("Training set size:", train_set),
    ("Validation set size:", val_set),
    ("Test set size:", test_set),
):
    print(caption, len(split))

Training set size: 3086
Validation set size: 382
Test set size: 343


In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
batch_size = 32
# Training batches are drawn in random order; validation batches keep dataset order.
train_dataloader = DataLoader(
    train_set,
    batch_size=batch_size,
    sampler=RandomSampler(train_set),
)
val_dataloader = DataLoader(
    val_set,
    batch_size=batch_size,
    sampler=SequentialSampler(val_set),
)

In [None]:
# Use torch.optim.AdamW: the AdamW class shipped with transformers is deprecated.
# eps and weight_decay are set to the transformers defaults (1e-6 and 0.0)
# because torch's own defaults differ (1e-8 and 0.01).
optimizer = torch.optim.AdamW(model_bert.parameters(), lr=5e-5, eps=1e-6, weight_decay=0.0)



In [None]:
from  transformers import get_linear_schedule_with_warmup
# Number of fine-tuning passes over train_dataloader.
epochs=4
# The scheduler is stepped once per training batch, so its horizon is
# batches-per-epoch * epochs.
total_steps= len(train_dataloader)*epochs
# Linear learning-rate schedule with zero warm-up steps.
scheduler = get_linear_schedule_with_warmup(optimizer,num_warmup_steps=0,num_training_steps=total_steps)

In [None]:
def flat_accuracy(preds,labels):
    """Fraction of rows in ``preds`` whose arg-max along axis 1 equals the label.

    Args:
        preds: 2-D array of per-class scores, one row per sample.
        labels: Array of integer class ids; it is flattened before comparing.

    Returns:
        Number of matching predictions divided by the number of labels.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    matches = predicted_classes == true_classes
    return np.sum(matches) / len(true_classes)

In [None]:
import random
import numpy as np
# Seed the Python, NumPy and PyTorch (CPU and every CUDA device) random generators.
# NOTE(review): this cell comes after the random_split cells above, so in a
# top-to-bottom run those splits are drawn before any seed is set — consider
# moving the seeding to the top of the notebook.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

In [None]:
# Move the BERT classifier to the GPU (requires a CUDA device); the returned
# module is echoed as the cell output.
model_bert.cuda()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [None]:
# Fine-tune model_bert for `epochs` passes, validating after each one and
# collecting one stats dict per epoch in training_stats.
training_stats=[]
for epoch_i in range(0,epochs):
    print(" \n Epoch {:}/{:} \n Training....".format(epoch_i+1,epochs))
    total_train_loss=0
    model_bert.train()

    for step, batch in enumerate(train_dataloader):
        # Each batch is (input_ids, attention_mask, labels) from the TensorDataset.
        b_input_ids = batch[0].cuda()
        b_input_mask = batch[1].cuda()
        b_labels = batch[2].cuda()

        model_bert.zero_grad()

        # Passing labels makes the model return the classification loss itself.
        result = model_bert(b_input_ids,token_type_ids=None,attention_mask=b_input_mask,labels=b_labels,return_dict=True)
        loss=result.loss

        total_train_loss += loss.item()

        loss.backward()

        # Clip the gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model_bert.parameters(),1.0)

        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss/len(train_dataloader)
    print('Average Training Loss = {0:.2f}'.format(avg_train_loss))
    print('Running Evaluation....')


    model_bert.eval()
    tot_eval_correct,tot_eval_loss,tot_eval_samples = 0.0,0.0,0

    for batch in val_dataloader:

        b_input_ids = batch[0].cuda()
        b_input_mask = batch[1].cuda()
        b_labels = batch[2].cuda()

        with torch.no_grad():
            result = model_bert(b_input_ids,token_type_ids=None,attention_mask=b_input_mask,labels=b_labels,return_dict=True)

        tot_eval_loss += result.loss.item()

        logits=result.logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Weight each batch's accuracy by its size so a short final batch is
        # not over-represented in the epoch figure.
        tot_eval_correct += flat_accuracy(logits,label_ids) * len(label_ids)
        tot_eval_samples += len(label_ids)

    avg_val_acc = tot_eval_correct/tot_eval_samples
    print("Accuracy : {0:.2f}".format(avg_val_acc))
    # result.loss is a per-batch mean, so the epoch value is the mean over
    # batches. It used to be divided by the number of samples before being
    # stored, and only corrected after the stats entry had been appended.
    avg_val_loss = tot_eval_loss/len(val_dataloader)

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training loss': avg_train_loss,
            'Valid loss': avg_val_loss,
            'Valid_acc': avg_val_acc,
        }
    )

 
 Epoch 1/4 
 Training....
Average Training Loss = 0.32
Running Evaluation....
Accuracy : 0.97
 
 Epoch 2/4 
 Training....
Average Training Loss = 0.08
Running Evaluation....
Accuracy : 0.96
 
 Epoch 3/4 
 Training....
Average Training Loss = 0.03
Running Evaluation....
Accuracy : 0.98
 
 Epoch 4/4 
 Training....
Average Training Loss = 0.01
Running Evaluation....
Accuracy : 0.97


In [None]:
# Save the fine-tuned model so a later cell can reload it with from_pretrained.
import os
# NOTE(review): "path/to/fine_tuned_bert" is a relative placeholder path; the
# multi-modal model below loads from the same string, so change both together.
#os.makedirs("path/to/fine_tuned_bert")
model_bert.save_pretrained("path/to/fine_tuned_bert")

## Image processing

In [None]:
from PIL import Image
import torchvision as tv
from tqdm import tqdm
# Preprocessing for the multi-modal model: fixed 224x224 resize, tensor
# conversion, then per-channel normalisation.
transform_img = tv.transforms.Compose([
    tv.transforms.Resize((224,224)),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Eagerly load and preprocess every image; tqdm reports the progress.
image_tensors = []
for relative_path in tqdm(data_set['image_path']):
    full_path = path.join(path_for_image, relative_path)
    rgb_image = Image.open(full_path).convert('RGB')
    image_tensors.append(transform_img(rgb_image))


100%|██████████| 3811/3811 [15:39<00:00,  4.06it/s]


In [None]:
# Sanity check: token ids, attention masks, image tensors and labels should all
# have the same number of entries.
len(input_ids), len(attention_masks), len(image_tensors), len(labels)
# NOTE(review): the stacking step below is commented out, so this cell leaves
# image_tensors unchanged.
#image_tensors = torch.stack(image_tensors)

(3811, 3811, 3811, 3811)

In [None]:
# Creating tensor dataset
from torch.utils.data import TensorDataset, random_split

# TensorDataset accepts only tensors that share their first dimension, so the
# per-image tensors (a Python list when produced by the loading loop) are
# stacked into one tensor first.
if not torch.is_tensor(image_tensors):
    image_tensors = torch.stack(list(image_tensors))

full_dataset = TensorDataset(
    input_ids,
    attention_masks,
    image_tensors,
    # as_tensor handles a list, array or tensor and guarantees int64 targets.
    torch.as_tensor(labels, dtype=torch.long)
    )
# 70% train / 20% test / remainder (~10%) validation.
train_len = int(0.7*len(full_dataset))
test_len = len(full_dataset) - int (0.8 *len(full_dataset))
val_len = len(full_dataset) - train_len - test_len
train_data, test_data, val_data = random_split(full_dataset, [train_len, test_len, val_len])


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel

class MultiModalClassifier(nn.Module):
    """Fusion classifier over BERT text features and ResNet-50 image features.

    The 768-d BERT output and a 512-d projection of the ResNet-50 features are
    concatenated and passed through three fully connected layers.

    Args:
        num_classes: Size of the output layer.
    """

    def __init__(self, num_classes):
        super(MultiModalClassifier, self).__init__()

        # Pretrained ResNet-50 used as a fixed feature extractor: freeze every
        # backbone weight ...
        self.resnet = models.resnet50(pretrained=True)
        for backbone_param in self.resnet.parameters():
            backbone_param.requires_grad = False
        # ... then swap its head for a 2048 -> 512 projection. It is created
        # after the freeze, so it stays trainable.
        self.resnet.fc = nn.Linear(2048, 512)

        # Load the fine-tuned BERT encoder. Its parameters are NOT frozen:
        # they keep being updated by the optimizer.
        self.bert = BertModel.from_pretrained('path/to/fine_tuned_bert')

        # Combine the BERT and ResNet50 outputs
        self.fc1 = nn.Linear(512+768, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)
        # Kept for callers that want probabilities (see predict_proba); it is
        # deliberately NOT applied inside forward().
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_ids, attention_masks, image_tensors):
        """Return raw class scores (logits) of shape (batch, num_classes).

        No softmax is applied: nn.CrossEntropyLoss performs log-softmax
        itself, and normalising twice flattens the loss and its gradients.
        """
        # ResNet50. The whole call used to sit under torch.no_grad(), which
        # also cut the freshly initialised 512-d projection out of training.
        # The backbone is frozen through requires_grad instead.
        resnet_output = self.resnet(image_tensors)

        # BERT: element [1] of the model output (the pooled output).
        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_masks)[1]

        # Concatenate the BERT and ResNet50 outputs
        combined_output = torch.cat((bert_output, resnet_output), dim=1)

        # Fully connected layers
        x = F.relu(self.fc1(combined_output))
        x = F.relu(self.fc2(x))
        output = self.fc3(x)

        return output

    def predict_proba(self, input_ids, attention_masks, image_tensors):
        """Return class probabilities: softmax over the logits of forward()."""
        return self.softmax(self(input_ids, attention_masks, image_tensors))



In [None]:
from torch.utils.data import DataLoader
import torch.optim as optim

batch_size = 32
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=False)


def _accuracy_percent(model, dataloader):
    """Return the top-1 accuracy (in percent) of ``model`` over ``dataloader``.

    Puts the model in eval mode and runs without gradient tracking. Each batch
    is expected to be (input_ids, attention_masks, images, targets).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in dataloader:
            b_input_ids = batch[0].cuda()
            b_attention_masks = batch[1].cuda()
            b_images = batch[2].cuda()
            b_targets = batch[3].cuda()
            outputs = model(b_input_ids, b_attention_masks, b_images)
            predicted = outputs.argmax(dim=1)
            total += b_targets.size(0)
            correct += (predicted == b_targets).sum().item()
    return 100 * correct / total


# Initialize the model
model = MultiModalClassifier(7)
model.cuda()
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)
stats=[]
# Train the model for a specified number of epochs
num_epochs = 5
for epoch in range(num_epochs):
    print("\nEpoch {}/{}_____Training..........".format(epoch+1, num_epochs))

    total_train_loss=0
    # Set the model to train mode
    model.train()

    # Iterate over the batches in the training data
    for batch in train_dataloader:

        # Extract the inputs and targets. Batch-local names are used so the
        # dataset-level input_ids / attention_masks tensors are not overwritten
        # by a single GPU batch.
        b_input_ids = batch[0].cuda()
        b_attention_masks = batch[1].cuda()
        b_images = batch[2].cuda()
        b_targets = batch[3].cuda()

        # Zero the gradients
        optimizer.zero_grad()

        # Compute the model's predictions
        outputs = model(b_input_ids, b_attention_masks, b_images)

        # Compute the loss
        loss = criterion(outputs, b_targets)

        total_train_loss += loss.item()
        # Compute the gradients
        loss.backward()

        # Update the parameters
        optimizer.step()

    avg_train_loss = total_train_loss/len(train_dataloader)
    print('Average Training Loss = {0:.2f} '.format(avg_train_loss))
    print('Running Evaluation...')

    # Compute the validation accuracy (the helper switches to eval mode)
    val_acc = _accuracy_percent(model, val_dataloader)

    # Print the loss and validation accuracy for each epoch
    print('Validation Accuracy: {:.2f}%'.format(val_acc))
    stats.append(
        {
            'epoch': epoch + 1,
            'training_loss': avg_train_loss,
            'val_acc': val_acc,
        }
    )

# Final check on the held-out test split, which was built but never evaluated.
test_acc = _accuracy_percent(model, test_dataloader)
print('Test Accuracy: {:.2f}%'.format(test_acc))



Some weights of the model checkpoint at path/to/fine_tuned_bert were not used when initializing BertModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



Epoch 1/5_____Training..........
Average Training Loss = 1.87 
Running Evaluation...
Validation Accuracy: 96.06%

Epoch 2/5_____Training..........
Average Training Loss = 1.62 
Running Evaluation...
Validation Accuracy: 95.80%

Epoch 3/5_____Training..........
Average Training Loss = 1.37 
Running Evaluation...
Validation Accuracy: 95.54%

Epoch 4/5_____Training..........
Average Training Loss = 1.25 
Running Evaluation...
Validation Accuracy: 96.33%

Epoch 5/5_____Training..........
Average Training Loss = 1.21 
Running Evaluation...
Validation Accuracy: 97.64%


In [None]:
# Per-epoch training loss and validation accuracy collected by the training loop.
print(stats)

[{'epoch': 1, 'training_loss': 1.8719795942306519, 'val_acc': 96.06299212598425}, {'epoch': 2, 'training_loss': 1.6244346683933621, 'val_acc': 95.8005249343832}, {'epoch': 3, 'training_loss': 1.3697825954073952, 'val_acc': 95.53805774278216}, {'epoch': 4, 'training_loss': 1.248588936669486, 'val_acc': 96.3254593175853}, {'epoch': 5, 'training_loss': 1.2144745900517417, 'val_acc': 97.63779527559055}]
