### Importing Python Modules

In [None]:
!pip install torchvision

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, cohen_kappa_score

from PIL import Image

# Import nn module for building stacked layers and optimizers
import torch
from torch import nn, optim
import torchvision
from torchvision import datasets, models, transforms
from torch.nn import functional as F
# Import modules for dataset configuration and loading
from torch.utils.data import Dataset, DataLoader
import torch.optim.lr_scheduler as lr_scheduler

from transformers import AdamW

import os
from tqdm.notebook import tqdm
from collections import defaultdict
from textwrap import wrap
from PIL import Image, ImageFile, UnidentifiedImageError


In [3]:
import warnings
warnings.filterwarnings("ignore")
import pdb

### Reading the dataset

In [4]:

gossicop_fake = pd.read_csv('datasets/gossipcop_fake.csv')
gossicop_real = pd.read_csv('datasets/gossipcop_real.csv')
politifact_fake = pd.read_csv('datasets/politifact_fake.csv')
politifact_real = pd.read_csv('datasets/politifact_real.csv')

#### Adding labels to the dataset

In [5]:
gossicop_fake['labels']= 0
gossicop_real['labels']= 1

##### concatenating the dataset and shuffle it

In [6]:
df_gossip = pd.concat([gossicop_real, gossicop_fake], ignore_index=True)
df_gossip = shuffle(df_gossip)

In [7]:
df_gossip.columns

Index(['id', 'news_url', 'title', 'tweet_ids', 'labels'], dtype='object')

In [8]:
df_gossip.title.isnull().sum()

0

In [9]:
import read_data
df_gossip = read_data.load_dataset_gossipcop()

The dataset after dropping not news url containing rows
Fake dataset info: 
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5067 entries, 0 to 5322
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         5067 non-null   object
 1   news_url   5067 non-null   object
 2   title      5067 non-null   object
 3   tweet_ids  4898 non-null   object
dtypes: object(4)
memory usage: 197.9+ KB
None
Real dataset info: 
<class 'pandas.core.frame.DataFrame'>
Int64Index: 16804 entries, 0 to 16816
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         16804 non-null  object
 1   news_url   16804 non-null  object
 2   title      16804 non-null  object
 3   tweet_ids  15747 non-null  object
dtypes: object(4)
memory usage: 656.4+ KB
None
Final dataset description:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 21871 entries, 10600 to 20862
Data columns (total 5

In [10]:
df_gossip.head()

Unnamed: 0,id,news_url,title,tweet_ids,labels
10147,gossipcop-885134,https://www.tmz.com/2018/06/07/jeremy-meeks-me...,Jeremy and Melissa Meeks' Divorce a Done Deal,915995914329743360\t915996984980443136\t916003...,0
11083,gossipcop-893342,https://www.instyle.com/news/leonardo-dicaprio...,Leonardo DiCaprio's 43rd Birthday Bash Was a S...,929926564996714496\t929928091702583296\t929928...,0
4769,gossipcop-911046,https://ew.com/tv/2018/02/07/law-order-svu-rau...,Raúl Esparza exits Law & Order: SVU after six ...,961445047517569025\t961455185854803969\t961458...,0
13563,gossipcop-911815,https://ew.com/tv/2018/10/09/this-is-us-three-...,"This Is Us producers on three-Kate scene, Jack...",960682277289242629\t960682516935036928\t960682...,0
243,gossipcop-848433,https://www.dailymail.co.uk/tvshowbiz/article-...,"Sharon Stone, 60, shares very rare photo with ...",860140423016177667\t860140790751916033\t860140...,0


In [11]:
len(df_gossip)

3549

In [26]:
test = df_gossip.copy().head(10)

In [27]:
print(len(test))

10


### Images paths

In [28]:
image_path = 'gossipcop_images/'

In [29]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from torchvision import models, transforms
from transformers import BertModel, BertTokenizer, DistilBertModel

In [30]:
# Load pre-trained models
resnet = models.resnet50(pretrained=True) # resnet18
resnet = nn.Sequential(*list(resnet.children())[:-1])  # Remove the last layer to get embeddings
bert = BertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Assigning device to train the model

In [31]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if str(device) == "cpu":
    print("CPU is allocated.")
else:
    print("GPU is allocated.")

CPU is allocated.


### Dataloader

In [32]:
# Custom dataset class
class MultiModalDataset(torch.utils.data.Dataset):
    def __init__(self, df, image_folder, tokenizer, transform):
        self.df = df
        self.image_folder = image_folder
        self.tokenizer = tokenizer
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        #pdb.set_trace()
        text, post_id, labels = row["title"], row["id"], row["labels"]
        labels = torch.tensor(labels, dtype=torch.long)

        images_path = os.path.join(self.image_folder, post_id)
        images = []
        if os.path.exists(images_path):
            image_files = os.listdir(images_path)

            for img in image_files:
                try:
                    image = Image.open(os.path.join(images_path, img))
                    if image.mode != 'RGB':
                        image = image.convert('RGB')
                    images.append(image)
                # Handling Exception and randomly initializing pixels
                except Exception:
                    images.append(Image.new("RGB", (224, 224), "black"))
        else:
            images = [Image.new("RGB", (224, 224), "black")]
            
        inputs = self.tokenizer(text, return_tensors="pt", padding="max_length", truncation=True)
        images = [self.transform(img) for img in images]
        images_tensor = torch.stack(images)
        sample = {'text': inputs, 'images': images_tensor, 'labels': labels}

        return sample


In [33]:
# Define image transformations
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.255]
    )
])

# Just normalization for validation
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.255]
    )
])


In [50]:
# Create the dataset
dataset = MultiModalDataset(test, image_path, tokenizer, transform )
# Create the data loader
train_dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

### Model 

In [51]:
import torch
import torch.nn as nn

class MultiModalModel(nn.Module):
    def __init__(self, bert, resnet):
        super().__init__()
        self.bert = bert
        self.resnet = resnet
        self.drop = nn.Dropout(p=0.3)
        
        # Defining the attention mechanism for the model
        self.image_to_title_attention = nn.MultiheadAttention(bert.config.hidden_size, num_heads=4)  # Increase num_heads
        
        self.linear = nn.Linear(2048, bert.config.hidden_size)
        self.norm = nn.BatchNorm1d(bert.config.hidden_size)
        self.relu = nn.ReLU()  # Add ReLU activation
        self.hidden = nn.Linear(bert.config.hidden_size, bert.config.hidden_size)  # Add hidden layer
        self.classifier = nn.Linear(bert.config.hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs, images):
        # Process text input
        text_output = self.bert(**inputs).last_hidden_state[:, 0, :]

        # Process image input
        img_embeddings = [self.resnet(img.unsqueeze(0)).view(-1) for img in images]
        img_embeddings = torch.stack(img_embeddings)
        print(img_embeddings.shape)
        img_embeddings = self.linear(img_embeddings)
        img_embeddings = self.norm(img_embeddings)  # Apply batch normalization
        img_embeddings = self.relu(img_embeddings)  # Apply ReLU activation

        # Calculate attention between text and each image
        attention_outputs = []
        for img_emb in img_embeddings:
            img_emb = img_emb.view(1, 1, 768)
            # text_output.unsqueeze(1).shape (1, batch_size, hidden_size) => (1, 2, 768)
            #img_emb.shape => (1, 1, hidden_size)
            att_out, _ = self.image_to_title_attention(text_output.unsqueeze(1), img_emb, img_emb)
            attention_outputs.append(att_out)

        # Average attention outputs
        attention_output = torch.stack(attention_outputs).mean(dim=0)

        # Classifier
        logits = self.hidden(attention_output.squeeze(1))  # Apply hidden layer
        logits = self.drop(logits)  # Apply dropout to the hidden layer
        logits = self.classifier(logits)
        return self.sigmoid(logits)
       


In [52]:
model = MultiModalModel(bert, resnet)
# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Set up the loss function and optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 10
criterion = nn.CrossEntropyLoss()



### Training loop

In [53]:

# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for data in tqdm(train_dataloader):
        # Move data to the device
        inputs = {k: v.squeeze(1).to(device) for k, v in data['text'].items()}
        #inputs = data['text'].to(device)
        #images = torch.stack(data['images']).to(device)
        images = data['images'].to(device)
        labels = data['labels'].to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs, images.squeeze(1))
        loss = criterion(outputs.squeeze(), labels.float())

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print average loss for the epoch
    avg_loss = running_loss / len(train_dataloader)
    print(f"Epoch: {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

  0%|          | 0/5 [00:00<?, ?it/s]

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 3, 3, 224, 224]

In [None]:
def train_model(model, data_loader, loss_function, optimizer, device, num_examples):
    print("Training model in progress..")
    print("-" * 15)
    model = model.train()
    train_losses = []
    correct_preds = 0
    for data in tqdm(data_loader):
        input_ids = data["input_ids"].to(device)
        attention_mask = data["attention_mask"].to(device)
        images = data["image"].to(device)
        labels = data["label"].to(device)
  
        outputs = model(
                title_input_ids = input_ids,
                title_attention_mask = attention_mask,
                image = images
        )
        
        _, preds = torch.max(outputs, dim=1)
        train_loss = loss_function(outputs, labels)
        correct_preds += torch.sum(preds == labels)
        train_losses.append(train_loss.item())
        train_loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        optimizer.zero_grad()
            
    # Return train_acc and train_loss values
    return correct_preds.double() / num_examples, np.mean(train_losses)


In [None]:
def evaluate_model(model, data_loader, loss_function, device, num_examples):
    print("validation of the model in progress...")
    print("-" * 15)
    model = model.eval()
    val_losses = []
    correct_preds = 0
    with torch.no_grad():
        for data in tqdm(data_loader):
            input_ids = data["input_ids"].to(device)
            attention_mask = data["attention_mask"].to(device)
            images = data["image"].to(device)
            labels = data["label"].to(device)
            
            outputs = model(
                    title_input_ids = input_ids,
                    title_attention_mask = attention_mask,
                    image = images
                 )
            
            _, preds = torch.max(outputs, dim=1)
            
            val_loss = loss_function(outputs, labels)
            correct_preds += torch.sum(preds == labels)
            val_losses.append(val_loss.item())
    return correct_preds.double() / num_examples, np.mean(val_losses)
                

In [None]:
best_accuracy = 0

# Iteration times the total number of epochs
for epoch in range(EPOCHS):

    print(f"Epoch {epoch + 1}/{EPOCHS}")
    print("-" * 10)

    train_acc, train_loss = train_model(
        model,
        train_data_loader,
        loss_function,
        optimizer,
        device,
        len(df_train)
    )
    

    print(f"Train loss {train_loss} | Accuracy {train_acc}")
    print()
    val_acc, val_loss = evaluate_model(
            model,
            validate_data_loader,
            loss_function,
            device,
            len(df_val)
    )

    print(f"Val   loss {val_loss} | Accuracy {val_acc}")
    print()

print()
print("Completed Training!")
print("-" * 20)