In [None]:
#!unzip /content/drive/MyDrive/MemoSen.zip -d /content/drive/MyDrive/MemoSen_Extracted

In [None]:
import os
import pandas as pd

# Paths
base_dir = '/kaggle/input/memosen/Dataset'
image_dir = os.path.join(base_dir, 'Memes')  # Folder containing the meme image files
input_excel = os.path.join(base_dir, 'multi-sent.xlsx')  # Annotation spreadsheet
output_csv = os.path.join("/kaggle/working/", 'image_caption_labels.csv')  # Destination CSV

# Step 1: Collect the file names present in the Memes folder.
# A set gives constant-time membership checks instead of scanning a list once per row.
image_files = set(os.listdir(image_dir))

# Step 2: Load the Excel file.
# Expected columns (read below): 'image_name', 'Captions', 'Label_Sentiment'.
df = pd.read_excel(input_excel)

# Step 3: Keep only the annotation rows whose image file actually exists on disk.
matched = df.loc[
    df['image_name'].isin(image_files),
    ['image_name', 'Captions', 'Label_Sentiment'],
]

# Step 4: Add the full image path and fix the output column order.
image_df = matched.assign(
    Image_path=matched['image_name'].map(lambda name: os.path.join(image_dir, name))
)[['Image_path', 'image_name', 'Captions', 'Label_Sentiment']].reset_index(drop=True)

# Step 5: Save the matched rows to a CSV file
image_df.to_csv(output_csv, index=False)

print(f"CSV file saved at {output_csv}")


In [None]:
import pandas as pd

# Path to the CSV file written by the previous cell (adjust if that output location changes)
csv_file = '/kaggle/working/image_caption_labels.csv'

# Step 1: Load the CSV file into a DataFrame (rebinds `df`, which the split cell below consumes)
df = pd.read_csv(csv_file)

# Step 2: Preview the first rows; as the cell's last expression the result is rendered as cell output
df.head()


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Stratified 70/20/10 split on 'Label_Sentiment':
#   1) hold out 30% of the rows,
#   2) divide that hold-out 2:1 into test (20% overall) and validation (10% overall).
train_df, temp_df = train_test_split(
    df,
    test_size=0.3,
    stratify=df['Label_Sentiment'],
    random_state=42,
)
test_df, val_df = train_test_split(
    temp_df,
    test_size=1/3,
    stratify=temp_df['Label_Sentiment'],
    random_state=42,
)

# Persist each split; insertion order is train, test, validation.
split_outputs = {
    '/kaggle/working/train.csv': train_df,
    '/kaggle/working/test.csv': test_df,
    '/kaggle/working/validation.csv': val_df,
}
for split_path, split_frame in split_outputs.items():
    split_frame.to_csv(split_path, index=False)

# Report the (rows, columns) of every split as a sanity check
print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")
print(f"Validation shape: {val_df.shape}")


In [None]:
import pandas as pd
import re
import string

def remove_punctuation(text):
    """Drop every ASCII punctuation character (``string.punctuation``) from ``text``.

    Only ASCII punctuation is removed, so Bangla characters are left untouched.
    Missing values (anything ``pd.isna`` reports as missing) are returned unchanged.
    """
    if pd.isna(text):
        return text
    ascii_punctuation = set(string.punctuation)
    return "".join(ch for ch in text if ch not in ascii_punctuation)

def remove_whitespace(text):
    """Collapse each run of whitespace into a single space and trim both ends.

    Missing values (per ``pd.isna``) are returned unchanged.
    """
    if not pd.isna(text):
        tokens = text.split()
        text = " ".join(tokens)
    return text

# Compiled once at definition time. Every range below covers symbol/pictograph
# blocks only, so ordinary letters of any script are never matched.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F000-\U0001F251"  # mahjong/cards, enclosed alphanumerics (incl. flags), enclosed ideographs
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U0001FA00-\U0001FAFF"  # chess symbols, symbols & pictographs extended-A
    "\U000024C2-\U000027BF"  # circled letters, box/geometric shapes, misc symbols, dingbats
    "\U00002B00-\U00002BFF"  # miscellaneous symbols and arrows (stars, large squares, ...)
    "\U0000FE0F"             # variation selector-16 left behind by emoji sequences
    "]+",
    flags=re.UNICODE,
)


def remove_emojis(text):
    """Strip emoji and pictographic symbols from ``text``.

    The previous character class contained the single range U+24C2..U+1F251.
    That range also covers CJK ideographs, Hangul, Kana and many other
    non-emoji characters, so all of them were deleted as if they were emoji.
    It is replaced here by explicit symbol-only blocks. The newer emoji blocks
    U+1F900..U+1F9FF and U+1FA00..U+1FAFF are added as well, since the old
    class did not reach them.

    Missing values (per ``pd.isna``) are returned unchanged.
    """
    if pd.isna(text):
        return text
    return _EMOJI_PATTERN.sub('', text)

def remove_urls(text):
    """Delete ``http://`` / ``https://`` links and bare ``www.`` links from ``text``.

    A link extends up to the next whitespace character.
    Missing values (per ``pd.isna``) are returned unchanged.
    """
    if not pd.isna(text):
        text = re.sub(r'https?://\S+|www\.\S+', '', text)
    return text

def remove_html(text):
    """Remove anything that looks like an HTML tag (shortest ``<...>`` span) from ``text``.

    Missing values (per ``pd.isna``) are returned unchanged.
    """
    if not pd.isna(text):
        tag_regex = re.compile(r'<.*?>')
        text = tag_regex.sub('', text)
    return text

def remove_special_characters(text):
    """Keep only ASCII letters, ASCII digits, whitespace and the Bengali block U+0980-U+09FF.

    Every other character is deleted.
    Missing values (per ``pd.isna``) are returned unchanged.
    """
    if pd.isna(text):
        return text
    disallowed = re.compile(r'[^A-Za-z0-9\s\u0980-\u09FF]')
    return disallowed.sub('', text)

def clean_text(text):
    """Run the full cleaning pipeline on ``text`` and return the result.

    Steps are applied in order: URLs, HTML tags, emojis, ASCII punctuation,
    remaining special characters, then whitespace normalisation.
    """
    cleaning_steps = (
        remove_urls,
        remove_html,
        remove_emojis,
        remove_punctuation,
        remove_special_characters,
        remove_whitespace,
    )
    for step in cleaning_steps:
        text = step(text)
    return text

# Mapping categories to integers (keys are matched after lower-casing and trimming)
category_mapping = {
    'positive': 0,
    'negative': 1,
    'neutral': 2,
}

# Paths to the previously saved CSVs
csv_paths = {
    'Train': '/kaggle/working/train.csv',
    'Test': '/kaggle/working/test.csv',
    'Validation': '/kaggle/working/validation.csv'
}

# Output paths for the cleaned CSVs
cleaned_output_paths = {
    'Train': '/kaggle/working/train_cleaned.csv',
    'Test': '/kaggle/working/test_cleaned.csv',
    'Validation': '/kaggle/working/validation_cleaned.csv'
}

# Text columns to clean
text_columns = ['Captions', 'Label_Sentiment']

# Loop through each dataset.
# A dedicated loop variable is used so the notebook-level `df` (the full
# dataset the split cell reads) is not overwritten by the last split.
for key in csv_paths:
    # Load the dataset
    split_df = pd.read_csv(csv_paths[key])

    # Apply cleaning to all relevant text columns.
    # NOTE(review): astype(str) turns a missing value into the literal text 'nan'
    # before cleaning — confirm whether the data contains missing captions.
    for column in text_columns:
        split_df[column] = split_df[column].astype(str).apply(clean_text)

    # Normalise label spelling so 'Positive' or ' neutral ' still map correctly.
    normalized_labels = split_df['Label_Sentiment'].str.strip().str.lower()
    mapped_labels = normalized_labels.map(category_mapping)

    # Series.map yields NaN for unknown keys. Fail here with the offending values
    # instead of letting NaN labels flow into the training loop.
    unknown_labels = sorted(normalized_labels[mapped_labels.isna()].unique())
    if unknown_labels:
        raise ValueError(
            f"{key} split contains labels not present in category_mapping: {unknown_labels}"
        )

    # Map the 'Label_Sentiment' column to integers
    split_df['Label_Sentiment'] = mapped_labels.astype('int64')

    # Add a 'label' column (same as 'Label_Sentiment' for now)
    split_df['label'] = split_df['Label_Sentiment']

    # Display the cleaned dataframe
    print(f"Cleaned {key} dataframe:")
    print(split_df.head())

    # Save the cleaned dataframe to a new CSV file
    split_df.to_csv(cleaned_output_paths[key], index=False)
    print(f"Cleaned dataframe saved to {cleaned_output_paths[key]}\n")


In [None]:
# Reload the cleaned training split from disk (rebinds `train_df`) and preview its first rows
train_df = pd.read_csv('/kaggle/working/train_cleaned.csv')
train_df.head()

In [None]:
# Reload the cleaned test split from disk (rebinds `test_df`) and preview its first rows
test_df = pd.read_csv('/kaggle/working/test_cleaned.csv')
test_df.head()

In [None]:
# Load the cleaned validation split from disk and preview its first rows
validation_df = pd.read_csv('/kaggle/working/validation_cleaned.csv')
validation_df.head()

In [12]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from transformers import BertTokenizer, BertModel, AdamW
import torchvision.models as models
from tqdm import tqdm

In [3]:
# !pip install --upgrade transformers

In [4]:
# pip show transformers torch torchvision

In [None]:
pip install transformers==4.33.0

In [None]:
import torch
import torchvision.models as models
from transformers import AutoImageProcessor, ViTModel

# Load the image preprocessing configuration and the pretrained ViT backbone from the same checkpoint.
# `model` is the image encoder that the later training/testing cells call.
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")

In [None]:
from transformers import BertTokenizer, BertModel, AdamW, AutoTokenizer, DistilBertModel

# Initialize the text tokenizer and encoder.
# The checkpoint is a DistilBERT model, so it must be loaded with the DistilBERT
# classes. Loading it through BertModel builds a BERT architecture whose
# parameter names do not match the checkpoint, leaving most of the encoder
# randomly initialised.
# The variable names are kept because later cells refer to them.
# NOTE(review): the cleaning step deliberately preserves Bangla characters, while
# this checkpoint is an English uncased model — presumably much of the caption
# text tokenizes poorly; consider a multilingual checkpoint. Confirm.
bert_tokenizer = AutoTokenizer.from_pretrained('distilbert/distilbert-base-uncased')
bert_model = DistilBertModel.from_pretrained("distilbert/distilbert-base-uncased")

In [None]:
# Check if GPU is available; fall back to the CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

In [16]:
import os

# Enable device-side assertions
# NOTE(review): PyTorch's CUDA error text describes TORCH_USE_CUDA_DSA as a compile-time
# option, and torch has already been imported by an earlier cell — this runtime assignment
# presumably has no effect. Confirm; if the goal is synchronous CUDA error reporting,
# CUDA_LAUNCH_BLOCKING=1 set before CUDA is initialised is the usual runtime switch.
os.environ['TORCH_USE_CUDA_DSA'] = '1'

In [None]:
# Move the ViT image encoder to the selected device (the returned module is shown as cell output)
model.to(device)

In [None]:
# Move the text encoder to the selected device (the returned module is shown as cell output)
bert_model.to(device)

In [21]:
from torchvision import transforms
from PIL import Image
from torch.utils.data import Dataset

max_seq_length = 512  # Token length every caption is padded/truncated to

# Pre-processing for images fed to the ViT encoder.
# The normalisation statistics come from the checkpoint's own image processor
# (loaded alongside `model`) instead of hard-coded ImageNet values, so the pixel
# scaling matches what the pretrained weights expect.
transform = transforms.Compose([
    transforms.Resize(224),       # shorter side -> 224, aspect ratio preserved
    transforms.CenterCrop(224),   # square 224x224 crop from the centre
    transforms.ToTensor(),        # PIL image -> float tensor in [0, 1], shape (C, H, W)
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
])

class MyMultimodalDataset(Dataset):
    """Image + caption dataset backed by a pandas DataFrame.

    The frame must provide the columns ``'Image_path'``, ``'Captions'`` and
    ``'label'``.

    Args:
        data: DataFrame with one sample per row.
        transform: Optional callable applied to the RGB ``PIL.Image``.
        tokenizer: Callable tokenizer invoked on the caption string; it must
            return a mapping with ``'input_ids'`` and ``'attention_mask'``.
        max_seq_length: Length captions are padded/truncated to.
    """

    def __init__(self, data, transform=None, tokenizer=None, max_seq_length=512):
        self.data = data
        self.transform = transform
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length

    def __len__(self):
        """Return the number of samples (rows of the backing frame)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, input_ids, attention_mask, label)`` for row ``idx``.

        ``input_ids`` and ``attention_mask`` are returned exactly as the
        tokenizer produces them for a single string with ``return_tensors='pt'``.

        Raises:
            RuntimeError: if the image cannot be opened or transformed. The
                previous behaviour returned a tuple of ``None`` values, which
                the DataLoader's default collation cannot batch; that hid the
                real cause behind an unrelated collate error.
        """
        # Look the row up once instead of once per column.
        row = self.data.iloc[idx]

        image_path = row['Image_path']
        try:
            # The context manager closes the file handle as soon as the pixels
            # have been copied into the converted image.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert('RGB')
            if self.transform is not None:
                image = self.transform(image)
        except Exception as e:
            raise RuntimeError(
                f"Error loading image at index {idx} ({image_path}): {e}"
            ) from e

        context = row['Captions']
        if not isinstance(context, str):
            # A caption that is empty in the CSV is read back as NaN, which the
            # tokenizer rejects; treat it as an empty string.
            context = '' if pd.isna(context) else str(context)

        inputs = self.tokenizer(
            context,
            padding='max_length',
            truncation=True,
            max_length=self.max_seq_length,
            return_tensors='pt',
        )
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        # Plain int so default collation always yields an integer label tensor.
        label = int(row['label'])

        return image, input_ids, attention_mask, label

In [22]:
# Constructor arguments shared by all three splits
dataset_kwargs = dict(
    transform=transform,
    tokenizer=bert_tokenizer,
    max_seq_length=max_seq_length,
)

# One dataset per split
train_dataset = MyMultimodalDataset(train_df, **dataset_kwargs)
test_dataset = MyMultimodalDataset(test_df, **dataset_kwargs)
val_dataset = MyMultimodalDataset(validation_df, **dataset_kwargs)

# Data loaders: only the training loader shuffles; evaluation order stays fixed
batch_size = 1  # Set your desired batch size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(eval_dataset, batch_size=batch_size, shuffle=False)
    for eval_dataset in (val_dataset, test_dataset)
)

In [None]:
import torch
import torchvision.models as models
from transformers import DistilBertModel, AdamW, AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Relies on names defined by earlier cells: `model` (ViT image encoder),
# `bert_model` (text encoder), `train_loader` and `val_loader`.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both encoders act as frozen feature extractors: only the classifier head
# below is handed to the optimizer. Keep them in eval mode.
model.eval()
bert_model.eval()


def extract_combined_features(images, input_ids, attention_mask):
    """Return the concatenated [CLS] features of the image and text encoders.

    Runs under ``torch.no_grad()`` because neither encoder is optimised here.
    Without it, autograd keeps the text encoder's whole graph for every batch
    and accumulates gradients that no optimizer ever uses or clears.

    The result has shape (batch, image_hidden + text_hidden).
    """
    with torch.no_grad():
        img_feats = model(pixel_values=images).last_hidden_state[:, 0, :]
        text_feats = bert_model(
            input_ids=input_ids, attention_mask=attention_mask
        ).last_hidden_state[:, 0, :]
    return torch.cat((img_feats, text_feats), dim=1)


# Input width of the head is the sum of the two encoders' hidden sizes. It is
# read from their configs so it always equals the width of the concatenated
# features; a hard-coded width that differs from it makes the first Linear
# layer fail with a shape mismatch.
fused_dim = model.config.hidden_size + bert_model.config.hidden_size

# Classification head (the name `regressor` is kept for the cells that follow)
regressor = torch.nn.Sequential(
    torch.nn.Linear(fused_dim, 512),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(512, 3)  # three sentiment classes
).to(device)

optimizer = torch.optim.AdamW(regressor.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

num_epochs = 40
train_losses = []
val_losses = []
start_time = time.time()

for epoch in range(num_epochs):
    running_train_loss = 0.0

    regressor.train()

    # Wrap the training loop with tqdm
    for images, texts, attention_masks, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training", ncols=100, leave=False):
        images = images.to(device)
        # CrossEntropyLoss expects integer (long) class indices.
        labels = labels.to(device).long()

        # The dataset yields (1, seq_len) tensors; after batching they are
        # (batch, 1, seq_len), so drop the singleton dimension.
        input_ids = texts.squeeze(1).to(device)
        attention_mask = attention_masks.squeeze(1).to(device)

        optimizer.zero_grad()

        combined_feats = extract_combined_features(images, input_ids, attention_mask)

        # Forward pass through the classification head
        predictions = regressor(combined_feats)
        loss = criterion(predictions, labels)

        loss.backward()
        optimizer.step()

        running_train_loss += loss.item()

    epoch_train_loss = running_train_loss / len(train_loader)
    train_losses.append(epoch_train_loss)

    regressor.eval()

    running_val_loss = 0.0

    # Wrap the validation loop with tqdm
    with torch.no_grad():
        for val_images, val_texts, val_attention_masks, val_labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation", ncols=100, leave=False):
            val_images = val_images.to(device)
            val_labels = val_labels.to(device).long()

            val_input_ids = val_texts.squeeze(1).to(device)
            val_attention_mask = val_attention_masks.squeeze(1).to(device)

            val_combined_feats = extract_combined_features(val_images, val_input_ids, val_attention_mask)

            # Forward pass through the classification head for validation
            val_predictions = regressor(val_combined_feats)
            val_loss = criterion(val_predictions, val_labels)

            running_val_loss += val_loss.item()

    epoch_val_loss = running_val_loss / len(val_loader)
    val_losses.append(epoch_val_loss)

    print(f"Epoch [{epoch + 1}/{num_epochs}] - "
          f"Train Loss: {epoch_train_loss:.4f}, "
          f"Val Loss: {epoch_val_loss:.4f}")

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time: {execution_time:.2f} seconds")


In [None]:
import torch
from tqdm import tqdm
import time

# Set every module to evaluation mode. This includes the trained classification
# head, so its Dropout layer is disabled during inference.
model.eval()
bert_model.eval()
regressor.eval()

# Prepare lists to store predicted and true labels
predicted_labels = []
true_labels = []

# Set your device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Test loop
start_time = time.time()

with torch.no_grad():
    for test_images, test_texts, test_attention_masks, test_labels in tqdm(test_loader, desc='Testing', leave=False):
        test_images = test_images.to(device)

        # (batch, 1, seq_len) -> (batch, seq_len), same as in the training loop
        test_input_ids = test_texts.squeeze(1).to(device)
        test_attention_mask = test_attention_masks.squeeze(1).to(device)

        # Use exactly the features the head was trained on: the [CLS] token of
        # each encoder, concatenated. Flattening every ViT patch token would
        # give a different feature width from the one used during training.
        test_img_feats = model(pixel_values=test_images).last_hidden_state[:, 0, :]
        test_text_feats = bert_model(
            input_ids=test_input_ids, attention_mask=test_attention_mask
        ).last_hidden_state[:, 0, :]
        combined_feats = torch.cat((test_img_feats, test_text_feats), dim=1)

        # Classify with the TRAINED head. Building a new torch.nn.Sequential here
        # would score every batch with random weights.
        combined_logits = regressor(combined_feats)

        # argmax over logits equals argmax over softmax probabilities.
        predicted_classes = torch.argmax(combined_logits, dim=1)

        predicted_labels.extend(predicted_classes.cpu().tolist())
        true_labels.extend(test_labels.tolist())

end_time = time.time()
execution_time = end_time - start_time

# Print or use the predicted labels and true labels as needed
print("Predicted Labels:", predicted_labels)
print("True Labels:", true_labels)
print(f"Total execution time for testing: {execution_time:.2f} seconds")

In [None]:
# Display the collected test-set predictions (the full list is rendered as cell output)
predicted_labels

In [None]:
# Display the collected ground-truth test labels (the full list is rendered as cell output)
true_labels

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, mean_squared_error, classification_report

# Calculate accuracy
accuracy = accuracy_score(true_labels, predicted_labels)

# Calculate precision, recall, F1-score overall (macro average)
precision, recall, f1_score_macro, _ = precision_recall_fscore_support(true_labels, predicted_labels, average='macro')

# Calculate weighted precision, recall, and F1-score
precision_weighted, recall_weighted, f1_score_weighted, _ = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')

# Calculate confusion matrix (rows = true class, columns = predicted class)
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Calculate Mean Squared Error.
# NOTE(review): the class ids are an arbitrary encoding of nominal categories,
# so this value depends on that encoding — confirm it is really wanted.
mse = mean_squared_error(true_labels, predicted_labels)

# Per-class counts derived from the confusion matrix
total = np.sum(conf_matrix)
true_positives = np.diag(conf_matrix)
actual_per_class = conf_matrix.sum(axis=1)     # TP + FN
predicted_per_class = conf_matrix.sum(axis=0)  # TP + FP

# Sensitivity (Recall) for each class: TP / (TP + FN).
# The macro-averaged `recall` above is a single number, not a per-class value.
sensitivity_per_class = [
    float(tp / actual) if actual else 0.0
    for tp, actual in zip(true_positives, actual_per_class)
]

# Specificity for each class: TN / (TN + FP)
specificity_per_class = []
for i in range(len(conf_matrix)):
    fp = predicted_per_class[i] - true_positives[i]
    tn = total - (actual_per_class[i] + predicted_per_class[i] - true_positives[i])
    negatives = tn + fp
    # Guard the case where every sample belongs to class i (no negatives).
    specificity_per_class.append(float(tn / negatives) if negatives else 0.0)

# Print overall calculated metrics
print(f"Accuracy: {accuracy}")
print(f"Precision (macro): {precision}")
print(f"Recall (macro): {recall}")
print(f"F1-Score (macro): {f1_score_macro}")
print(f"Weighted F1-Score: {f1_score_weighted}")
print(f"Mean Squared Error: {mse}")

# Print Sensitivity and Specificity for each class
print(f"Sensitivity (Recall) for each class: {sensitivity_per_class}")
print(f"Specificity for each class: {specificity_per_class}")
