# Multimodal Sentiment Analysis

In [1]:
import numpy as np
import pandas as pd
pd.options.plotting.backend = "plotly"
import random
from glob import glob
import os, shutil
from tqdm import tqdm
tqdm.pandas()
import time
import copy
import joblib
from collections import defaultdict
import gc
from IPython import display as ipd

# visualization
import cv2
import matplotlib.pyplot as plt

# Sklearn
from sklearn.model_selection import StratifiedKFold, KFold

# PyTorch 
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

# Model Logging
import wandb

import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text: short aliases for the green foreground code
# and the "reset all styles" code.
from colorama import Fore, Back, Style
c_  = Fore.GREEN
sr_ = Style.RESET_ALL

# Silence every Python warning for the rest of the session.
import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages: ask CUDA to launch kernels synchronously
# so an error is reported at the call that triggered it.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Config

In [10]:
class Config:
    """Static hyper-parameters and names shared by the notebook cells.

    Everything is a plain class attribute, so values are read as
    ``Config.<name>`` without instantiating the class.
    """

    # --- experiment bookkeeping ---
    seed          = 101
    debug         = False  # keep False for full training
    exp_name      = 'vit/sbert'
    model_name    = 'vit-sbert-multimodal'
    competition   = 'memotions-7k'

    # --- pretrained checkpoints ---
    image_encoder = 'google/vit-base-patch16-224'
    tokenizer     = 'sentence-transformers/all-mpnet-base-v2'
    # "<image encoder>+<text encoder>" identifier.
    backbone      = f'{image_encoder}+{tokenizer}'

    # --- data ---
    img_size      = [224, 224]  # used as [height, width] by the dataset resize
    max_len       = 128         # token sequence length used by the tokenizer call
    n_fold        = 5
    num_classes   = 3

    # --- batching ---
    train_bs      = 24
    valid_bs      = 48
    n_accumulate  = 32 // train_bs  # evaluates to 1 with train_bs = 24

    # --- optimisation / schedule ---
    epochs        = 50
    lr            = 5e-3
    min_lr        = 1e-6
    wd            = 1e-6
    scheduler     = 'CosineAnnealingLR'
    T_max         = int(100 * 6 * 1.8)
    T_0           = 25
    warmup_epochs = 0

    # --- hardware ---
    # First CUDA device when one is available, otherwise the CPU.
    device        = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Reproducibility

In [3]:
def set_seed(seed: int = 42):
    """Seed the random number generators used here and make cuDNN deterministic.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    then switches cuDNN to deterministic kernels and disables its autotuner,
    which may otherwise pick different algorithms from run to run.

    Args:
        seed: Value applied to every generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Explicit for clarity; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)

    # Seeding alone does not make GPU runs repeatable: cuDNN must also be
    # told to use deterministic kernels and to skip benchmark-based selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # NOTE: the hash seed of the *running* interpreter is fixed at start-up;
    # this value is only picked up by processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    print('>>> SEEDED <<<')
    
set_seed(Config.seed)

>>> SEEDED <<<


# Meta Data

In [5]:
# Load the fold-annotated metadata table.
df = pd.read_csv('../memotion_dataset_7k/folds.csv')
# Lower-case the corrected captions. Missing captions are filled with an
# empty string first; otherwise str(NaN) would become the literal text "nan".
df['text_corrected'] = df['text_corrected'].fillna('').apply(lambda x: str(x).lower())
df.head()

Unnamed: 0,image_name,text_ocr,text_corrected,humour,sarcasm,offensive,motivational,overall_sentiment,label,kfold
0,image_4112.jpg,YOU HAVE NEVER SEEN TITANIC?!? RU SRS? quickme...,you have never seen titanic?!? r u srs? quickm...,very_funny,general,slight,not_motivational,positive,1,0
1,image_3710.jpg,SAYS THEY'VE BEEN REBUILDING FOR 5 YEARS #SCOT...,says they've been rebuilding for 5 years #scot...,very_funny,twisted_meaning,very_offensive,motivational,positive,2,0
2,image_3739.jpg,spiderman homecoming looks amazing,spiderman homecoming looks amazing,very_funny,general,not_offensive,not_motivational,positive,0,0
3,image_62.jpg,THANOS GETS ALL INFINITY STONES AT THE END OF ...,thanos gets all infinity stones at the end of ...,very_funny,general,not_offensive,not_motivational,positive,0,0
4,image_5891.jpg,I do not want pizza I want my oscar,i do not want pizza i want my oscar,funny,not_sarcastic,slight,not_motivational,neutral,1,0


In [7]:
# Attach the relative path of each image file to its metadata row.
df['path'] = df['image_name'].map(
    lambda name: os.path.join('../memotion_dataset_7k/images/', name)
)
df.head()

Unnamed: 0,image_name,text_ocr,text_corrected,humour,sarcasm,offensive,motivational,overall_sentiment,label,kfold,path
0,image_4112.jpg,YOU HAVE NEVER SEEN TITANIC?!? RU SRS? quickme...,you have never seen titanic?!? r u srs? quickm...,very_funny,general,slight,not_motivational,positive,1,0,../memotion_dataset_7k/images/image_4112.jpg
1,image_3710.jpg,SAYS THEY'VE BEEN REBUILDING FOR 5 YEARS #SCOT...,says they've been rebuilding for 5 years #scot...,very_funny,twisted_meaning,very_offensive,motivational,positive,2,0,../memotion_dataset_7k/images/image_3710.jpg
2,image_3739.jpg,spiderman homecoming looks amazing,spiderman homecoming looks amazing,very_funny,general,not_offensive,not_motivational,positive,0,0,../memotion_dataset_7k/images/image_3739.jpg
3,image_62.jpg,THANOS GETS ALL INFINITY STONES AT THE END OF ...,thanos gets all infinity stones at the end of ...,very_funny,general,not_offensive,not_motivational,positive,0,0,../memotion_dataset_7k/images/image_62.jpg
4,image_5891.jpg,I do not want pizza I want my oscar,i do not want pizza i want my oscar,funny,not_sarcastic,slight,not_motivational,neutral,1,0,../memotion_dataset_7k/images/image_5891.jpg


# Dataset

In [11]:
import os
from typing import Dict

import albumentations as A
import numpy as np
import pandas as pd
import torch
from albumentations.pytorch import ToTensorV2
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer

# from config import Config


class MemotionDataset(Dataset):
    """Dataset yielding one resized image and its tokenized caption per row.

    Each item is a dict with the keys ``image``, ``input_ids`` and
    ``attention_mask``. No label is included in the returned dict.
    """

    def __init__(self, df: pd.DataFrame, image_dir: str = '../memotion_dataset_7k/images') -> None:
        """Store the metadata frame and build the tokenizer and image pipeline.

        Args:
            df: Table providing the ``image_name`` and ``text_corrected`` columns.
            image_dir: Directory that contains the image files.
        """
        super().__init__()
        self.df = df
        self.image_dir = image_dir
        self.tokenizer = AutoTokenizer.from_pretrained(Config.tokenizer)
        # Resize to the configured size, then convert HWC array -> CHW tensor.
        # No normalisation is applied, so pixel values keep their raw range.
        self.transforms = A.Compose([
            A.Resize(height=Config.img_size[0], width=Config.img_size[1]),
            ToTensorV2(),
        ])

    def __len__(self) -> int:
        """Return the number of rows in the metadata frame."""
        return len(self.df)

    @staticmethod
    def _clean_text(value) -> str:
        """Return *value* as a lower-cased string; missing values become ''."""
        if not isinstance(value, str):
            # Empty CSV cells are read as NaN, which has no ``.lower()``.
            if pd.isna(value):
                return ''
            value = str(value)
        return value.lower()

    def __getitem__(self, ix: int) -> Dict[str, torch.Tensor]:
        """Load and preprocess the sample at positional index *ix*.

        Returns:
            Dict with ``image`` (output of the transform pipeline) and the
            ``input_ids`` / ``attention_mask`` tensors of the caption, each
            padded or truncated to ``Config.max_len`` tokens.
        """
        row = self.df.iloc[ix]

        # Image
        # NOTE(review): the file name is lower-cased before the lookup, so
        # this presumably expects lower-case names on disk -- confirm.
        image_path = os.path.join(self.image_dir, row['image_name'].lower())
        with Image.open(image_path) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        img = self.transforms(image=img)['image']

        # Text
        text = self._clean_text(row['text_corrected'])
        out = self.tokenizer(
            text=text,
            max_length=Config.max_len,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )

        # Drop only the leading batch axis added by return_tensors="pt";
        # a bare squeeze() would also remove the token axis if it had size 1.
        return {
            'image': img,
            'input_ids': out['input_ids'].squeeze(0),
            'attention_mask': out['attention_mask'].squeeze(0)
        }


if __name__ == '__main__':
    # Smoke test: build the dataset straight from the folds CSV, draw one
    # shuffled training-size batch and report what it contains.
    dataset = MemotionDataset(df=pd.read_csv('../memotion_dataset_7k/folds.csv'))
    dataloader = DataLoader(dataset, shuffle=True, batch_size=Config.train_bs)
    batch = next(iter(dataloader))
    print(batch.keys())

    # Shape of every tensor in the batch, keyed by field name.
    __import__('pprint').pprint(
        {name: tensor.shape for name, tensor in batch.items()}
    )

dict_keys(['image', 'input_ids', 'attention_mask'])
{'attention_mask': torch.Size([24, 128]),
 'image': torch.Size([24, 3, 224, 224]),
 'input_ids': torch.Size([24, 128])}


In [13]:
# Pull the image tensor out of the sampled batch and show its dimensions.
images = batch['image']
images.size()

torch.Size([24, 3, 224, 224])

In [None]:
# Show every image of the batch on a grid with four images per row.
# The grid is sized from the batch itself, so a batch that is not exactly
# 24 images (e.g. a smaller final batch) no longer raises an IndexError.
n_cols = 4
n_rows = max(1, -(-len(images) // n_cols))  # ceiling division, at least one row
fig, axs = plt.subplots(n_rows, n_cols, figsize=(18, 3 * n_rows), squeeze=False)
for ix, ax in enumerate(axs.flatten()):
    ax.set_xticks([])
    ax.set_yticks([])
    if ix < len(images):
        # Channels-first -> channels-last, the layout imshow expects.
        img = images[ix].permute(1, 2, 0)
        ax.imshow(img)
    else:
        # Hide the cells left over when the batch does not fill the grid.
        ax.axis('off')

plt.tight_layout()
plt.show()