In [1]:
from TT_Blip_architecture.classifier_layer import ClsfLayer
from TT_Blip_architecture.feature_extraction_layer import FeatureExtractionLayer
from TT_Blip_architecture.fusion_layer import FusionLayer
from TT_Blip_architecture.tt_blip import TT_Blip
from TT_Blip_architecture.data_processor import DataProcessor

In [2]:
import csv
from tqdm.auto import tqdm
import urllib.request


train_set = []
validation_set = []
with open("Fakeddit/multimodal_train.tsv") as fd:
    rd = csv.DictReader(fd, delimiter="\t", fieldnames=['author','clean_title','created_utc','domain','hasImage','id','image_url','linked_submission_id','num_comments','score','subreddit','title','upvote_ratio','2_way_label','3_way_label','6_way_label'])
    
    for line in tqdm(rd):
        train_set.append(line)
    train_set = train_set[1:]

with open("Fakeddit/multimodal_validate.tsv") as fd:
    rd = csv.DictReader(fd, delimiter="\t", fieldnames=['author','clean_title','created_utc','domain','hasImage','id','image_url','linked_submission_id','num_comments','score','subreddit','title','upvote_ratio','2_way_label','3_way_label','6_way_label'])
    
    for line in tqdm(rd):
        validation_set.append(line)
    validation_set = validation_set[1:]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [3]:
import urllib.request
import cv2 as cv
import numpy as np
from concurrent.futures import ThreadPoolExecutor

processor = DataProcessor()

def download_and_process_image(b):
    image_url = b['image_url']
    try:
        # Download the image data into memory
        with urllib.request.urlopen(image_url) as url_response:
            image_data = url_response.read()
            # Convert the byte data to a NumPy array
            image_array = np.asarray(bytearray(image_data), dtype=np.uint8)
            # Decode the image
            img = cv.imdecode(image_array, cv.IMREAD_COLOR)
            # Convert color space to RGB
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
            # Get label and title
            y = int(b['2_way_label'])
            t = b['clean_title']
            return img, y, t
    except:
        # If any error occurs, return None
        return None

def collate_fn(batch):
    x = []
    y = []
    t = []
    # Use ThreadPoolExecutor to download and process images in parallel
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [executor.submit(download_and_process_image, b) for b in batch]
        for future in futures:
            result = future.result()
            if result is not None:
                img, label, title = result
                x.append(img)
                y.append(label)
                t.append(title)
    return processor(x, t, y)

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [None]:
from torch.utils.data import DataLoader

train_dl = DataLoader(train_set[5000:], batch_size=16, shuffle=True, collate_fn=collate_fn)
val_dl = DataLoader(validation_set[1000:], batch_size=16, shuffle=True, collate_fn=collate_fn)

next(iter(train_dl))

(tensor([[[[-0.3098, -0.3098, -0.3098,  ..., -0.2392, -0.2471, -0.2471],
           [-0.3098, -0.3098, -0.3098,  ..., -0.2392, -0.2471, -0.2471],
           [-0.3098, -0.3098, -0.3098,  ..., -0.2392, -0.2471, -0.2471],
           ...,
           [-0.2784, -0.2784, -0.2784,  ..., -0.3490, -0.2392, -0.4039],
           [-0.2784, -0.2784, -0.2784,  ..., -0.3333, -0.2471, -0.3804],
           [-0.2706, -0.2706, -0.2706,  ..., -0.3804, -0.4196, -0.3725]],
 
          [[-0.2157, -0.2157, -0.2157,  ..., -0.1451, -0.1529, -0.1529],
           [-0.2157, -0.2157, -0.2157,  ..., -0.1451, -0.1529, -0.1529],
           [-0.2157, -0.2157, -0.2157,  ..., -0.1451, -0.1529, -0.1529],
           ...,
           [-0.2314, -0.2314, -0.2314,  ..., -0.3333, -0.2235, -0.3882],
           [-0.2314, -0.2314, -0.2314,  ..., -0.3176, -0.2314, -0.3647],
           [-0.2235, -0.2235, -0.2235,  ..., -0.3647, -0.4039, -0.3569]],
 
          [[-0.1216, -0.1216, -0.1216,  ..., -0.0196, -0.0275, -0.0275],
           [-

In [None]:
feature_extraction = FeatureExtractionLayer()
fusion_layer = FusionLayer()
clsf_layer = ClsfLayer()

tt_blip = TT_Blip(feature_extraction, fusion_layer, clsf_layer)

In [None]:
from lightning import Trainer
from lightning.pytorch.loggers import WandbLogger

logger = WandbLogger(project="Thesis", name="TT-Blip Fakeddit")
trainer = Trainer(logger=logger, log_every_n_steps=1, max_epochs=50, accumulate_grad_batches=4)

trainer.fit(tt_blip, train_dl, val_dl)