## Load data

In [13]:
import torch
import torch.nn as nn
from transformers import CLIPImageProcessor, CLIPModel

import torchvision.transforms as transforms
from datasets import Dataset, load_dataset
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from transformers import DefaultDataCollator
from transformers import CLIPImageProcessor, CLIPModel

device = "cuda" if torch.cuda.is_available() else "cpu"

In [14]:
import os
from random import sample
from datasets import Dataset, load_dataset
fraud_folder = '/upb/users/b/bakshit/profiles/unix/cs/FraudDetectionThesis'
train_folder = os.path.join(fraud_folder, 'data/Dataset1/train')

In [15]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

_transforms = Compose([RandomResizedCrop(224), ToTensor(), ])

def transforms(examples):
    examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples

In [16]:
train_ds= load_dataset('imagefolder', data_dir=train_folder,split='train[:80%]')
valid_ds = load_dataset('imagefolder', data_dir=train_folder,split='train[80%:]')
    
train_ds = train_ds.with_transform(transforms)
valid_ds = valid_ds.with_transform(transforms)

Resolving data files:   0%|          | 0/20500 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/20500 [00:00<?, ?it/s]

In [17]:
# Define data loaders with batch size
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=4, shuffle=False)

In [18]:
train_ds

Dataset({
    features: ['image', 'label'],
    num_rows: 16400
})

In [19]:
valid_ds

Dataset({
    features: ['image', 'label'],
    num_rows: 4100
})

### Machine learning Models

In [9]:
## CLIP Based Linear Classifier

from transformers import AutoModelForImageClassification, TrainingArguments, Trainer, AdamW
import torch.nn as nn

class CLIPModelClassifier(nn.Module):
    def __init__(self, num_classes=2):
        super(CLIPModelClassifier, self).__init__()
        self.model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
        self.fc = nn.Linear(768, 1)

    def forward(self, x):       
        features = self.model.get_image_features(pixel_values=x)      
        logits = self.fc(features)
        # probabilities = torch.sigmoid(logits)
        return logits

In [38]:
## Neural Network based model

import torch
import torch.nn as nn
import torch.optim as optim

class NeuralNetwork(nn.Module):
    def __init__(self, ):
        input_size = 768  # Size of image embeddings
        hidden_size = 224  # Size of hidden layer
        num_classes = 1  # Number of classes (e.g., real vs. fake)
        super(NeuralNetwork, self).__init__()
        self.model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        features = self.model.get_image_features(pixel_values=x)      
        x = self.fc1(features)
        x = self.relu(x)
        x = self.fc2(x)
        return x


#### Load the models

In [10]:
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

clip_model = CLIPModelClassifier().to(device)
clip_model.train()
loss_fn = nn.BCEWithLogitsLoss()#nn.CrossEntropyLoss()

In [39]:
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

nen_model = NeuralNetwork().to(device)
nen_model.train()
loss_fn = nn.BCEWithLogitsLoss()#nn.CrossEntropyLoss()

### Training Loop

#### One epoch training

In [40]:
model = nen_model

In [41]:
def train_one_epoch(epoch_index, tb_writer, optimizer):
    running_loss = 0.
    last_loss = 0.

    for i, data in enumerate(train_loader):
        # Every data instance is an input + label pair
        inputs = data['pixel_values']
        inputs = inputs.to(device)
        labels = data['label'].to(device)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)
        # print(outputs)
        outputs = torch.squeeze(outputs,1)
        labels=labels.float()
        
        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()
        
        # Gather data and report
        running_loss += loss.item()
        if i>0 and i % 500 == 0:
            last_loss = running_loss / 500 # loss per batch
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            tb_x = epoch_index * len(train_loader) + i + 1
            # tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0.

    return last_loss

#### Training for all epochs + validation

In [None]:
from datetime import datetime
current_best_model = ''

EPOCHS = 25

best_vloss = 1_000_000.
lr_list=[0.01,0.001,0.01, 0.0001]

for rate in lr_list:
    print(f'process started for {rate}')
    optimizer = torch.optim.AdamW(model.parameters(), lr=rate,)
    
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.8)
    writer = SummaryWriter(f'classify_image_clip_model/runs/13_april_NralNtwork_clip_{rate}_{EPOCHS}')
    epoch_number = 0

    for epoch in range(EPOCHS):
        print('EPOCH {}:'.format(epoch_number + 1))
    
        # Make sure gradient tracking is on, and do a pass over the data
        model.train(True)
        avg_loss = train_one_epoch(epoch_number, writer, optimizer)
    
    
        running_vloss = 0.0
        # Set the model to evaluation mode, disabling dropout and using population
        # statistics for batch normalization.
        model.eval()
    
        # Disable gradient computation and reduce memory consumption.
        with torch.no_grad():
            for i, vdata in enumerate(valid_loader):
                vinputs = vdata['pixel_values']
                vinputs = vinputs.to(device)
                vlabels = vdata['label'].float().to(device)
                voutputs = model(vinputs)
                vlabels = torch.unsqueeze(vlabels,1)
                vloss = loss_fn(voutputs, vlabels)
                running_vloss += vloss
    
        avg_vloss = running_vloss / (i + 1)
        print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
    
        # Log the running loss averaged per batch
        # for both training and validation
        writer.add_scalars('Training vs. Validation Loss',
                        { 'Training' : avg_loss, 'Validation' : avg_vloss },
                        epoch_number + 1)
        writer.flush()
    
        # Track best performance, and save the model's state
        if avg_vloss < best_vloss and epoch_number > 3:
            best_vloss = avg_vloss
            model_path = f'classify_image_clip_model/model_cnn_clip/13_april_NralNtwork_clip_model_{rate}_{epoch_number}'
            current_best_model = model_path
            torch.save(model.state_dict(), model_path)
    
        epoch_number += 1
print(current_best_model)

process started for 0.01
EPOCH 1:
  batch 501 loss: 1.2138298326904768


In [None]:
current_best_model

### Inference

In [None]:
from datasets import load_dataset
from torch.utils.data import DataLoader

real_test = os.path.join(fraud_folder, 'data/test_dataset1/real')
ddpm_test = os.path.join(fraud_folder, 'data/Dataset1/test/fake/ddpm')
euler_test= os.path.join(fraud_folder, 'data/Dataset1/test/fake/euler')
unipc_test = os.path.join(fraud_folder, 'data/Dataset1/test/fake/unipc')

real_testds = load_dataset('imagefolder', data_dir=real_test)
ddpmds = load_dataset('imagefolder', data_dir=ddpm_test)
eulerds =  load_dataset('imagefolder', data_dir=euler_test)
unipcds =  load_dataset('imagefolder', data_dir=unipc_test)

#### Implement transforms operations

In [None]:
# Load the CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
# model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)

size = 224
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

def transforms(examples):
    examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples

In [None]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
from transformers import CLIPImageProcessor, CLIPModel
import torch

ddpmds = ddpmds.with_transform(transforms)
eulerds = eulerds.with_transform(transforms)
unipcds = unipcds.with_transform(transforms)
real_testds = real_testds.with_transform(transforms)

test_loader = DataLoader(real_testds['train'], batch_size=32, shuffle=False)
euler_loader = DataLoader(eulerds['train'], batch_size=32, shuffle=False)
unipc_loader = DataLoader(unipcds['train'], batch_size=32, shuffle=False)
ddpm_loader = DataLoader(ddpmds['train'], batch_size=32, shuffle=False)

#### Load the best model

In [None]:
## inference
model_path = os.path.join(fraud_folder, f'src/detection_algorithms/{current_best_model}')
model = CLIPModelClassifier().to(device)
model.load_state_dict(torch.load(model_path))

#### Start inference

In [75]:
import evaluate
metric = evaluate.load("accuracy")
model.eval()
predictions_dict={}
for i, batch in enumerate(valid_loader):
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(batch['pixel_values'])
    # predictions = torch.argmax(outputs, dim=-1)
    predicted_labels = torch.round(torch.sigmoid(outputs.cpu())).squeeze().numpy()
    # predictions_dict[i] = predictions
    metric.add_batch(predictions=predicted_labels, references=[0]*len(batch['pixel_values']))
    

metric.compute()

{'accuracy': 1.0}