In [None]:
import torch
import torch.nn as nn
from transformers import CLIPImageProcessor, CLIPModel

from datasets import Dataset, load_dataset
from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from transformers import DefaultDataCollator
from transformers import CLIPImageProcessor, CLIPModel
import numpy as np

import glob
import shutil
import os
import torchvision
from torchvision import datasets, transforms, models
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"

### Neural Network Implementation

In [55]:
from sklearn.metrics import confusion_matrix, classification_report

def make_train_step(model, optimizer, loss_fn):
    def train_step(x,y):
    #make prediction
        
        yhat = model(x)
        model.train()
        loss = loss_fn(yhat,y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        return loss, yhat
        
    return train_step

def calculate_accuracy(yhat, y):
    yhat = torch.sigmoid(yhat)
    predicted = (yhat > 0.5).float()
    correct = (predicted == y).float().sum()
    accuracy = correct / y.numel()
    return accuracy

def calculate_accuracy_test(yhat, y):
    yhat = torch.sigmoid(yhat)
    predicted = (yhat > 0.5).float()
    correct = (predicted == y).float().sum()
    accuracy = correct / y.numel()
    return accuracy.item(), correct.item(), y.numel()

def plot_roc_curve(writer, y_true, y_scores, epoch):
    fpr, tpr, _ = roc_curve(y_true, y_scores)
    roc_auc = roc_auc_score(y_true, y_scores)
    writer.add_scalar('AUC', roc_auc, epoch)
    writer.add_pr_curve('ROC', y_true, y_scores, epoch)
    return roc_auc


def predict_test_data(testloader, model, ):
    all_preds = []
    all_labels = []
    with torch.no_grad():
        cum_loss = 0
        total_correct = 0
        cum_accuracy = 0
        total_images = 0
        for x_batch, y_batch in testloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.unsqueeze(1).float()  # convert target to same nn output shape
            y_batch = y_batch.to(device)
        
            # model to eval mode
            model.eval()
        
            yhat = model(x_batch)

            # Collect predictions and labels
            all_preds.append(torch.sigmoid(yhat).cpu().numpy())
            all_labels.append(y_batch.cpu().numpy())
        
            # Calculate accuracy
            accuracy, correct, total = calculate_accuracy_test(yhat, y_batch)
            total_correct += correct
            total_images += total
            
            # cum_accuracy += accuracy / len(testloader)
    overall_accuracy = total_correct / total_images
    print(f'Overall accuracy on the test dataset: {overall_accuracy * 100:.2f}%')
    print(f'Total correct predictions: {total_correct} out of {total_images} images')

    # Convert lists to numpy arrays
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels).astype(int)

    # Plot ROC curve
    roc_auc = plot_roc_curve(writer, all_labels, all_preds, epoch)
    print(f'ROC AUC: {roc_auc:.2f}')
    
    # Print confusion matrix and classification report
    all_preds = (all_preds > 0.5).astype(int)
    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, all_preds))
    print('Classification Report:')
    print(classification_report(all_labels, all_preds))

def basic_neural_network(model, trainloader, validloader, testloader, model_name=None,n_epochs=20):
    # Initialize the TensorBoard writer
    model.train()
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.AdamW(model.fc.parameters(), lr=0.001)
    train_step = make_train_step(model, optimizer, loss_fn)
    if model_name is None:
        model_name=model._get_name()
    writer = SummaryWriter(f'runs/loss_{model_name}')
    losses = []
    val_losses = []
    
    epoch_train_losses = []
    epoch_test_losses = []
    epoch_train_accuracies = []
    epoch_val_accuracies = []
    
    early_stopping_tolerance = 3
    early_stopping_threshold = 0.03
    
    print('starting the training loop')
    for epoch in tqdm(range(n_epochs), total=n_epochs):
        epoch_loss = 0
        epoch_accuracy = 0
        for i, data in enumerate(trainloader):  # iterate over batches
            x_batch, y_batch = data
            x_batch = x_batch.to(device)  # move to gpu
            y_batch = y_batch.unsqueeze(1).float()  # convert target to same nn output shape
            y_batch = y_batch.to(device)  # move to gpu
    
            loss, yhat = train_step(x_batch, y_batch)
            epoch_loss += loss / len(trainloader)
            losses.append(loss)

            # Calculate accuracy
            accuracy = calculate_accuracy(yhat, y_batch)
            epoch_accuracy += accuracy / len(trainloader)


    
        epoch_train_losses.append(epoch_loss)
        epoch_train_accuracies.append(epoch_accuracy)
        print('\nEpoch : {}, train loss : {}'.format(epoch + 1, epoch_loss))
    
        # Log the training loss to TensorBoard
        writer.add_scalar(f'train/loss', epoch_loss, epoch)
        writer.add_scalar(f'train/Accuracy', epoch_accuracy, epoch)
        
        # validation doesn't require gradients
        with torch.no_grad():
            cum_loss = 0
            cum_accuracy = 0
            for x_batch, y_batch in validloader:
                x_batch = x_batch.to(device)
                y_batch = y_batch.unsqueeze(1).float()  # convert target to same nn output shape
                y_batch = y_batch.to(device)
    
                # model to eval mode
                model.eval()
    
                yhat = model(x_batch)
                val_loss = loss_fn(yhat, y_batch)
                cum_loss += val_loss.item() / len(validloader)
                val_losses.append(val_loss.item())

                # Calculate accuracy
                accuracy = calculate_accuracy(yhat, y_batch)
                cum_accuracy += accuracy / len(validloader)
    
            epoch_test_losses.append(cum_loss)  # for every epoch, save the validation loss
            print('Epoch : {}, val loss : {}'.format(epoch + 1, cum_loss))
    
            # Log the validation loss to TensorBoard
            writer.add_scalar(f'validation/loss', cum_loss, epoch)
            writer.add_scalar(f'validation/accuracy', cum_accuracy, epoch)
    
    
            best_loss = min(epoch_test_losses)
    
            # save best model
            if cum_loss <= best_loss:
                best_model_wts = model.state_dict()
                best_epoch = epoch

    
            # early stopping
            early_stopping_counter = 0
            if cum_loss > best_loss:
                early_stopping_counter += 1
    
            if (early_stopping_counter == early_stopping_tolerance) or (best_loss <= early_stopping_threshold):
                print("\nTerminating: early stopping")
                break  # terminate training
    
    # Close the TensorBoard writer
    writer.close()
    # saving the model dictionary in results/{model_name}
    torch.save(best_model_wts, f'./results/{model_name}_{best_epoch}')

    model.load_state_dict(best_model_wts)
    predict_test_data(testloader, model,)
    
    return best_model_wts
    

## Multi-Concept Dataset

### Model 1: CLIP Based Classifier

#### dataloaders

In [56]:
main_folder = '/upb/users/b/bakshit/profiles/unix/cs/FraudDetectionThesis/data/Dataset1'
train_folder = os.path.join(main_folder,'train')
test_folder = os.path.join(main_folder, 'test')
valid_folder = os.path.join(main_folder, 'validation')


#transformations
transformations = transforms.Compose([transforms.Resize((224,224)),
                                       transforms.ToTensor(),
                                       torchvision.transforms.Normalize(
                                           mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225],),
                                       ])


#datasets
train_data = datasets.ImageFolder(train_folder, transform=transformations)
valid_data = datasets.ImageFolder(valid_folder, transform=transformations)
test_data = datasets.ImageFolder(test_folder, transform=transformations)

#dataloader
trainloader = torch.utils.data.DataLoader(train_data, shuffle = True, batch_size=16)
validloader = torch.utils.data.DataLoader(valid_data, shuffle = True, batch_size=16)
testloader = torch.utils.data.DataLoader(test_data, shuffle = True, batch_size=16)

#### Model Implementation

In [None]:
## CLIP Based Linear Classifier

from transformers import AutoModelForImageClassification, TrainingArguments, Trainer, AdamW
import torch.nn as nn

class CLIPModelClassifier(nn.Module):
    def __init__(self, num_classes=1):
        super(CLIPModelClassifier, self).__init__()
        self.model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
        
        self.fc = nn.Linear(768, 1)

    def forward(self, x):       
        features = self.model.get_image_features(pixel_values=x)      
        logits = self.fc(features)
        return logits

from tqdm import tqdm

model = CLIPModelClassifier().to(device)

# freeze all paramas of this model, because we are using a pretrained CLIP model
for params in model.parameters():
    params.requires_grad_ = False

best_model_for_inference_Clip = basic_neural_network(model, trainloader, validloader, testloader, model_name='clipSigmoid')



starting the training loop


  0%|                                                                                                                       | 0/20 [00:00<?, ?it/s]

### Model 2: DCT Based CNN

In [15]:
# Custom DCT transformation
import torch_dct as dct

class DCTTransform:
    def __call__(self, image):
        image_array = np.array(image)
        dct_image = dct.dct_2d(torch.tensor(image_array, dtype=torch.float32))
        return dct_image


# Transformations based on dct application
transformations = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    DCTTransform()
])

#datasets
train_data = datasets.ImageFolder(train_folder, transform=transformations)
valid_data = datasets.ImageFolder(valid_folder, transform=transformations)
test_data = datasets.ImageFolder(test_folder, transform=transformations)

trainloader = torch.utils.data.DataLoader(train_data, shuffle = True, batch_size=16)
validloader = torch.utils.data.DataLoader(valid_data, shuffle = True, batch_size=16)
test_loader = torch.utils.data.DataLoader(test_data, shuffle = True, batch_size=16)



#### Resnet18 based DCT

In [17]:
model = models.resnet18(pretrained=True)  # Example model, replace with your model
#freeze all params
for params in model.parameters():
  params.requires_grad_ = False

#add a new final layer
nr_filters = model.fc.in_features  #number of input features of last layer
model.fc = nn.Linear(nr_filters, 1)

model = model.to(device)

best_model_for_dct_resnet = basic_neural_network(model, trainloader, validloader, testloader, model_name='dct_resnet',n_epochs = 20)


starting the training loop


  0%|                                                                                                                       | 0/20 [00:00<?, ?it/s]


Epoch : 1, train loss : 0.6155248284339905


  5%|█████▌                                                                                                         | 1/20 [01:02<19:45, 62.41s/it]

Epoch : 1, val loss : 0.5609044268131257

Epoch : 2, train loss : 0.5641117691993713


 10%|███████████                                                                                                    | 2/20 [02:07<19:06, 63.71s/it]

Epoch : 2, val loss : 0.5567783845663075

Epoch : 3, train loss : 0.5513978600502014


 15%|████████████████▋                                                                                              | 3/20 [03:05<17:24, 61.45s/it]

Epoch : 3, val loss : 0.5863106069962184

Epoch : 4, train loss : 0.5486661791801453


 20%|██████████████████████▏                                                                                        | 4/20 [04:02<15:52, 59.56s/it]

Epoch : 4, val loss : 0.5420178808371227

Epoch : 5, train loss : 0.5419958829879761


 25%|███████████████████████████▊                                                                                   | 5/20 [05:00<14:44, 58.94s/it]

Epoch : 5, val loss : 0.5962251332203544

Epoch : 6, train loss : 0.5397767424583435


 30%|█████████████████████████████████▎                                                                             | 6/20 [05:59<13:46, 59.07s/it]

Epoch : 6, val loss : 0.5151825442314152

Epoch : 7, train loss : 0.5388005971908569


 35%|██████████████████████████████████████▊                                                                        | 7/20 [06:55<12:36, 58.17s/it]

Epoch : 7, val loss : 0.5163144774834316

Epoch : 8, train loss : 0.5428364872932434


 40%|████████████████████████████████████████████▍                                                                  | 8/20 [08:00<12:04, 60.36s/it]

Epoch : 8, val loss : 0.5217763189474741

Epoch : 9, train loss : 0.5366469621658325


 45%|█████████████████████████████████████████████████▉                                                             | 9/20 [09:00<11:00, 60.02s/it]

Epoch : 9, val loss : 0.5334859670400617

Epoch : 10, train loss : 0.5341106653213501


 50%|███████████████████████████████████████████████████████                                                       | 10/20 [10:00<10:02, 60.21s/it]

Epoch : 10, val loss : 0.5083791755437854

Epoch : 11, train loss : 0.5296829342842102


 55%|████████████████████████████████████████████████████████████▌                                                 | 11/20 [12:55<14:17, 95.32s/it]

Epoch : 11, val loss : 0.5070689037640892

Epoch : 12, train loss : 0.530288577079773


 60%|██████████████████████████████████████████████████████████████████                                            | 12/20 [13:53<11:09, 83.74s/it]

Epoch : 12, val loss : 0.5119425270160037

Epoch : 13, train loss : 0.5327972173690796


 65%|███████████████████████████████████████████████████████████████████████▌                                      | 13/20 [14:52<08:55, 76.47s/it]

Epoch : 13, val loss : 0.539057677268982

Epoch : 14, train loss : 0.5377020239830017


 70%|█████████████████████████████████████████████████████████████████████████████                                 | 14/20 [15:50<07:04, 70.76s/it]

Epoch : 14, val loss : 0.5092826329867046

Epoch : 15, train loss : 0.5316939353942871


 75%|██████████████████████████████████████████████████████████████████████████████████▌                           | 15/20 [16:48<05:35, 67.01s/it]

Epoch : 15, val loss : 0.5070423127015433

Epoch : 16, train loss : 0.5324230790138245


 80%|████████████████████████████████████████████████████████████████████████████████████████                      | 16/20 [17:52<04:24, 66.08s/it]

Epoch : 16, val loss : 0.539875014225642

Epoch : 17, train loss : 0.5363146066665649


 85%|█████████████████████████████████████████████████████████████████████████████████████████████▌                | 17/20 [18:51<03:12, 64.01s/it]

Epoch : 17, val loss : 0.5136023394266761

Epoch : 18, train loss : 0.5284669995307922


 90%|███████████████████████████████████████████████████████████████████████████████████████████████████           | 18/20 [19:51<02:05, 62.68s/it]

Epoch : 18, val loss : 0.5184333922068284

Epoch : 19, train loss : 0.5286825895309448


 95%|████████████████████████████████████████████████████████████████████████████████████████████████████████▌     | 19/20 [20:49<01:01, 61.26s/it]

Epoch : 19, val loss : 0.5130645715792975

Epoch : 20, train loss : 0.5307837724685669


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [21:47<00:00, 65.38s/it]

Epoch : 20, val loss : 0.5337977486054104





Overall accuracy on the test dataset: 50.00%
Total correct predictions: 3000.0 out of 6000 images
Confusion Matrix:
[[3000    0]
 [3000    0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.50      1.00      0.67      3000
           1       0.00      0.00      0.00      3000

    accuracy                           0.50      6000
   macro avg       0.25      0.50      0.33      6000
weighted avg       0.25      0.50      0.33      6000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### CNN based DCT

In [50]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DCT_CNN(nn.Module):
    def __init__(self):
        super(DCT_CNN, self).__init__()
        self.dropout = nn.Dropout(p=0.7)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(64 * 14 * 14, 1)  # Corrected input size based on the calculations

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        # x = x.view(x.size(0), -1)  # Flatten the output for the fully connected layer
        x = self.fc(x)
        return x

# Example usage
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DCT_CNN().to(device)

# Now you can use this model in your training loop without shape issues.


In [51]:
model = DCT_CNN()
for params in model.parameters():
  params.requires_grad_ = False
#add a new final layer
# nr_filters = model.fc.in_features  #number of input features of last layer
# model.fc = nn.Linear(nr_filters, 1)
model = model.to(device)

best_model_for_dct_resnet = basic_neural_network(model, trainloader, validloader, testloader_ddpm, model_name='dct_cnn',n_epochs = 20)


starting the training loop


  0%|                                                                                                                       | 0/20 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (14336x14 and 12544x1)

### Model 3: Resnet50 Based Detection

#### data loaders

In [None]:

#transformations
transformations = transforms.Compose([transforms.Resize((224,224)),
                                       transforms.ToTensor(),
                                       torchvision.transforms.Normalize(
                                           mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225],),
                                       ])


#datasets
train_data = datasets.ImageFolder(train_folder, transform=transformations)
valid_data = datasets.ImageFolder(valid_folder, transform=transformations)
test_data = datasets.ImageFolder(test_folder, transform=transformations)

#dataloader
trainloader = torch.utils.data.DataLoader(train_data, shuffle = True, batch_size=16)
validloader = torch.utils.data.DataLoader(valid_data, shuffle = True, batch_size=16)
testloader = torch.utils.data.DataLoader(test_data, shuffle = True, batch_size=16)

#### model implementation

In [None]:
model = models.resnet50(pretrained=True)  # Example model, replace with your model
#freeze all params
for params in model.parameters():
  params.requires_grad_ = False

#add a new final layer
nr_filters = model.fc.in_features  #number of input features of last layer
model.fc = nn.Linear(nr_filters, 1)

model = model.to(device)

best_model_for_dct_resnet = basic_neural_network(model, trainloader, validloader, testloader, model_name='resnet50',n_epochs = 20)

### Model 4: DIRE

#### Implement Dataloader

In [None]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline, DDIMScheduler
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import numpy as np
from PIL import Image
from torchvision.transforms.functional import to_pil_image

# Define transforms for the images
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

tensor2image = transforms.ToPILImage()


def calculate_dire(original, reconstructed):
    # original = np.array(original.permute(1, 2, 0))
    # reconstructed = np.array(reconstructed)
    dire = torch.abs(original-reconstructed)#np.linalg.norm(original - reconstructed)
    
    return dire_score

# Combine datasets and create DataLoader
train_dataset = datasets.ImageFolder(train_folder, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

val_dataset = datasets.ImageFolder(valid_folder, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True)

test_dataset = datasets.ImageFolder(test_folder, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True)

# Load the pre-trained stable diffusion model and DDIM scheduler
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to('cuda')
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

# Move pipeline to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
# pipe.to(device)
i=0

def reconstruct_loader(data_loader):
    dire_scores = []
    labels_list = []
    for images, labels in data_loader:
        prompts = ["draw a high quality image"]*len(images)
        images = [tensor2image(image) for image in images]
        reconstructions = pipe(prompt=prompts, image=images, strength=0.1, guidance=1).images
        
        for original, reconstructed, label in zip(images, reconstructions, labels):
            reconst = transform(reconstructed)
            original = transform(original)
            dire_score = torch.abs(original-reconst)
            dire_scores.append(dire_score)
            labels_list.append(label)
    
    dire_scores = np.array(dire_scores)
    labels = np.array(labels_list)

    return dire_scores, labels

train_dire_scores, train_labels = reconstruct_loader(train_loader)
train_dataset = torch.utils.data.TensorDataset(train_dire_scores, train_labels)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

val_dire_scores, val_labels = reconstruct_loader(val_loader)
val_dataset = torch.utils.data.TensorDataset(val_dire_scores, val_labels)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True)

test_dire_scores, test_labels = reconstruct_loader(test_loader)
test_dataset = torch.utils.data.TensorDataset(test_dire_scores, test_labels)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)




#### Model Implementation

In [None]:
# Load pre-trained ResNet50 model
model = models.resnet50(pretrained=True)

# Modify the classifier part of ResNet50
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1)  # Number of classes in your dataset

model = model.to(device)
best_model_for_dct_resnet = basic_neural_network(model, trainloader, validloader, testloader, model_name='dire_report',n_epochs = 20)


## Single-Themed Dataset