In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from datasets import load_dataset
from torchvision.transforms import ToTensor
from scipy.fftpack import dct
from PIL import Image
import numpy as np

In [2]:
# Define the custom collate function
def custom_collate(batch):
    block_size = 8
    dct_batch = []
    for image in batch:
        # Convert the image to a numpy array
        image_array = np.array(image)
        # Pad the image to make its dimensions divisible by block_size
        height, width, channels = image_array.shape
        padded_height = height + (block_size - height % block_size) % block_size
        padded_width = width + (block_size - width % block_size) % block_size
        padded_image = np.pad(image_array, ((0, padded_height - height), (0, padded_width - width), (0, 0)), mode='constant')
        # Perform DCT on image blocks
        dct_image = np.zeros_like(padded_image, dtype=np.float32)
        for i in range(0, padded_height, block_size):
            for j in range(0, padded_width, block_size):
                block = padded_image[i:i+block_size, j:j+block_size, :]
                dct_block = dct(dct(block, axis=0, norm='ortho'), axis=1, norm='ortho')
                dct_image[i:i+block_size, j:j+block_size, :] = dct_block
        dct_batch.append(dct_image)
    return torch.tensor(dct_batch)

In [3]:
# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(32 * 32 * 32, 1)  # Output layer with 1 neuron for binary classification

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.view(-1, 32 * 32 * 32)  # Flatten the output for the fully connected layer
        x = torch.sigmoid(self.fc(x))  # Apply sigmoid activation for binary classification
        return x

In [4]:
# define dataset
dataset = load_dataset('imagefolder', data_dir='/data/upb/users/b/bakshit/profiles/unix/cs/FraudDetectionThesis/Dataset1')

Resolving data files:   0%|          | 0/86646 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/300 [00:00<?, ?it/s]

In [5]:
# transformations
from transformers import CLIPImageProcessor, CLIPModel
image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
size = (
    image_processor.size["shortest_edge"]
    if "shortest_edge" in image_processor.size
    else (image_processor.size["height"], image_processor.size["width"])
)
print(size)
_transforms = Compose([RandomResizedCrop(128), ToTensor(), normalize])

def transforms(examples):
    examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples

224


In [6]:
dataset = dataset.with_transform(transforms)

In [7]:
# Define the custom collate function
def custom_collate(batch):
    block_size = 8
    dct_batch = []
    for image in batch:
        # Convert the image to a numpy array
        image_array = np.array(image)
        # Pad the image to make its dimensions divisible by block_size
        height, width, channels = image_array.shape
        padded_height = height + (block_size - height % block_size) % block_size
        padded_width = width + (block_size - width % block_size) % block_size
        padded_image = np.pad(image_array, ((0, padded_height - height), (0, padded_width - width), (0, 0)), mode='constant')
        # Perform DCT on image blocks
        dct_image = np.zeros_like(padded_image, dtype=np.float32)
        for i in range(0, padded_height, block_size):
            for j in range(0, padded_width, block_size):
                block = padded_image[i:i+block_size, j:j+block_size, :]
                dct_block = dct(dct(block, axis=0, norm='ortho'), axis=1, norm='ortho')
                dct_image[i:i+block_size, j:j+block_size, :] = dct_block
        dct_batch.append(dct_image)
    return torch.tensor(dct_batch)

In [8]:
# define the dataloader
train_loader = DataLoader(dataset['train'], batch_size=512, shuffle=True)
test_loader = DataLoader(dataset['test'], batch_size=512, shuffle=False)

In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from scipy.fftpack import dct
from PIL import Image
import numpy as np


# Define the CNN model
class DCT_CNN(nn.Module):
    def __init__(self):
        super(DCT_CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(32 * 32 * 32, 1)  # Output layer with 1 neuron for binary classification

    def forward(self, x):
        print('first input: ', x['pixel_values'].shape)
        x = self.conv1(x['pixel_values'])
        print('after first layer: ',x.shape)
        x = self.conv2(x)
        print('after second layer', x.shape)
        x = self.pool(x)
        print('after pooling', x.shape)
        x=self.conv3(x)
        print('after third layer', x.shape)
        x=self.pool(x)
        print('after 2nd pooling', x.shape)
        x=self.conv4(x)
        print('after fourth layer', x.shape)
        x = x.view(-1, 32 * 32 * 32)  # Flatten the output for the fully connected layer
        print('before sigmoid', x.shape)
        x = self.fc(x)
        x=torch.sigmoid(x)
        print('after sigmoid', x.shape)
        return x


In [34]:
# Create an instance of the CNN model
model = DCT_CNN()
num_epochs=5
# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    for batch_dct in train_loader:
        # Forward pass
        outputs = model(batch_dct).squeeze()
        # print(batch_dct)
        # Compute loss
        labels = batch_dct['label'] # Example labels for binary classification
        
        print('dimension of labels is: ', labels.shape)
        print('dimension of outputs is: ', outputs.shape)
        loss = criterion(outputs, labels.float())
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


first input:  torch.Size([512, 3, 128, 128])
after first layer:  torch.Size([512, 3, 128, 128])
after second layer torch.Size([512, 8, 128, 128])
after pooling torch.Size([512, 8, 64, 64])
after third layer torch.Size([512, 16, 64, 64])
after 2nd pooling torch.Size([512, 16, 32, 32])
after fourth layer torch.Size([512, 32, 32, 32])
before sigmoid torch.Size([512, 32768])
after sigmoid torch.Size([512, 1])
dimension of labels is:  torch.Size([512])
dimension of outputs is:  torch.Size([512])
first input:  torch.Size([512, 3, 128, 128])
after first layer:  torch.Size([512, 3, 128, 128])
after second layer torch.Size([512, 8, 128, 128])
after pooling torch.Size([512, 8, 64, 64])
after third layer torch.Size([512, 16, 64, 64])
after 2nd pooling torch.Size([512, 16, 32, 32])
after fourth layer torch.Size([512, 32, 32, 32])
before sigmoid torch.Size([512, 32768])
after sigmoid torch.Size([512, 1])
dimension of labels is:  torch.Size([512])
dimension of outputs is:  torch.Size([512])
first in

KeyboardInterrupt: 