Imports: first the standard-library modules (e.g., os, gc, json), then the data-analysis libraries (numpy, pandas, matplotlib, PIL), followed by the PyTorch/Torchvision imports (torch, nn, optim, torchvision, etc.), and finally additional packages such as optuna, wandb, and sklearn.

In [30]:
import os
import gc
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, Subset

import torchvision
from torchvision import datasets, models, transforms
from torchvision.models import VGG19_Weights

import optuna
import wandb
from sklearn.model_selection import KFold

Free unused GPU cache with PyTorch and invoke Python's garbage collector to release unreferenced objects

In [31]:
# Run the garbage collector first: tensors that are only kept alive by
# unreachable reference cycles still occupy their GPU blocks, and
# empty_cache() can only hand back blocks that are no longer occupied.
gc.collect()
torch.cuda.empty_cache()

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Bare expression: the notebook echoes the selected device.
device

device(type='cuda')

Load the CSV, keep the selected columns, filter only **Post-Impressionism** images, add an 'is_van_gogh' flag, and reset the index.

In [32]:
# Read the metadata table and keep only the columns used downstream.
classes = pd.read_csv("classes.csv", encoding="utf-8")
classes = classes.loc[
    :,
    ['filename', 'artist', 'genre', 'description', 'phash', 'width', 'height', 'genre_count'],
].copy()

# Keep rows whose file path mentions the Post-Impressionism folder
# (case-insensitive; missing file names count as non-matches), then
# renumber the rows from zero.
classes_only_post_impressionism = (
    classes.loc[classes["filename"].str.contains('Post_Impressionism', case=False, na=False)]
    .copy()
    .reset_index(drop=True)
)

# Binary target: 1 for Van Gogh paintings, 0 for every other artist.
classes_only_post_impressionism['is_van_gogh'] = np.where(
    classes_only_post_impressionism['artist'] == 'vincent van gogh', 1, 0
)

In [33]:
# Bare expression: the notebook renders the filtered table as the cell output.
classes_only_post_impressionism

Unnamed: 0,filename,artist,genre,description,phash,width,height,genre_count,is_van_gogh
0,Post_Impressionism/a.y.-jackson_early-spring-q...,a.y. jackson,['Post Impressionism'],early-spring-quebec-1923,f23aa4049cc3ebec,1692,1381,1,0
1,Post_Impressionism/a.y.-jackson_entrance-to-ha...,a.y. jackson,['Post Impressionism'],entrance-to-halifax-harbour-1919,af77c19aa5e49805,1752,1382,1,0
2,Post_Impressionism/a.y.-jackson_houses-st-urba...,a.y. jackson,['Post Impressionism'],houses-st-urbain-1934,d636ac6d69d0d226,1702,1382,1,0
3,Post_Impressionism/a.y.-jackson_maple-woods-al...,a.y. jackson,['Post Impressionism'],maple-woods-algoma-1920,87ee38d819a8e3c5,1779,1382,1,0
4,Post_Impressionism/a.y.-jackson_march-storm-ge...,a.y. jackson,['Post Impressionism'],march-storm-georgian-bay-1920,dedb2a0184f1762b,1821,1382,1,0
...,...,...,...,...,...,...,...,...,...
6302,Post_Impressionism/wassily-kandinsky_not_detec...,wassily kandinsky,['Post Impressionism'],not_detected_189369,b130e2b8d196a3e7,1382,1845,1,0
6303,Post_Impressionism/william-h.-johnson_african-...,william h. johnson,['Post Impressionism'],african-woman-study-in-tunis-1932,e2c6947a5909c9f3,1382,1791,1,0
6304,Post_Impressionism/william-scott_blue-still-li...,william scott,['Post Impressionism'],blue-still-life-1957,90b95bb535d86aa4,1718,1382,1,0
6305,Post_Impressionism/william-scott_five-pears-19...,william scott,['Post Impressionism'],five-pears-1976,f68bc934a3413c8f,1653,1382,1,0


Custom PyTorch Dataset that loads images/labels from a DataFrame, applies optional transforms, and returns (image, label) pairs. Define a set of data augmentations for training: random resized crop, horizontal flip, rotation,
and color jitter, followed by normalization. For testing, use a simpler transform (resize + normalization).










In [41]:
class VanGoghDataset(Dataset):
    """Image dataset backed by a DataFrame of file paths and binary labels.

    Rows are read positionally: column 0 holds the image path and column 8
    holds the label (``filename`` and ``is_van_gogh`` in the frame built
    earlier in this notebook). Items are returned as ``(image, label)``.
    """

    def __init__(self, dataframe, transform=None):
        """Store the frame and choose the image transform.

        Args:
            dataframe: Table with the image path in column 0 and the label
                in column 8.
            transform: Callable applied to each loaded PIL image. When
                ``None``, the default training augmentation pipeline is
                used, so callers that omit the argument get the same
                behaviour as before.
        """
        self.dataframe = dataframe
        # Prefix joined to every file name; empty means paths are used as-is.
        self.img_dir = ''
        # Honour a caller-supplied transform (it used to be silently
        # overwritten); fall back to the training augmentations otherwise.
        if transform is not None:
            self.transform = transform
        else:
            self.transform = self._default_train_transform()

    @staticmethod
    def _default_train_transform():
        """Build the default training pipeline: augment, tensorize, normalize."""
        return transforms.Compose([
            transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image and label for row ``idx``.

        Args:
            idx: Positional row index into the DataFrame.

        Returns:
            Tuple ``(image, label)``: the RGB image after ``self.transform``
            (or the raw PIL image if the transform was cleared) and the
            label as a ``float32`` scalar tensor.
        """
        img_name = os.path.join(self.img_dir, self.dataframe.iloc[idx, 0])
        # Open inside a context manager so the file handle is closed right
        # after decoding; convert() returns an independent in-memory copy.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')
        label = torch.tensor(self.dataframe.iloc[idx, 8], dtype=torch.float32)
        if self.transform is not None:
            image = self.transform(image)

        return image, label


***Create Model function***: We replace the original classification layer with a single-output layer followed by a Sigmoid for binary classification. By freezing all the feature layers and training only the classifier layers, we effectively fine-tune just the head of the network.

In [77]:
# NOTE(review): this wrapper inherits from Dataset although it implements no
# dataset protocol; the base class is kept only so existing isinstance checks
# keep working. Consider dropping it.
class VanGoghModel(Dataset):
    """Pretrained VGG19 or AlexNet with a single sigmoid output.

    The chosen backbone is available as ``self.model`` and its name as
    ``self.model_name``. Convolutional feature layers are frozen; the
    classifier layers stay trainable.
    """

    # Architectures this wrapper knows how to build.
    _SUPPORTED_MODELS = ("VGG19", "AlexNet")

    def __init__(self,dataset,device,Optuna_trial=None,name='VGG19'):
        """Build the backbone, replace its head and move it to ``device``.

        Args:
            dataset: Currently unused; kept for signature compatibility.
            device: Target device passed to ``model.to``.
            Optuna_trial: Optional Optuna trial. When given, the
                architecture is chosen by the trial and ``name`` is ignored.
            name: Architecture to build when no trial is given
                (``"VGG19"`` or ``"AlexNet"``).

        Raises:
            ValueError: If the resolved model name is not supported.
        """
        if Optuna_trial is not None:
            self.model_name = Optuna_trial.suggest_categorical(
                "model", list(self._SUPPORTED_MODELS)
            )
        else:
            self.model_name = name

        # Fail early with a clear message instead of an AttributeError on
        # ``None.to(device)`` further down.
        if self.model_name not in self._SUPPORTED_MODELS:
            raise ValueError(
                f"Unsupported model name {self.model_name!r}; "
                f"expected one of {self._SUPPORTED_MODELS}"
            )

        if self.model_name == "VGG19":
            backbone = models.vgg19(weights=VGG19_Weights.IMAGENET1K_V1)
        else:
            backbone = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)

        # Both architectures keep their final linear layer at classifier[6];
        # replace it with a one-unit linear layer followed by a sigmoid.
        head_inputs = backbone.classifier[6].in_features
        backbone.classifier[6] = nn.Sequential(nn.Linear(head_inputs, 1), nn.Sigmoid())

        # Freeze the convolutional feature extractor.
        for param in backbone.features.parameters():
            param.requires_grad = False

        # Make sure the new head is trainable.
        for param in backbone.classifier[-1:].parameters():
            param.requires_grad = True

        self.model = backbone.to(device)



***Create Dataset Example***

In [78]:
# Wrap the filtered Post-Impressionism table in the custom dataset; no
# transform is passed, so the training augmentation pipeline is applied.
dataset = VanGoghDataset(dataframe=classes_only_post_impressionism)


***Create Model Example***

In [79]:
# Build one wrapper per supported backbone. Optuna_trial is None, so the
# architecture is selected by the explicit name argument.
model_VGG19 = VanGoghModel(dataset,device,None,'VGG19')
model_Alex = VanGoghModel(dataset,device,None,'AlexNet')
