# Training Style Classifiers

Installing all the required libraries and dependencies

In [1]:
# One-time environment setup: uncomment the lines you need and run this cell once.
# NOTE(review): inside a notebook, `%pip install ...` installs into the running
# kernel's environment, which `! pip install ...` does not guarantee; consider
# switching and pinning versions - confirm against your setup.
# ! pip install torchsummary
# ! pip install --upgrade opencv-python
# ! pip install albumentations
# ! pip install s3fs
# ! pip install tqdm
# ! pip install torch
# ! pip install torchvision
# ! conda install --yes pytorch torchvision torchaudio cudatoolkit=11.6 -c pytorch -c conda-forge
# ! pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116

In [2]:
# Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
import cv2
import albumentations as A
import torchvision.models as models
import os
from tqdm import tqdm
import time
import skimage.io as skio
import warnings
from sklearn.metrics import accuracy_score
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


* Reading the dataframe containing the information about the artworks. Can be found in [data/](https://github.com/avmchandrish/art-style-classification/tree/main/data) folder in the github repo.


In [3]:
# Load the artwork metadata table and preview its first rows
artworks_csv_path = 'data/artworks.csv'
artworks = pd.read_csv(artworks_csv_path)
artworks.head()

Unnamed: 0.1,Unnamed: 0,id,title,year,width,height,artistName,image,map,paintingUrl,artistUrl,albums,flags,images,style
0,0,57728479edc2cb3880fdea33,Silhouette fantastique,1854,500,366,Victor Hugo,https://uploads0.wikiart.org/images/victor-hug...,0*23**67*,/en/victor-hugo/silhouette-fantastique-1854,/en/victor-hugo,,2,,abstract-art
1,1,57728a62edc2cb388010efa1,First Communion of Anaemic Young Girls in the ...,1883,1324,848,Alphonse Allais,https://uploads0.wikiart.org/images/alphonse-a...,01234*67*,/en/alphonse-allais/first-communion-of-anaemic...,/en/alphonse-allais,,2,,abstract-art
2,2,57728a61edc2cb388010ef5f,Apoplectic Cardinals Harvesting Tomatoes on th...,1884,1400,980,Alphonse Allais,https://uploads2.wikiart.org/images/alphonse-a...,01234*67*,/en/alphonse-allais/apoplectic-cardinals-harve...,/en/alphonse-allais,,2,,abstract-art
3,3,57728a61edc2cb388010ef71,Band of Greyfriars in the Fog (Band Of Dusty D...,1884,1400,980,Alphonse Allais,https://uploads2.wikiart.org/images/alphonse-a...,01234*67*,/en/alphonse-allais/band-of-greyfriars-in-the-...,/en/alphonse-allais,,2,,abstract-art
4,4,57728a61edc2cb388010ef81,Negroes Fighting in a Tunnel by Night,1884,800,560,Alphonse Allais,https://uploads5.wikiart.org/images/alphonse-a...,0123**67*,/en/alphonse-allais/negroes-fighting-in-a-tunn...,/en/alphonse-allais,,2,,abstract-art


Creating a column with file path, to access the images

In [4]:
# Build each image's relative path as "<style>/<file stem>.jpg"
def _image_stem(url):
    """Return the last '/'-separated segment of `url`, cut at its first '.'."""
    return url.split('/')[-1].split('.')[0]


artworks['s3_path'] = [
    style + "/" + _image_stem(url) + ".jpg"
    for style, url in zip(artworks['style'], artworks['image'])
]
print(f"Example of a path is: {artworks['s3_path'][0]}")

Example of a path is: abstract-art/silhouette-fantastique-1854.jpg


Checking the number of images for each class in the data. It looks fairly balanced, although the smallest classes have fewer than half as many images as the largest ones.

In [5]:
# Number of artworks per style label (the output below lists the most frequent first)
artworks['style'].value_counts()

rococo                        3600
neoclassicism                 3600
symbolism                     3600
art-nouveau-modern            3600
baroque                       3600
surrealism                    3600
romanticism                   3600
expressionism                 3600
impressionism                 3600
realism                       3600
abstract-expressionism        3600
naive-art-primitivism         3600
post-impressionism            3540
cubism                        3419
northern-renaissance          3273
pop-art                       2712
mannerism-late-renaissance    2536
minimalism                    2242
abstract-art                  2040
art-informel                  1888
early-renaissance             1876
ukiyo-e                       1857
high-renaissance              1759
magic-realism                 1758
color-field-painting          1597
Name: style, dtype: int64

### Data Augmentations

We use Flip, Rotate and Random Resized Crop augmentations. We also normalize the images using the ImageNet mean and standard deviation.

In [6]:
# Augmentation + normalization pipeline (albumentations)
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Each step is built separately so its settings are easy to read and tweak
_flip = A.Flip(p=0.5)
_rotate = A.Rotate(limit=10, border_mode=cv2.BORDER_CONSTANT, value=0.0, p=0.75)
_crop = A.RandomResizedCrop(width=224, height=224, scale=(0.5, 1), p=1)
_normalize = A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD, max_pixel_value=255.0, p=1.0)

transforms = A.Compose([_flip, _rotate, _crop, _normalize])

### Dataset Class

This dataset class takes as parameters:
* Dataframe containing file locations, 
* Label dictionary with label names and their indexes
* transforms to be applied

In [7]:
class ArtDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for artwork images.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per image. Must provide the columns ``'s3_path'`` (location
        handed to :meth:`load_img`) and ``'style'`` (class name).
    label_dict : dict
        Maps each style name to its integer class index.
    transforms : callable
        Called as ``transforms(image=<uint8 H x W x 3 array>)``; must return a
        mapping whose ``'image'`` entry is the processed H x W x C array.
    fs : optional
        Stored on the instance; not used by any method of this class.
    """

    # Image substituted when the requested file cannot be read (see load_img)
    FALLBACK_IMAGE = 'symbolism/baroness-fernand-van-der-bruggen-1900.jpg'

    def __init__(self, df, label_dict, transforms, fs= None):
        self.df = df
        self.transforms = transforms
        self.label_dict = label_dict
        self.fs = fs

    def __len__(self):
        """Number of rows (images) in the backing dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        ``image`` is a float tensor in channels-first (C, H, W) layout and
        ``label`` is a scalar ``torch.long`` tensor holding the class index.
        """
        row = self.df.iloc[idx]
        filename = row['s3_path']
        # Use the mapping given to this instance rather than a module-level
        # global, so the dataset works wherever it is constructed.
        label = torch.tensor(self.label_dict[row['style']], dtype = torch.long)
        # Read the image and force it to exactly three channels
        img = self._to_three_channels(self.load_img(filename))
        # Augmentations + normalization
        transformed = self.transforms(image=img.astype(np.uint8))
        img = transformed['image']
        # H x W x C -> C x H x W, as expected by torch vision models
        img = img.transpose(2, 0, 1)
        img = torch.tensor(img).float()
        return img, label

    @staticmethod
    def _to_three_channels(img):
        """Coerce a 2-D or H x W x C array to H x W x 3."""
        if img.ndim == 2:
            img = img[:, :, np.newaxis]
        if img.shape[2] < 3:
            # Grayscale (optionally with alpha): replicate the first channel
            return np.repeat(img[:, :, :1], 3, axis = 2)
        # Drop any channels beyond the first three (e.g. alpha)
        return img[:, :, :3]

    def load_img(self, s3_path):
        """Read the image at ``s3_path``; fall back to ``FALLBACK_IMAGE`` on failure.

        NOTE(review): when the fallback is used, ``__getitem__`` still returns
        the label of the requested row, so unreadable files add label noise.
        """
        try:
            img_arr = skio.imread(s3_path)
            # Touch .shape so a non-array result is handled like a failed read
            img_arr.shape
        except Exception:
            # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through
            img_arr = skio.imread(self.FALLBACK_IMAGE)
        return img_arr

### Transfer learning functions

The function below re-instantiates the head of the architecture and takes the following parameters:
* model: the model architecture
* model_type: 'vgg'/'resnet'/'vit'/'convnext'
* num_classes: number of classes we are classifying

In [8]:
def set_classification_layer(model, model_type='vgg', num_classes=25):
    """Replace the classification head of `model` with a freshly initialised one.

    The model is modified in place and also returned, so both
    ``set_classification_layer(m, ...)`` and ``m = set_classification_layer(m, ...)``
    work. Input widths are read from the existing head where possible, so other
    variants of the same family are not tied to one hard-coded size.

    Parameters
    ----------
    model : torch.nn.Module
        Network whose head is replaced. The attribute touched depends on
        `model_type`: ``classifier`` (vgg, convnext), ``fc`` (resnet) or
        ``heads`` (vit).
    model_type : str
        One of ``'vgg'``, ``'resnet'``, ``'vit'`` or ``'convnext'``.
    num_classes : int
        Number of outputs of the new final linear layer.

    Returns
    -------
    torch.nn.Module
        The same `model` object, with its head replaced.

    Raises
    ------
    ValueError
        If `model_type` is not one of the supported names.
    """
    if model_type == 'vgg':
        in_features = model.classifier[0].in_features
        model.classifier = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=4096, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=4096, out_features=4096, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=4096, out_features=num_classes, bias=True)
        )
    elif model_type == 'resnet':
        model.fc = nn.Linear(in_features=model.fc.in_features,
                             out_features=num_classes, bias=True)
    elif model_type == 'vit':
        # Fall back to 768 when the model does not expose `hidden_dim`
        in_features = getattr(model, 'hidden_dim', 768)
        model.heads = nn.Linear(in_features=in_features, out_features=num_classes, bias=True)
    elif model_type == 'convnext':
        # torch.nn has no LayerNorm2d, so build a fresh norm layer of the same
        # class and settings as the one already in the head.
        old_norm = model.classifier[0]
        in_features = model.classifier[-1].in_features
        model.classifier = nn.Sequential(
            type(old_norm)(old_norm.normalized_shape,
                           eps=old_norm.eps,
                           elementwise_affine=old_norm.elementwise_affine),
            nn.Flatten(start_dim=1, end_dim=-1),
            nn.Linear(in_features=in_features, out_features=num_classes, bias=True)
        )
    else:
        # Fail loudly: continuing would leave the original head in place
        raise ValueError(
            f'Unknown model_type {model_type}. Acceptable types are: "vgg", "resnet", "vit", or "convnext"'
        )
    return model

The function below freezes all the layers of the architecture and makes the linear layers learnable.

In [9]:
def freeze_model(model, **classargs):
    '''
    Given an existing model, freeze pre-trained weights and
    re-instantiate the classifier.

    Every parameter currently in `model` gets ``requires_grad = False``; the
    head is then rebuilt by `set_classification_layer`, whose new layers are
    created after the freeze and are therefore left at their default
    (trainable) setting.

    Parameters
    ----------
    model : torch.nn.Module
        Model to modify in place.
    **classargs
        Forwarded unchanged to `set_classification_layer`
        (e.g. ``model_type=...``, ``num_classes=...``).

    Returns
    -------
    torch.nn.Module
        The same `model` object, for convenient chaining.
    '''
    # Freeze all parameters
    for param in model.parameters():
        param.requires_grad = False
    # Re-instantiate the classifier head. The helper mutates `model` in place,
    # so its return value is deliberately not used to rebind `model`.
    set_classification_layer(model, **classargs)
    return model

Testing out the above function by loading a VGG-19 model and checking its parameters.

In [10]:
# Instantiate VGG-19 and list the requires_grad flag of every parameter
vgg = models.vgg19()
for layer_name, weights in vgg.named_parameters():
    print(f"{layer_name} gradient is set to", weights.requires_grad)

features.0.weight gradient is set to True
features.0.bias gradient is set to True
features.2.weight gradient is set to True
features.2.bias gradient is set to True
features.5.weight gradient is set to True
features.5.bias gradient is set to True
features.7.weight gradient is set to True
features.7.bias gradient is set to True
features.10.weight gradient is set to True
features.10.bias gradient is set to True
features.12.weight gradient is set to True
features.12.bias gradient is set to True
features.14.weight gradient is set to True
features.14.bias gradient is set to True
features.16.weight gradient is set to True
features.16.bias gradient is set to True
features.19.weight gradient is set to True
features.19.bias gradient is set to True
features.21.weight gradient is set to True
features.21.bias gradient is set to True
features.23.weight gradient is set to True
features.23.bias gradient is set to True
features.25.weight gradient is set to True
features.25.bias gradient is set to True


In [11]:
# Freeze the backbone and swap in a new 25-class head
freeze_model(vgg, model_type='vgg', num_classes=25)
# List the requires_grad flag of every parameter after freezing
for layer_name, weights in vgg.named_parameters():
    print(f"{layer_name} gradient is set to", weights.requires_grad)

features.0.weight gradient is set to False
features.0.bias gradient is set to False
features.2.weight gradient is set to False
features.2.bias gradient is set to False
features.5.weight gradient is set to False
features.5.bias gradient is set to False
features.7.weight gradient is set to False
features.7.bias gradient is set to False
features.10.weight gradient is set to False
features.10.bias gradient is set to False
features.12.weight gradient is set to False
features.12.bias gradient is set to False
features.14.weight gradient is set to False
features.14.bias gradient is set to False
features.16.weight gradient is set to False
features.16.bias gradient is set to False
features.19.weight gradient is set to False
features.19.bias gradient is set to False
features.21.weight gradient is set to False
features.21.bias gradient is set to False
features.23.weight gradient is set to False
features.23.bias gradient is set to False
features.25.weight gradient is set to False
features.25.bias g

After passing through the function, all the layers except the head have `requires_grad` set to False.

### Training functions

We have two functions below
* eval_model: To calculate the loss and accuracy with a given model and dataloader
* train_model: To train the model based on training parameters, optimizer and learning rate scheduler.

In [12]:
# Default hyper-parameters, bundled so they can be handed to the helpers below
training_params = dict(
    epochs=20,
    batch_size=16,
    loss_fct=nn.CrossEntropyLoss(),
)


def eval_model(model, dl, training_params):
    """Compute the average loss and accuracy of `model` over a dataloader.

    Parameters
    ----------
    model : torch.nn.Module
        Model to evaluate. It is switched to eval mode and left in that mode;
        it is not moved between devices.
    dl : iterable
        Yields ``(X, y)`` batches, where ``y`` holds integer class indices.
    training_params : dict
        Must contain ``'loss_fct'``, a callable ``loss_fct(logits, targets)``.

    Returns
    -------
    (float, float)
        Sample-weighted mean loss and fraction of correct predictions.
    """
    loss_fct = training_params['loss_fct']
    # Run on whatever device the model already lives on, instead of a
    # hard-coded GPU index that may not exist or may not match the model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    # Evaluate
    model.eval()
    total_loss = 0.0
    total_obs = 0
    total_correct = 0
    # No gradients are needed for evaluation; skipping them saves memory
    with torch.no_grad():
        for X, y in tqdm(dl):
            X, y = X.to(device), y.to(device)
            n_obs = len(y)
            # Forward pass and calculate loss
            yhat = model(X)
            loss = loss_fct(yhat, y)
            # Weight by batch size so a smaller final batch is averaged correctly
            total_loss += n_obs * loss.item()
            total_obs += n_obs
            # Count correct predictions in this batch
            total_correct += (yhat.argmax(dim=1) == y).sum().item()
    # Return loss, accuracy
    avg_loss = total_loss / total_obs
    accuracy = total_correct / total_obs
    return avg_loss, accuracy
    
    
def train_model(model, optimizer, scheduler, train_ds, valid_ds, training_params):
    """Train `model` and report train/validation loss and accuracy every epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Model to train; it is moved to the selected device.
    optimizer : torch.optim.Optimizer
        Stepped once per batch.
    scheduler : learning-rate scheduler
        Stepped once per epoch.
    train_ds, valid_ds : torch.utils.data.Dataset
        Training and validation datasets.
    training_params : dict
        Required keys: ``'loss_fct'``, ``'batch_size'``, ``'epochs'``.
        Optional key: ``'device'`` (e.g. ``'cpu'``, ``'cuda:0'``). When absent,
        the second GPU is used if more than one is visible, otherwise the
        first GPU, otherwise the CPU.

    Returns
    -------
    list of dict
        One entry per epoch with keys ``'epoch'``, ``'train_loss'``,
        ``'train_acc'``, ``'valid_loss'`` and ``'valid_acc'``.
    """
    # Get loss function
    loss_fct = training_params['loss_fct']
    # Create dataloaders based on batch size
    batch_size = training_params['batch_size']
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    # Validation must iterate over valid_ds, not the training data
    valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)
    # Pick the device: explicit override first, then an available GPU, then CPU
    device = training_params.get('device')
    if device is None:
        if torch.cuda.is_available():
            # 'cuda:1' only exists when at least two GPUs are visible
            device = 'cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0'
        else:
            device = 'cpu'
    model = model.to(device)
    history = []
    # Train
    for epoch in range(training_params['epochs']):
        # Put model in train mode (eval_model leaves it in eval mode)
        model.train()
        for X, y in tqdm(train_dl):
            # Clear gradients
            optimizer.zero_grad()
            # Forward pass and loss calculation
            yhat = model(X.to(device))
            loss = loss_fct(yhat, y.to(device))
            # Backward pass and step
            loss.backward()
            optimizer.step()
        scheduler.step()  # update scheduler each epoch
        # Calculate loss, accuracy on train and validation
        train_loss, train_acc = eval_model(model, train_dl, training_params)
        valid_loss, valid_acc = eval_model(model, valid_dl, training_params)
        train_str = f"train loss: {train_loss:.4f} | train acc: {train_acc:.4f}"
        valid_str = f" | valid loss: {valid_loss:.4f} | valid acc: {valid_acc:.4f}"
        print(f'[{epoch}] ' + train_str + valid_str)
        history.append({'epoch': epoch,
                        'train_loss': train_loss, 'train_acc': train_acc,
                        'valid_loss': valid_loss, 'valid_acc': valid_acc})
    return history

### Train Val Test Split

We are splitting the files into:
* Train: 70% of data (around 50K images)
* Valid: 15% of data
* Test: 15% of data

In [13]:
# Shuffle once with a fixed seed, then cut into 70% / 15% / 15% contiguous slices
df = artworks.sample(frac = 1, random_state = 62).reset_index(drop = True)
n_rows = df.shape[0]
split1 = int(0.7 * n_rows)   # end of the training slice
split2 = int(0.85 * n_rows)  # end of the validation slice

train_df = df.iloc[:split1].copy()
valid_df = df.iloc[split1: split2].reset_index(drop = True)
test_df = df.iloc[split2:].reset_index(drop = True)

### Instantiate Datasets

Defining the Train, Valid and Test datasets.

In [14]:
# label dict: style name -> integer class index, assigned in sorted name order
label_dict = {style: i for i, style in enumerate(sorted(artworks['style'].unique()))}

# Deterministic preprocessing for evaluation: resize and normalize only, so
# validation/test metrics are not perturbed by the random flips, rotations and
# crops used for training.
eval_transforms = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_pixel_value=255.0,
                p=1.0)
])

# creating Datasets - each one is built from its own split of the data
train_ds = ArtDataset(train_df, label_dict, transforms)
valid_ds = ArtDataset(valid_df, label_dict, eval_transforms)
test_ds = ArtDataset(test_df, label_dict, eval_transforms)

----

### Training - VGG 19

We load a pretrained VGG19 model, freeze its layers, and train only the new classification head. <br>
The actual training happens using the script file which can be found [here](https://github.com/avmchandrish/art-style-classification/tree/main/scripts).

In [None]:
# VGG 19
from torchvision.models import vgg19
from torch.optim.lr_scheduler import StepLR

# Pretrained backbone, frozen, with a new 25-class head
# NOTE(review): newer torchvision releases replace `pretrained=` with
# `weights=` - confirm against the installed version.
model = vgg19(pretrained = True)
freeze_model(model, model_type='vgg', num_classes=25)

# Training configuration for this run
training_params = dict(
    epochs=10,
    batch_size=128,
    loss_fct=nn.CrossEntropyLoss(),
)
optimizer = optim.Adam(model.parameters())
# Halve the learning rate every 4 epochs
scheduler = StepLR(optimizer, step_size=4, gamma=0.5)

train_model(model, optimizer, scheduler, train_ds, valid_ds, training_params)

### Training - ResNet 34

We load a pretrained ResNet34 model, freeze its layers, and train only the new classification head. <br>
The actual training happens using the script file which can be found [here](https://github.com/avmchandrish/art-style-classification/tree/main/scripts).

In [None]:
# Resnet 34
from torchvision.models import resnet34
from torch.optim.lr_scheduler import StepLR

# Pretrained backbone, frozen, with a new 25-class head
# NOTE(review): newer torchvision releases replace `pretrained=` with
# `weights=` - confirm against the installed version.
model = resnet34(pretrained = True)
freeze_model(model, model_type='resnet', num_classes=25)

# Training configuration for this run
training_params = dict(
    epochs=10,
    batch_size=128,
    loss_fct=nn.CrossEntropyLoss(),
)
optimizer = optim.Adam(model.parameters())
# Halve the learning rate every 4 epochs
scheduler = StepLR(optimizer, step_size=4, gamma=0.5)

train_model(model, optimizer, scheduler, train_ds, valid_ds, training_params)

### Training - ViT

We load a pretrained Vision Transformer model, freeze its layers, and train only the new classification head. <br>
The actual training happens using the script file which can be found [here](https://github.com/avmchandrish/art-style-classification/tree/main/scripts).

In [None]:
# ViT
from torchvision.models import vit_b_16
from torch.optim.lr_scheduler import StepLR

# Pretrained backbone, frozen, with a new 25-class head
# NOTE(review): newer torchvision releases replace `pretrained=` with
# `weights=` - confirm against the installed version.
model = vit_b_16(pretrained = True)
freeze_model(model, model_type='vit', num_classes=25)

# Training configuration for this run
training_params = dict(
    epochs=10,
    batch_size=128,
    loss_fct=nn.CrossEntropyLoss(),
)
optimizer = optim.Adam(model.parameters())
# Halve the learning rate every 4 epochs
scheduler = StepLR(optimizer, step_size=4, gamma=0.5)

train_model(model, optimizer, scheduler, train_ds, valid_ds, training_params)

----

### References
* Learning rate scheduler: https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.StepLR.html
* Torchvision models: https://pytorch.org/vision/stable/models.html