In [1]:
import cv2
import random
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score, confusion_matrix
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

# data augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2

# pretrained models
import torchvision
from torchvision import models, transforms

from functions import *
from classes import *

## Load in metadata

See data preprocessing in Siamese model notebook.

In [2]:
df = pd.read_csv('data/metadata.csv')

In [3]:
df['full_path'] = df.apply(lambda x: f"data/train_224_crop/{x['new_filename']}" if x['in_train'] == True else f"data/test_224_crop/{x['new_filename']}", axis=1)

In [4]:
df.head()

Unnamed: 0,artist,date,genre,pixelsx,pixelsy,size_bytes,source,style,title,artist_group,in_train,new_filename,full_path
0,Paul Serusier,1890.0,genre painting,7099.0,5857.0,9803854.0,wikiart,Cloisonnism,Seaweed Gatherer,train_and_test,False,32996.jpg,data/test_224_crop/32996.jpg
1,Georges Seurat,1884.0,,6367.0,4226.0,11579390.0,wikipedia,Pointillism,Bathers at Asnières,train_and_test,True,39751.jpg,data/train_224_crop/39751.jpg
2,Paul Signac,,cityscape,5616.0,4312.0,10612858.0,wikiart,Pointillism,View of the Port of Marseilles,train_and_test,True,74221.jpg,data/train_224_crop/74221.jpg
3,Georges Seurat,1884.0,genre painting,5910.0,4001.0,5330653.0,wikiart,Pointillism,Study for A Sunday on La Grande Jatte,train_and_test,True,31337.jpg,data/train_224_crop/31337.jpg
4,Gustave Caillebotte,1881.0,genre painting,5164.0,4087.0,3587461.0,wikiart,Impressionism,Rising Road,train_and_test,False,29616.jpg,data/test_224_crop/29616.jpg


In [5]:
all_train_df = df[df['in_train'] == True]
all_train_df = all_train_df.reset_index(drop=True)

In [6]:
test_df = df[df['in_train'] == False]
test_df = test_df.reset_index(drop=True)

In [7]:
# train test split on training dataset
train_df = all_train_df.sample(frac=0.8)
val_df = all_train_df[~all_train_df.index.isin(train_df.index)]
train_df.reset_index(drop=True, inplace=True)
val_df.reset_index(drop=True, inplace=True)

In [89]:
train_df.to_csv('data/train.csv', index=None)
val_df.to_csv('data/val.csv', index=None)

## Create Dataloader

In [8]:
artist_dict = {artist: i for i, artist in enumerate(sorted(df['artist'].unique()))}

In [10]:
train_ds = ArtistDataset(train_df, artist_dict)
train_dl = DataLoader(train_ds, batch_size=16, shuffle=True)

In [11]:
val_ds = ArtistDataset(val_df, artist_dict)
val_dl = DataLoader(val_ds, batch_size=16, shuffle=True)

## Pretrained AlexNet image classifier model

### LR = 0.01

In [13]:
model = models.alexnet(pretrained=True)

In [14]:
for param in model.parameters():
    param.requires_grad = False

In [15]:
model.classifier[6] = nn.Linear(4096, 52)

In [16]:
params_to_update = []

for param in model.parameters():
    if param.requires_grad == True:
        params_to_update.append(param)

In [17]:
optimizer = optim.Adam(params_to_update, lr=0.01)
lossFun = nn.CrossEntropyLoss()

In [19]:
num_epochs = 5
train_losses = []
valid_losses = []

for epoch in range(num_epochs):
    print('Epoch: ', epoch)
    
    train_loss, train_acc = one_pass_classification(model, train_dl, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    print('Train accuracy: ', train_acc)
    
    valid_loss, valid_acc = one_pass_classification(model, val_dl, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    print('Valid accuracy: ', valid_acc)

Epoch:  0


  0%|          | 0/536 [00:00<?, ?it/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Train loss:  18.388087290436474
Train accuracy:  0.27623600746268656


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  17.239458891882826
Valid accuracy:  0.353544776119403
Epoch:  1


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  13.971160081785117
Train accuracy:  0.4430970149253731


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  18.456706370880354
Valid accuracy:  0.384794776119403
Epoch:  2


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  11.914644096658302
Train accuracy:  0.5191231343283582


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  22.363216891217586
Valid accuracy:  0.3810634328358209
Epoch:  3


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  10.99261307938775
Train accuracy:  0.5728777985074627


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  23.716668043563615
Valid accuracy:  0.39552238805970147
Epoch:  4


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  10.085046615618378
Train accuracy:  0.6002798507462687


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  24.891969808891638
Valid accuracy:  0.40158582089552236


In [79]:
torch.save(model, 'models/classifier_lr01.pt')

### LR = 0.01, WD = 0.01

In [45]:
model_4 = models.alexnet(pretrained=True)

In [46]:
for param in model_4.parameters():
    param.requires_grad = False

In [47]:
model_4.classifier[6] = nn.Linear(4096, 52)

In [48]:
params_to_update = []

for param in model_4.parameters():
    if param.requires_grad == True:
        params_to_update.append(param)

In [49]:
optimizer = optim.Adam(params_to_update, lr=0.01, weight_decay=0.01)
lossFun = nn.CrossEntropyLoss()

In [50]:
num_epochs = 5
train_losses = []
valid_losses = []

for epoch in range(num_epochs):
    print('Epoch: ', epoch)
    
    train_loss, train_acc = one_pass_classification(model_4, train_dl, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    print('Train accuracy: ', train_acc)
    
    valid_loss, valid_acc = one_pass_classification(model_4, val_dl, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    print('Valid accuracy: ', valid_acc)

Epoch:  0


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  16.11200719804906
Train accuracy:  0.24871735074626866


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  14.316991791796328
Valid accuracy:  0.3204291044776119
Epoch:  1


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  14.40693745150495
Train accuracy:  0.3420009328358209


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  15.332140513320468
Valid accuracy:  0.3185634328358209
Epoch:  2


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  14.012505290223592
Train accuracy:  0.36415578358208955


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  16.85602051820328
Valid accuracy:  0.3204291044776119
Epoch:  3


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  13.971494605292134
Train accuracy:  0.37080223880597013


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  15.745638580464606
Valid accuracy:  0.33348880597014924
Epoch:  4


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  14.351306552317604
Train accuracy:  0.37604944029850745


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  16.961115015086843
Valid accuracy:  0.31529850746268656


In [80]:
torch.save(model_4, 'models/classifier_lr01wd01.pt')

### LR = 0.01, WD = 0.001

In [51]:
model_5 = models.alexnet(pretrained=True)

In [52]:
for param in model_5.parameters():
    param.requires_grad = False

In [53]:
model_5.classifier[6] = nn.Linear(4096, 52)

In [54]:
params_to_update = []

for param in model_4.parameters():
    if param.requires_grad == True:
        params_to_update.append(param)

In [55]:
optimizer = optim.Adam(params_to_update, lr=0.01, weight_decay=0.001)
lossFun = nn.CrossEntropyLoss()

In [56]:
num_epochs = 5
train_losses = []
valid_losses = []

for epoch in range(num_epochs):
    print('Epoch: ', epoch)
    
    train_loss, train_acc = one_pass_classification(model_5, train_dl, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    print('Train accuracy: ', train_acc)
    
    valid_loss, valid_acc = one_pass_classification(model_5, val_dl, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    print('Valid accuracy: ', valid_acc)

Epoch:  0


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  4.303370224895762
Train accuracy:  0.013759328358208955


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  4.237398869955718
Valid accuracy:  0.018656716417910446
Epoch:  1


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  4.298799957802046
Train accuracy:  0.014692164179104478


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  4.237398889527392
Valid accuracy:  0.018656716417910446
Epoch:  2


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  4.3087844595090665
Train accuracy:  0.014692164179104478


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  4.2373988486048
Valid accuracy:  0.018656716417910446
Epoch:  3


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  4.303896164271369
Train accuracy:  0.013875932835820896


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  4.237398898423607
Valid accuracy:  0.018656716417910446
Epoch:  4


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  4.295518224363897
Train accuracy:  0.01632462686567164


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  4.237398873514204
Valid accuracy:  0.018656716417910446


In [81]:
torch.save(model_5, 'models/classifier_lr01wd001.pt')

### LR = 0.005

In [57]:
model_2 = models.alexnet(pretrained=True)

In [58]:
for param in model_2.parameters():
    param.requires_grad = False

In [59]:
model_2.classifier[6] = nn.Linear(4096, 52)

In [60]:
params_to_update = []

for param in model_2.parameters():
    if param.requires_grad == True:
        params_to_update.append(param)

In [61]:
optimizer = optim.Adam(params_to_update, lr=0.005)
lossFun = nn.CrossEntropyLoss()

In [62]:
num_epochs = 5
train_losses = []
valid_losses = []

for epoch in range(num_epochs):
    print('Epoch: ', epoch)
    
    train_loss, train_acc = one_pass_classification(model_2, train_dl, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    print('Train accuracy: ', train_acc)
    
    valid_loss, valid_acc = one_pass_classification(model_2, val_dl, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    print('Valid accuracy: ', valid_acc)

Epoch:  0


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  9.230837301976646
Train accuracy:  0.2888292910447761


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  9.500314895786456
Valid accuracy:  0.3269589552238806
Epoch:  1


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  6.839349558326735
Train accuracy:  0.45825559701492535


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  9.879660163352739
Valid accuracy:  0.37966417910447764
Epoch:  2


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  5.731082466278059
Train accuracy:  0.5451259328358209


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  10.380617358791294
Valid accuracy:  0.3927238805970149
Epoch:  3


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  5.208544245517966
Train accuracy:  0.578008395522388


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  11.57838810201901
Valid accuracy:  0.3885261194029851
Epoch:  4


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  4.947100140154362
Train accuracy:  0.6087919776119403


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  12.28867555376309
Valid accuracy:  0.42350746268656714


In [82]:
torch.save(model_2, 'models/classifier_lr005.pt')

### LR = 0.005, WD = 0.1

In [39]:
model_3 = models.alexnet(pretrained=True)

In [40]:
for param in model_3.parameters():
    param.requires_grad = False

In [41]:
model_3.classifier[6] = nn.Linear(4096, 52)

In [42]:
params_to_update = []

for param in model_3.parameters():
    if param.requires_grad == True:
        params_to_update.append(param)

In [43]:
optimizer = optim.Adam(params_to_update, lr=0.005, weight_decay=0.1)
lossFun = nn.CrossEntropyLoss()

In [44]:
num_epochs = 5
train_losses = []
valid_losses = []

for epoch in range(num_epochs):
    print('Epoch: ', epoch)
    
    train_loss, train_acc = one_pass_classification(model_3, train_dl, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    print('Train accuracy: ', train_acc)
    
    valid_loss, valid_acc = one_pass_classification(model_3, val_dl, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    print('Valid accuracy: ', valid_acc)

Epoch:  0


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  6.6836837819263115
Train accuracy:  0.23717350746268656


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  6.537798031052547
Valid accuracy:  0.251865671641791
Epoch:  1


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  6.7922886027329
Train accuracy:  0.2578125


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  6.271312327527288
Valid accuracy:  0.2574626865671642
Epoch:  2


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  6.6583087524371365
Train accuracy:  0.26096082089552236


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  6.40278647550896
Valid accuracy:  0.28544776119402987
Epoch:  3


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  6.8443153169172914
Train accuracy:  0.2583955223880597


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  6.457734236076696
Valid accuracy:  0.26492537313432835
Epoch:  4


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  6.730166456592617
Train accuracy:  0.26317630597014924


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  6.755594449256783
Valid accuracy:  0.2667910447761194


In [83]:
torch.save(model_3, 'models/classifier_lr005wd1.pt')

## Compare models using validation data

In [66]:
val_pairs_df = pd.read_csv('data/validation_pairs.csv')

In [85]:
test_pairs_df = pd.read_csv('data/test_pairs.csv')

In [69]:
val_ds = ArtistPairsDataset(val_pairs_df)
val_dl = DataLoader(val_ds, batch_size=16, shuffle=True)

In [86]:
test_ds = ArtistPairsDataset(test_pairs_df)
test_dl = DataLoader(test_ds, batch_size=16, shuffle=True)

In [73]:
# model LR = 0.01
validation_check(model, val_dl)

  0%|          | 0/371 [00:00<?, ?it/s]

[[3707   43]
 [ 791 1385]]
0.8125111519607844


In [74]:
# model LR = 0.005
validation_check(model_2, val_dl)

  0%|          | 0/371 [00:00<?, ?it/s]

[[3658   92]
 [1097 1079]]
0.7356653186274511


In [75]:
# model LR = 0.005 WD = 0.1
validation_check(model_3, val_dl)

  0%|          | 0/371 [00:00<?, ?it/s]

[[3614  136]
 [1688  488]]
0.5939990196078432


In [76]:
# model LR = 0.01 WD = 0.01
validation_check(model_4, val_dl)

  0%|          | 0/371 [00:00<?, ?it/s]

[[3478  272]
 [1952  224]]
0.5152039215686275


In [77]:
# model LR = 0.01 WD = 0.001
validation_check(model_5, val_dl)

  0%|          | 0/371 [00:00<?, ?it/s]

[[3397  353]
 [1908  268]]
0.5145142156862745


## Additional training

In [24]:
train_losses = []
valid_losses = []

for epoch in range(5, 10):
    print('Epoch: ', epoch)
    
    train_loss, train_acc = one_pass_classification(model, train_dl, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    print('Train accuracy: ', train_acc)
    
    valid_loss, valid_acc = one_pass_classification(model, val_dl, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    print('Valid accuracy: ', valid_acc)

Epoch:  5


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  9.311409385235452
Train accuracy:  0.6388759328358209


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  25.643175865287212
Valid accuracy:  0.4221082089552239
Epoch:  6


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  8.458500471061665
Train accuracy:  0.6674440298507462


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  28.19134314380475
Valid accuracy:  0.4197761194029851
Epoch:  7


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  8.53054004904437
Train accuracy:  0.668027052238806


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  29.177496504427783
Valid accuracy:  0.41044776119402987
Epoch:  8


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  7.9779903518397415
Train accuracy:  0.6926305970149254


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  30.417312650538204
Valid accuracy:  0.4207089552238806
Epoch:  9


  0%|          | 0/536 [00:00<?, ?it/s]

Train loss:  8.392241106942423
Train accuracy:  0.703125


  0%|          | 0/134 [00:00<?, ?it/s]

Valid loss:  32.41125769935437
Valid accuracy:  0.4193097014925373


## Run test

In [87]:
validation_check(model, test_dl)

  0%|          | 0/643 [00:00<?, ?it/s]

[[9692  310]
 [ 215   67]]
0.6032974256212588
