My first pass at this experiment will follow the model from the paper [Artist Identification with Convolutional Neural Networks](http://cs231n.stanford.edu/reports/2017/pdfs/406.pdf), working with the [same dataset from Kaggle](https://www.kaggle.com/c/painter-by-numbers). I will initially only be working with a subset of the dataset, train_1.zip, partly due to my ISP's data caps and my current computer setup. In the following month I hope to be able to download more of the dataset. I will be using PyTorch for my neural network(s).

UPDATE 06/30/18: I have decided to download the full dataset.

In process.py we filtered out all artists with fewer than 300 paintings in the dataset, to ensure that there are enough samples for each artist to train on and learn from.

In [1]:
%matplotlib inline

In [2]:
import csv
import os
from os.path import expanduser

# Locations of the dataset files, all rooted at ~/Data/: the all-artists CSV,
# the filtered CSV describing our subset, and the training image directory.
data_dir = expanduser("~") + "/Data/"
all_artist_data = os.path.join(data_dir, "all_artist_data.csv")
filtered = os.path.join(data_dir, "filtered.csv")
artist_train = os.path.join(data_dir, "train")

In [3]:
# Build a two-way mapping between artist identifiers (first CSV column) and
# consecutive integer class labels. Labels are handed out in order of first
# appearance in the filtered CSV.
label_to_artist = {}
artist_to_label = {}
label_counter = 0

# newline="" lets the csv module do its own line-ending handling, which is
# required for quoted fields that contain embedded newlines.
with open(filtered, "r", encoding="utf8", newline="") as csvfile:
    reader = csv.reader(csvfile, quotechar='"')
    # Skip the header row; the default keeps an empty file from raising.
    header = next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for blank lines.
            continue
        artist = row[0]
        if artist not in artist_to_label:
            label_to_artist[label_counter] = artist
            artist_to_label[artist] = label_counter
            label_counter += 1
            

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BaslineCNN(nn.Module):
    """Baseline CNN: two strided 3x3 convolutions followed by two linear layers.

    Each convolution (stride 2, padding 1) and each 2x2 max-pool halves the
    spatial size, so a 224x224 input shrinks 224 -> 112 -> 56 -> 28 -> 14.
    With 32 channels the flattened feature vector has 32 * 14 * 14 = 6272
    entries, which is the input width of ``fc1``.

    Args:
        num_classes: Number of output classes. When omitted, the size of the
            module-level ``artist_to_label`` mapping is used.
    """

    def __init__(self, num_classes=None):
        super(BaslineCNN, self).__init__()
        if num_classes is None:
            num_classes = len(artist_to_label)
        # 3 input channels, 32 output channels, 3x3 square convolution kernel
        self.conv1 = nn.Conv2d(3, 32, 3, stride=2, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.fc1 = nn.Linear(6272, 228)
        self.fc2 = nn.Linear(228, num_classes)

    def forward(self, x):
        """Return unnormalized class scores, one row per input image."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch dimension explicit means an
        # unexpected feature size fails loudly in fc1 instead of silently
        # changing the batch size.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
        
# Instantiate the baseline model and print its layer summary.
net = BaslineCNN()
    
print(net)

BaslineCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (fc1): Linear(in_features=6272, out_features=228, bias=True)
  (fc2): Linear(in_features=228, out_features=38, bias=True)
)


Now we want to load and transform our data into the proper format. This involves implementing the abstract Dataset class and configuring DataLoader instances for our data and our desired transformations. To follow along with the paper, we are going to randomly crop 224x224 images out of the training images.

In [5]:
import pandas as pd
import torch.utils.data
import torchvision
import torchvision.transforms as transforms
from PIL import Image

# Turn off Pillow's decompression-bomb size check by removing the pixel limit;
# this is only reasonable because the dataset is assumed to be trustworthy.
Image.MAX_IMAGE_PIXELS = None

# Preprocessing applied to every image: take a random 224x224 crop, convert it
# to a tensor, then normalize each of the three channels with mean 0.5 and
# std 0.5.
_CHANNEL_STATS = (0.5,) * 3
train_loader_transform = transforms.Compose([
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_STATS, _CHANNEL_STATS),
])

class ArtistImageDataset(torch.utils.data.Dataset):
    """Dataset of images paired with integer artist labels.

    Args:
        text_file: Path to a CSV file whose column 0 holds the artist
            identifier and whose column 11 holds the image file name.
        img_dir: Directory containing the image files.
        transform: Callable applied to each RGB PIL image. If ``None``, the
            PIL image is returned unchanged.

    Each item is a dict with keys ``'images'`` (the transformed image) and
    ``'labels'`` (the integer looked up in the module-level
    ``artist_to_label`` mapping).
    """

    def __init__(self,text_file,img_dir,transform=train_loader_transform):
        # Parse the CSV once and slice out both columns. pandas returns the
        # selected columns in file order, so position 0 is the artist column
        # and position 1 is the file-name column.
        frame = pd.read_csv(text_file, sep=",", usecols=[0, 11])
        self.label_frame = frame.iloc[:, [0]]
        self.name_frame = frame.iloc[:, [1]]
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.name_frame)

    def __getitem__(self, index):
        img_name = os.path.join(self.img_dir, self.name_frame.iloc[index, 0])
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the conversion is done.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        labels = artist_to_label[self.label_frame.iloc[index, 0]]
        return {'images': image, 'labels': labels}
    
# Dataset over the filtered CSV and the training image directory, using the
# default transform.
artist_image_dataset = ArtistImageDataset(text_file=filtered, img_dir = artist_train)

In [6]:
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler

# Partition the dataset indices into train / test / validation subsets:
# one tenth is drawn at random for testing, another tenth of the total is then
# drawn from the remainder for validation, and whatever is left is used for
# training.
num_imgs = len(artist_image_dataset)
indices = list(range(num_imgs))
test_indices = np.random.choice(indices, size=num_imgs//10, replace=False)
indices = list(set(indices) - set(test_indices))
validation_indices = np.random.choice(indices, size=num_imgs//10, replace=False)
train_indices = list(set(indices) - set(validation_indices))

# One sampler per subset; each one only ever yields indices from its subset.
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)
validation_sampler = SubsetRandomSampler(validation_indices)

# All loaders share the same dataset and batch size; only the training loader
# is given worker processes.
_BATCH_SIZE = 16
train_loader = torch.utils.data.DataLoader(
    artist_image_dataset,
    sampler=train_sampler,
    batch_size=_BATCH_SIZE,
    num_workers=2,
)
test_loader = torch.utils.data.DataLoader(
    artist_image_dataset,
    sampler=test_sampler,
    batch_size=_BATCH_SIZE,
)
val_loader = torch.utils.data.DataLoader(
    artist_image_dataset,
    sampler=validation_sampler,
    batch_size=_BATCH_SIZE,
)

In [7]:
import matplotlib.pyplot as plt
import numpy as np

# image-showing code taken from the PyTorch tutorial

# functions to show an image


def imshow(img):
    """Display a normalized image tensor with matplotlib.

    The tensor is mapped back with ``img / 2 + 0.5`` (the inverse of a
    mean-0.5 / std-0.5 normalization) and its first axis is moved to the end
    before plotting, i.e. a three-axis tensor is expected.
    """
    restored = img / 2 + 0.5
    channels_last = restored.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)

# Disabled preview code, kept as a bare string literal so it does not run:
# it would fetch one training batch, show the images in a grid and print the
# corresponding artist names and labels.
# NOTE(review): `dataiter.next()` is the old iterator spelling; prefer
# `next(dataiter)` if this is re-enabled — confirm against the installed PyTorch.
'''
# get some random training images
dataiter = iter(train_loader)
sample = dataiter.next()
# show images
imshow(torchvision.utils.make_grid(sample['images']))
# print labels
print([label_to_artist[s.data.numpy()[()]] for s in sample['labels']])
print(sample['labels'])
print(len(sample['labels']))
'''

"\n# get some random training images\ndataiter = iter(train_loader)\nsample = dataiter.next()\n# show images\nimshow(torchvision.utils.make_grid(sample['images']))\n# print labels\nprint([label_to_artist[s.data.numpy()[()]] for s in sample['labels']])\nprint(sample['labels'])\nprint(len(sample['labels']))\n"

In [8]:
import torch.optim as optim

#make sure we do things on the gpu
# NOTE: .cuda() needs a CUDA-capable device; the later cells also move every
# batch to the GPU with .cuda().
net.cuda()

# Cross-entropy loss on the raw class scores returned by the network,
# optimized with Adam at a learning rate of 0.001.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr = 0.001)

In [9]:
import matplotlib.pyplot as plt
# Re-create the training DataLoader.
train_loader = torch.utils.data.DataLoader(
    artist_image_dataset,
    sampler=train_sampler,
    batch_size=16,
    num_workers=2,
)
test_acc = []      # accuracy (percent) on the test loader at each checkpoint
iterations = []    # global training-batch count at each checkpoint
train_len = len(train_loader)


def _percent_correct(model, loader):
    """Return the percentage of samples from `loader` that `model` labels correctly."""
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch in loader:
            batch_images = batch['images'].cuda()
            batch_labels = batch['labels'].cuda()
            scores = model(batch_images)
            _, guesses = torch.max(scores.data, 1)
            seen += batch_labels.size(0)
            hits += (guesses == batch_labels).sum().item()
    return 100 * hits / seen


for epoch in range(10):
    running_loss = 0

    # `step` is the 1-based batch number within the current epoch.
    for step, sample in enumerate(train_loader, start=1):
        images = sample['images'].cuda()
        labels = sample['labels'].cuda()

        optimizer.zero_grad()
        loss = criterion(net(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if step % 200:
            # Only report and evaluate on every 200th batch.
            continue

        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, step, running_loss / 200))
        running_loss = 0.0

        accuracy = _percent_correct(net, test_loader)
        test_acc.append(accuracy)
        iterations.append(step + (train_len * epoch))
        print("[%d, %5d] test accuracy: %.3f" % (epoch + 1, step, accuracy))

print('done')
plt.plot(iterations, test_acc)
plt.xlabel('number of iterations')
plt.ylabel('percent accuracy')
plt.show()

[1,   200] loss: 3.506
[1,   200] test accuracy: 9.571
[1,   400] loss: 3.307
[1,   400] test accuracy: 11.786
[1,   600] loss: 3.207
[1,   600] test accuracy: 14.071
[2,   200] loss: 3.058
[2,   200] test accuracy: 15.071
[2,   400] loss: 3.069
[2,   400] test accuracy: 17.429
[2,   600] loss: 3.000
[2,   600] test accuracy: 18.714
[3,   200] loss: 2.942
[3,   200] test accuracy: 17.571
[3,   400] loss: 2.918
[3,   400] test accuracy: 19.786
[3,   600] loss: 2.877
[3,   600] test accuracy: 20.714
[4,   200] loss: 2.852
[4,   200] test accuracy: 21.857
[4,   400] loss: 2.798
[4,   400] test accuracy: 21.714
[4,   600] loss: 2.779
[4,   600] test accuracy: 18.857
[5,   200] loss: 2.798
[5,   200] test accuracy: 21.929
[5,   400] loss: 2.757
[5,   400] test accuracy: 24.357


Process Process-10:
Process Process-9:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/cole/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/cole/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/cole/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/cole/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/cole/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/cole/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/cole/anaconda3/lib/python3.6/site-p

KeyboardInterrupt: 

Now let's run this model against our held-out validation set

In [None]:
# Measure the accuracy of the trained network on the validation split.
correct = 0
total = 0
# no grad because we don't need gradients while evaluating
with torch.no_grad():
    # The validation DataLoader is created under the name `val_loader`;
    # `validation_loader` was never defined and raised a NameError.
    for sample in val_loader:
        images,labels = sample['images'].cuda(), sample['labels'].cuda()
        outputs = net(images)
        # Index of the highest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated rather than a fixed count.
print('Accuracy of the network on the {0} validation images: {1:.2f}'.format(total, 100 * correct/total))

The paper noted getting better results using Adam vs SGD + Momentum, so I decided to check out the results on my own. I ended up finding SGD + Momentum to be better for this particular dataset.

In [10]:
# Save only the learned parameters (the state_dict) to the relative path
# "first_real_model".
torch.save(net.state_dict(), "first_real_model")