In [None]:
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score
from torch.optim import Adam, lr_scheduler
from torchvision import transforms as T
import torchvision.models as models
import matplotlib.pyplot as plt
import os
from os.path import join
from glob import glob
from PIL import Image
from torch import nn
import pandas as pd
import numpy as np
import torchvision
import torch
import joblib
from sklearn import svm
import random
import tarfile
import io
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
from collections import Counter
import seaborn as sns
import encoding

In [None]:
# Training device. The second GPU is preferred when the machine has one;
# otherwise fall back to the first GPU, and finally to the CPU, so the
# notebook still runs on hosts with fewer accelerators.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = 'cuda:1'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

In [None]:
# Read the two ground-truth tables. The validation frame is loaded from the
# "test" CSV of the dataset.
train_df, valid_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/data/wikiart/wikiart_Painting100k/MultitaskPainting100k_Dataset_groundtruth/groundtruth_multiloss_train_header.csv',
        '/data/wikiart/wikiart_Painting100k/MultitaskPainting100k_Dataset_groundtruth/groundtruth_multiloss_test_header.csv',
    )
)

In [None]:
# Add the full image location to both tables by joining the image directory
# with each row's `filename`.
for frame in (train_df, valid_df):
    frame['img_path'] = frame['filename'].map(
        lambda name: join('/data/wikiart/wikiart_Painting100k/images_256minside', name)
    )

In [None]:
# class id -> genre name. Ids follow the sorted order of the genres found in
# the training table, so they are stable from run to run.
class_dict_genre = dict(enumerate(np.sort(train_df['genre'].unique())))

# Inverse lookup used to label both tables in a single vectorised pass. This
# replaces one boolean mask per genre and keeps the label column integer-typed
# instead of the float column produced by incremental `.loc` assignment.
genre_to_class = {genre: class_id for class_id, genre in class_dict_genre.items()}
train_df['class_genre'] = train_df['genre'].map(genre_to_class)
# NOTE(review): a genre present only in valid_df has no id and maps to NaN
# here (it was also left as NaN by the previous per-genre assignment).
valid_df['class_genre'] = valid_df['genre'].map(genre_to_class)

In [None]:
# Augmentation steps only (no tensor conversion or normalisation); used by
# the preview cell that follows.
preview_steps = [
    T.Resize(size=256),
    T.RandomResizedCrop(224, scale=(0.3, 1), ratio=(1, 1)),
    T.RandomHorizontalFlip(0.5),
]
tr = T.Compose(preview_steps)

In [None]:
# Apply the augmentation to one training image; the transformed image is the
# value of the cell and is therefore rendered as its output.
row = train_df.iloc[3]
preview_image = Image.open(row.img_path)
tr(preview_image)

In [None]:
# Per-channel normalisation shared by both pipelines.
# NOTE(review): these look like the usual ImageNet statistics expected by the
# pretrained backbone - confirm.
normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])

# Training: resize, random square crop covering 30-100% of the image area
# (output 224x224), random horizontal flip, then tensor + normalisation.
train_transforms = T.Compose([
    T.Resize(256),
    T.RandomResizedCrop(size=224, scale=(0.3, 1), ratio=(1, 1)),
    T.RandomHorizontalFlip(p=0.5),
    T.ToTensor(),
    normalize,
])

# Validation: deterministic resize + centre crop to 224x224.
valid_transforms = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.ToTensor(),
    normalize,
])

In [None]:
class ImageDataset(Dataset):
    """Map-style dataset yielding ``(transformed_image, genre_label)`` pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``img_path`` column (file opened with PIL) and a
        ``class_genre`` column holding the integer-valued class id.
    transform : callable
        Called on the RGB PIL image; its result is returned as the sample.
    """

    def __init__(self, df, transform):
        self.df = df
        self.transform = transform

    def __len__(self):
        """Number of rows in the underlying table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(transform(image), label)`` for the row at position ``idx``.

        The label is a ``torch.long`` tensor of shape ``(1,)``.
        """
        # Positional access: samplers emit positions 0..len-1, which only
        # coincide with index labels when the frame index has been reset.
        row = self.df.iloc[idx]
        # Close the file handle as soon as the pixels have been decoded.
        with Image.open(row.img_path) as source:
            im = source.convert('RGB')
        # Build the label as an integer directly instead of routing a float
        # through LongTensor.
        label = torch.tensor([int(row.class_genre)], dtype=torch.long)
        return self.transform(im), label

In [None]:
batch_size=16

# Options common to the training and validation loaders.
loader_options = dict(batch_size=batch_size, pin_memory=False, num_workers=6)

# Both frames get a fresh 0..n-1 index before being wrapped in a dataset.
train_dataset = ImageDataset(train_df.reset_index(drop=True), train_transforms)
valid_dataset = ImageDataset(valid_df.reset_index(drop=True), valid_transforms)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
valid_loader = DataLoader(valid_dataset, **loader_options)

In [None]:
def imshow(image):
    """Display a channel-first image tensor with matplotlib.

    Parameters
    ----------
    image : torch.Tensor
        3-D tensor; it is transposed with ``(1, 2, 0)`` so that the first
        axis becomes the last one before plotting.

    The values are min-max rescaled to [0, 1] for display, so normalised
    inputs (which contain negative values) render without clipping.
    """
    # detach/cpu make the helper usable on GPU tensors and on tensors that
    # are part of an autograd graph.
    npimg = image.detach().cpu().numpy().transpose(1, 2, 0)
    lowest = npimg.min()
    span = npimg.max() - lowest
    if span > 0:
        npimg = (npimg - lowest) / span
    else:
        # Constant image: avoid dividing by zero and show it as all zeros.
        npimg = np.zeros_like(npimg)
    plt.imshow(npimg)
    plt.axis("off")
    plt.show()

In [None]:
# Draw one batch from the training loader and show it as a single-row grid.
dataiter = iter(train_loader)
# Use the built-in next(): it relies on the standard iterator protocol rather
# than the loader iterator's legacy `.next()` alias, which newer PyTorch
# releases no longer provide.
img, target = next(dataiter)
imshow(torchvision.utils.make_grid(img, nrow = batch_size))

In [None]:
# Look at one image of the current batch on its own.
sample_index = 7
imshow(img[sample_index])

In [None]:
# Pretrained ResNeSt-50 whose `fc` layer is replaced by an identity, so the
# network returns whatever used to feed that layer.
resnest = encoding.models.get_model('ResNeSt50', pretrained=True)
resnest.fc = nn.Identity()

# Index tensor (value 0) passed to Net, created as a long tensor and moved to
# the selected device.
condition = torch.zeros(1, dtype=torch.long).to(device)

class Net(torch.nn.Module):
    """Backbone followed by a learnable feature mask and a linear classifier.

    Parameters
    ----------
    base_model : torch.nn.Module
        Feature extractor; its output is multiplied element-wise with a
        ``(1, embed_dim)`` mask, so its last dimension must be ``embed_dim``.
    condition : tensor-like of integers
        Index looked up in the mask embedding table (the table has a single
        row, so the only valid index is 0).
    embed_dim : int, default 2048
        Width of the backbone features and of the mask.
    num_classes : int, default 41
        Number of output logits.

    Notes
    -----
    ``condition`` is registered as a buffer, so ``model.to(device)`` moves it
    together with the parameters and it appears in ``state_dict()``.
    """

    def __init__(self, base_model, condition, embed_dim=2048, num_classes=41):
        super(Net, self).__init__()
        self.base = base_model
        self.fc = nn.Sequential(
            nn.PReLU(),
            nn.Linear(embed_dim, num_classes)
            )
        self.masks = torch.nn.Embedding(1, embed_dim)
        # Initial mask: 1.0 on the first half of the features, 0.1 elsewhere.
        mask_init = torch.full((1, embed_dim), 0.1)
        mask_init[0, :embed_dim // 2] = 1
        self.masks.weight = torch.nn.Parameter(mask_init, requires_grad=True)
        # A buffer (not a plain attribute) so device moves include it.
        self.register_buffer('condition', torch.as_tensor(condition, dtype=torch.long))

    def forward(self, x):
        """Return class logits for the input batch ``x``.

        The mask used for this call (after ReLU, so non-negative) is kept on
        ``self.mask`` for inspection.
        """
        embed = self.base(x)
        self.mask = torch.nn.functional.relu(self.masks(self.condition))
        masked_embed = embed * self.mask
        return self.fc(masked_embed)

In [None]:
model = Net(resnest, condition).to(device)

lr = 1.e-3
# Three parameter groups: the pretrained backbone is fine-tuned with a much
# smaller learning rate than the freshly initialised head. The mask embedding
# is created with requires_grad=True, so it needs its own group - without it
# the optimizer never updates the mask.
optimizer = torch.optim.Adam(
    [
        {'params': model.base.parameters(), 'lr': 1.e-5},
        {'params': model.fc.parameters(), 'lr': lr},
        {'params': model.masks.parameters(), 'lr': lr},
    ],
    lr=lr,
    weight_decay=1.e-4,
)
# Multiplies every group's learning rate by 0.98 on each scheduler step.
scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 0.98)
criterion = nn.CrossEntropyLoss()
softmax = nn.Softmax(dim=1)

In [None]:
def train(model, loss_func, device, train_loader, optimizer, epoch):
    """Run one training epoch, printing a live progress line.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; moved to ``device`` and put in train mode.
    loss_func : callable
        Called as ``loss_func(logits, targets)``.
    device : str or torch.device
        Device the batches are moved to.
    train_loader : sized iterable
        Yields ``(img, label)`` batches where ``label`` has shape (batch, 1).
    optimizer : torch.optim.Optimizer
        Stepped once per batch.
    epoch : int
        Unused; kept for call compatibility.

    Returns
    -------
    tuple
        ``(model, mean_loss, accuracy)`` over the epoch. Loss and accuracy
        are NaN when the loader yields no batch.
    """
    t = Timer(); t.start()
    model.to(device)
    model.train()

    template = "Train--> [{}:{}] Iteration {} ({:3.1f}%): Loss = {:.4f} | Accuracy = {:.3f}\r"
    n_batches = len(train_loader)
    loss_list = []
    # Running totals keep the per-step report O(1) instead of re-concatenating
    # every prediction seen so far on each iteration.
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    def status_line(batch_idx):
        # divmod gives whole minutes; rounding seconds/60 would show 90 s as 2:30.
        minutes, seconds = divmod(t.stop(), 60)
        percentage = 100 * (batch_idx + 1) / n_batches
        return template.format(minutes, seconds, batch_idx, percentage,
                               loss_sum / len(loss_list), n_correct / n_seen)

    batch_idx = -1
    for batch_idx, (img, label) in enumerate(train_loader):
        img, label = img.to(device), label.to(device)
        target = label.squeeze(1)

        optimizer.zero_grad()
        out = model(img)
        loss = loss_func(out, target)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        loss_list.append(loss_value)
        loss_sum += loss_value
        # argmax of the logits equals argmax of their softmax.
        n_correct += (out.detach().argmax(dim=1) == target).sum().item()
        n_seen += target.numel()

        print(status_line(batch_idx), end='')

    if not loss_list:
        # Nothing was processed: there is no meaningful loss or accuracy.
        return model, float('nan'), float('nan')

    print(status_line(batch_idx))
    return model, np.mean(loss_list), n_correct / n_seen

In [None]:
def valid(model, loss_func, device, loader, epoch):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate; moved to ``device`` and put in eval mode.
    loss_func : callable
        Called as ``loss_func(logits, targets)``.
    device : str or torch.device
        Device the batches are moved to.
    loader : sized iterable
        Yields ``(img, label)`` batches where ``label`` has shape (batch, 1).
    epoch : int
        Unused; kept for call compatibility.

    Returns
    -------
    tuple
        ``(mean_loss, accuracy, predictions, labels)``; the last two are
        flat numpy arrays covering every sample seen.

    Raises
    ------
    ValueError
        If the loader yields no batch.
    """
    t = Timer(); t.start()
    model.to(device)
    model.eval()
    loss_list, pred_list, real_list = [], [], []
    n_batches = len(loader)

    # Evaluation needs no autograd graph; skipping it saves memory and time.
    with torch.no_grad():
        for batch_idx, (img, label) in enumerate(loader):
            img, label = img.to(device), label.to(device)

            out = model(img)
            loss = loss_func(out, label.squeeze(1))
            loss_list.append(loss.item())

            # argmax of the logits equals argmax of their softmax.
            pred_list.append(out.argmax(dim=1).flatten().cpu().numpy())
            real_list.append(label.flatten().cpu().numpy())

            percentage = (batch_idx + 1) / n_batches
            print('Computing Validation ({:.1%})\r'.format(percentage), end='')

    if not loss_list:
        raise ValueError("validation loader produced no batches")

    real = np.concatenate(real_list, 0).flatten()
    pred = np.concatenate(pred_list, 0).flatten()
    acc = accuracy_score(real, pred)

    template = "Valid--> [{}:{}] Iteration {}: Loss = {:.4f} | Accuracy = {:.3f}"
    mean_loss = np.mean(np.array(loss_list))
    # divmod gives whole minutes; rounding seconds/60 would show 90 s as 2:30.
    minutes, seconds = divmod(t.stop(), 60)
    print(template.format(minutes, seconds, batch_idx, mean_loss, acc),)
    return mean_loss, acc, pred, real

In [None]:
def save_loss_fig(loss_train, loss_valid, accuracy_train, accuracy_valid, epoch):
    """Write the loss and accuracy curves to ``./loss_plot_genre_resnest.jpg``.

    The left panel shows the two loss histories and the right panel the two
    accuracy histories, each plotted against its position in the list. The
    figure title carries ``epoch``. The same file is overwritten on each call.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15,4))
    panels = (
        (axes[0], 'loss', ((loss_train, 'train_loss'), (loss_valid, 'valid_loss'))),
        (axes[1], 'accuracy', ((accuracy_train, 'train_accuracy'), (accuracy_valid, 'valid_accuracy'))),
    )
    for axis, y_label, curves in panels:
        for history, curve_label in curves:
            axis.plot(list(range(len(history))), history, label=curve_label)
        axis.set(xlabel='epoch', ylabel=y_label)
        axis.legend()

    fig.suptitle(f'EPOCH {epoch}', fontsize=16)
    # Unregister the figure from pyplot before saving it through the Figure
    # object.
    plt.close(fig)
    target_path = os.path.join('.', "loss_plot_genre_resnest.jpg")
    fig.savefig(target_path, pad_inches=0)

In [None]:
import time
class Timer:
    """Minimal stopwatch reporting whole seconds since the last ``start()``."""

    def __init__(self):
        # No reference point until start() is called.
        self._start_time = None

    def start(self):
        """Store the current ``time.perf_counter()`` reading as the origin."""
        self._start_time = time.perf_counter()

    def stop(self):
        """Return the seconds elapsed since ``start()``, rounded to an int.

        The origin is left untouched, so this can be called repeatedly to
        read the running time.
        """
        now = time.perf_counter()
        return round(now - self._start_time)

In [None]:
train_loss_list, train_acc_list, valid_loss_list, valid_acc_list = [], [], [], []
# Start from +inf so the first epoch always writes a checkpoint; a finite
# starting threshold would leave no saved model if the validation loss never
# dropped below it.
best_criterion = float('inf')

for epoch in range(50):

    print('Epoch: {}'.format(epoch))

    model, train_loss, acc_train= train(model, criterion, device, train_loader, optimizer, epoch)

    valid_loss, acc_valid, pred, real = valid(model, criterion, device, valid_loader, epoch)

    # Apply the per-epoch learning-rate decay. MultiplicativeLR takes no
    # metric, so step() is called without arguments.
    scheduler.step()

    if valid_loss < best_criterion:

        best_criterion = valid_loss
        # Saves the whole module object (not only its state_dict).
        torch.save(model, './resnest_genre_model.pt')
        print('------------------------------------------------------------------ Best Model ------------------------------------------------------------------')

    train_loss_list.append(train_loss)
    train_acc_list.append(acc_train)
    valid_loss_list.append(valid_loss)
    valid_acc_list.append(acc_valid)
    save_loss_fig(train_loss_list, valid_loss_list, train_acc_list, valid_acc_list, epoch)

In [None]:
# Switch to the CPU: the cells below use `device` when loading the checkpoint
# and when moving evaluation batches.
device='cpu'

In [None]:
# Reload the saved model object onto `device`.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load checkpoints from a trusted source.
# NOTE(review): the training loop writes './resnest_genre_model.pt'; presumably
# the notebook runs from /data/Notebook/CNN so both paths agree - confirm.
model = torch.load('/data/Notebook/CNN/resnest_genre_model.pt', map_location=device).to(device)

In [None]:
# Collect the true label, predicted class and raw logits for every validation
# sample.
truelabels = []
predictions = []
outs = []
model.eval()

# Pure inference: no autograd graph is needed.
with torch.no_grad():
    for data, target in valid_loader:
        # target has one label per row; flatten so plain ints are stored
        # rather than 1-element arrays.
        truelabels.extend(target.flatten().tolist())
        out = model(data.to(device))
        outs.append(out.cpu().numpy())
        predictions.extend(out.argmax(1).tolist())

In [None]:
# Parallel lists derived from the id -> name mapping, in the mapping's order.
list_genre_lab = list(class_dict_genre.keys())
list_genre_name = list(class_dict_genre.values())

In [None]:
# Confusion matrix with each row (true class) normalised.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(
    y_true=truelabels,
    y_pred=predictions,
    labels=list_genre_lab,
    normalize='true',
)
# Diagonal divided by the row sums; shown as the cell output.
row_totals = cm.sum(axis=1)
cm.diagonal() / row_totals

In [None]:
tick_marks = np.arange(len(list_genre_name))
# Label rows and columns with the genre names before plotting.
df_cm = pd.DataFrame(cm, index = list_genre_name, columns = list_genre_name)

# Annotated heatmap: rows are true genres, columns are predicted genres.
fig, ax = plt.subplots(figsize = (30,30))
sns.heatmap(df_cm, annot=True, cmap=plt.cm.Reds, fmt='.2f', ax=ax)
ax.set_xlabel("Predicted", fontsize = 10)
ax.set_ylabel("True", fontsize = 10)
plt.show()

In [None]:
from sklearn.metrics import classification_report
# Per-genre metrics, with rows named after the genres.
genre_report = classification_report(
    truelabels,
    predictions,
    labels=list_genre_lab,
    target_names = list_genre_name,
)
print(genre_report)

In [None]:
from sklearn.metrics import precision_score
# Precision computed with average='weighted'.
weighted_precision = precision_score(truelabels, predictions, average='weighted')
print("precision: ", weighted_precision)

## Example

In [None]:
#display(test_df.filename.iloc[30])

In [None]:
# Classify one randomly drawn validation image and show it with its labels.
row = valid_df.sample(1).iloc[0]
print(f'Actual genre: {row.genre}')
im = Image.open(row.img_path).convert('RGB')

# Inference only: make sure the model is in eval mode and skip autograd.
model.eval()
with torch.no_grad():
    pred = model(valid_transforms(im).unsqueeze(0).to(device)).argmax(1)
print(f'Predi. genre: {class_dict_genre[pred.item()]}')

# Resized copy of the image, rendered as the cell output.
T.Resize(250)(im)

In [None]:
#outs[0]

In [None]:
#saliency map https://github.com/sunnynevarekar/pytorch-saliency-maps/blob/master/Saliency_maps_in_pytorch.ipynb