## Van Gogh and Other Artists
This notebook runs the generic pipeline on Colab. A Kaggle token is required to download the datasets.

1. Clone the repository to your colab area:

In [None]:
# Clone the project repository into the current working directory
# (expected to be /content on Colab, see the %cd lines below).
!git clone https://github.com/czkaiweb/vanGogh-and-Other-Artist.git

# Bring an already existing clone up to date: enter the repository,
# fetch and pull the latest changes, then return to /content so that the
# relative paths used by the later cells resolve from there.
%cd /content/vanGogh-and-Other-Artist
!git fetch 
!git pull
# Uncomment the next line to work on the `develop` branch instead.
#!git checkout develop
%cd /content



2. Import the files and needed packages:

In [None]:
import sys
sys.path.append('/content/vanGogh-and-Other-Artist')
sys.path.append('/content/vanGogh-and-Other-Artist/preprocessing')
sys.path.append('/content/vanGogh-and-Other-Artist/model')
from genericCNN import *
from preprocessing.ImageTranform import *
from torchsummary import summary

import glob
import hashlib
import os
import shutil

import numpy
import pandas as pd
# `import torch.nn as nn` binds only `nn`; the bare name `torch` (used by
# torch.save further down) has to be imported explicitly.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from tqdm import tqdm

%load_ext autoreload
%autoreload 2

3. Create the image output folder:

In [None]:
# Metadata records collected while the datasets are imported:
# one [hash, artist] pair per image file.
fileList = []
# Single folder that receives every image under a hash-based file name.
img_path = "./imgs"

# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of a separate isdir()/mkdir() pair.
os.makedirs(img_path, exist_ok=True)

4. Set up the directory for the Kaggle token file:

In [None]:
# Directory that will hold the Kaggle API token file (kaggle.json).
kaggle_path = "/root/.kaggle"
# Idempotent creation: no error when the directory already exists.
os.makedirs(kaggle_path, exist_ok=True)


5. In the left column, click "File", go to the parent folder, and check that /root/.kaggle exists. Upload your Kaggle token file (usually named kaggle.json) there.
Then use "chmod" to set restrictive permissions that protect your token:

In [None]:
# Mode 600: only the file owner may read or write the token file.
!chmod 600 /root/.kaggle/kaggle.json

6. Download the Van Gogh dataset, unzip it, and append its metadata:

In [None]:
# Download van Gogh dataset
!kaggle datasets download -d ipythonx/van-gogh-paintings

# List all von Goph plots
tmp_path = "./tmp"
vangoghZip = 'van-gogh-paintings.zip'
try:
    shutil.unpack_archive(vangoghZip,tmp_path)
except Exception as err:
    print(err)

allVanGogh = glob.glob(tmp_path+'/*/*.jpg')

# Append metadata
for index in tqdm(range(len(allVanGogh))):
    fileName = allVanGogh[index]
    file = fileName.split("/")[-1]
    hashName = hashlib.md5(file.encode()).hexdigest()
    shutil.move(fileName, img_path + "/" + hashName + ".jpg", copy_function = shutil.copy2)
    artist = "vanGogh"
    fileList.append([hashName,artist])
    
# Clean tmp data
try:
    shutil.rmtree(tmp_path)
    os.remove(vangoghZip)
except Exception as err:
    print(err)

7. Download the Monet dataset, unzip it, and append its metadata:

In [None]:
!kaggle datasets download -d srrrrr/monet2photo

# List all Monet plots
tmp_path = "./tmp"
monetZip = 'monet2photo.zip'
try:
    shutil.unpack_archive(monetZip,tmp_path)
except Exception as err:
    print(err)

allMonet = glob.glob(tmp_path+'/*/trainA/*.jpg')

# Append metadata
for index in tqdm(range(len(allMonet))):
    fileName = allMonet[index]
    file = fileName.split("/")[-1]
    hashName = hashlib.md5(file.encode()).hexdigest()
    shutil.move(fileName, img_path + "/" + hashName + ".jpg", copy_function = shutil.copy2)
    artist = "Monet"
    fileList.append([hashName,artist])
    
# Clean tmp data
try:
    shutil.rmtree(tmp_path)
    os.remove(monetZip)
except Exception as err:
    print(err)

8. Download the custom dataset, unzip it, and append its metadata:

In [None]:
# Download WikiArts dataset: https://www.kaggle.com/datasets/antoinegruson/-wikiart-all-images-120k-link
!kaggle datasets download -d czkaiweb/subwikiarts

# List all wikiarts plots
tmp_path = "./tmp"
wikiartsZip = 'subwikiarts.zip'
try:
    shutil.unpack_archive(wikiartsZip,tmp_path)
except Exception as err:
    print(err)

WikiArtsMeta = tmp_path+"/WikiArts.csv"
WikiArtsDF = pd.read_csv(WikiArtsMeta)
WikiArtsList = WikiArtsDF[["hash","Artist"]].values

def findGroup(head):
    """Return the group sub-folder name for the leading characters of a hash.

    `head` is compared as a plain string against fixed, inclusive upper
    bounds; the first bound that is not smaller than `head` selects the
    group, and anything above the last bound falls into GroupE.

    NOTE(review): the bounds are lowercase hex, so `head` is presumably a
    lowercase hex prefix -- confirm against the hashes in the metadata.
    """
    upperBounds = (
        ("33", "/GroupA/"),
        ("69", "/GroupB/"),
        ("9d", "/GroupC/"),
        ("cc", "/GroupD/"),
    )
    for bound, folder in upperBounds:
        if head <= bound:
            return folder
    return "/GroupE/"
    
# Set to true for group splitting
preClean = False
if preClean:
    # Make sure the five group sub-folders exist before files are moved into them
    for char in "ABCDE":
        groupDir = img_path+"/Group{}".format(char)
        if not os.path.isdir(groupDir):
            os.mkdir(groupDir)

# Move every image listed in the WikiArts metadata out of the scratch folder.
# NOTE(review): the destination is a directory, so shutil.move is expected to
# raise if a file of the same name is already there (e.g. on a re-run) -- confirm.
for record in WikiArtsList:
    imgHash = record[0]
    # Without group splitting the image goes straight into img_path
    groupDir = findGroup(imgHash[:2]) if preClean else "/./"
    fileName = tmp_path+"/imgs/"+imgHash+".jpg"
    shutil.move(fileName, img_path+ groupDir + "/" , copy_function = shutil.copy2)

# Remove the scratch folder and the downloaded archive; failures are only printed
try:
    shutil.rmtree(tmp_path)
    os.remove(wikiartsZip)
except Exception as err:
    print(err)


9. Create the CSV file for the metadata:

In [None]:
# Save to meta file
# Records gathered by the earlier import cells, as a two-column table
metaDF = pd.DataFrame(fileList,columns = ["hash","Artist"])
# Append the WikiArts records. ignore_index=True renumbers the rows 0..N-1, so the
# index written to meta.csv is unique instead of restarting at 0 for the second table.
metaDF = pd.concat([metaDF,WikiArtsDF[["hash","Artist"]]], ignore_index=True)
# The index is still written as the first (unnamed) column, exactly as before.
metaDF.to_csv("meta.csv")

10. Initialize the generic pipeline (import it and the transformer first if they are not imported yet), then set the image transformer on the genericCNN object:

In [None]:
# Initialize the pipeline object (genericCNN is star-imported from the project's genericCNN module)
myObj = genericCNN()

# Set up the transformer
# (224,224) is presumably the target image size -- confirm in ImageTransformer.
myTransform = ImageTransformer((224,224))
myTransform.initTransform()
transformer = myTransform.getTransformer()

# Hand the transformer over to the pipeline object
myObj.setTransformer(transformer)
# Optional: decide whether a normalization layer is added at the end of the transformation.
# By default, normalization is added.
# myObj.UseNormalized(normalize = True)

11. Set the metadata file and the image path, split the dataset, and load the data into the Dataset/DatasetLoader:

In [None]:
# Set up the metadata file and the path to the image dataset
# (both are produced by the earlier cells: meta.csv and the imgs folder)
myObj.setDataset("meta.csv",path = "imgs")

# Split the data by portion; fraction indicates the share of the whole dataset that is used.
# Default: val_size = 0.2, test_size = 0.1
#myObj.splitData(val_size=0.05,test_size = 0.8,fraction = 1)
#myObj.splitData(val_size=0.2,test_size = 0.1,fraction = 1)
# NOTE(review): with val_size=0.1 and test_size=0.7, presumably only about 20% of the
# data is left for training -- confirm this split is intended.
myObj.splitData(val_size=0.1,test_size = 0.7,fraction = 1)

# Will automatically get the statistics of the training set and update the mean/std used for normalization.
# loadData and checkDataset
# NOTE(review): the meaning of reUseTrain=3 is defined in genericCNN.loadData and is not visible here.
myObj.loadData(reUseTrain=3)
#myObj.loadData()

12. Display a batch:

In [None]:
# Visual sanity check of the loaded data; presumably renders one batch of images (see genericCNN).
myObj.showDatasetBatch()

13. Select model and edit the architecture:

In [None]:
# Use the pre-trained model
#model_ft = models.vgg16(pretrained=True)
#model_ft = models.efficientnet_b2(pretrained=True)
model_ft = models.efficientnet_b0(pretrained=True)
#model_ft.load_state_dict(torch.load('./weights/vgg16-397923af.pth'))

# Swap the last classifier layer for a fresh linear layer with one output per class.
# NOTE(review): 6 must equal the number of distinct artists in meta.csv -- confirm.
num_classes = 6
num_ftrs = model_ft.classifier[1].in_features
model_ft.classifier[1] = nn.Linear(in_features=num_ftrs, out_features=num_classes)
model_ft = model_ft.to(myObj.device)

# Specify the criterion
criterion = nn.CrossEntropyLoss()

# Specify the optimizer
optimizer_ft = optim.SGD(params=model_ft.parameters(), lr=0.001, momentum=0.9)

# Specify the learning rate scheduler: decay the LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.1)

14. Set the model for the generic object:

In [None]:
# Register the model with the pipeline object; the tag should name the architecture in use.
#myObj.setModel(model = model_ft,modeltag="EfficientNetB2mod")
myObj.setModel(model = model_ft,modeltag="EfficientNetB0mod")

15. Train the model:

In [None]:
# Train for 13 epochs with the loss, optimizer and LR scheduler defined in the model cell.
myObj.train_model(criterion, optimizer_ft, exp_lr_scheduler, num_epochs=13)
# Evaluation and the history plot are run in their own cells below.
#myObj.evaluate()
#myObj.drawHistory()

16. Evaluate the model:

In [None]:
# Evaluate the trained model; presumably on the held-out test split (see genericCNN.evaluate).
myObj.evaluate()

In [None]:
# Plot the training history recorded by the pipeline object (see genericCNN.drawHistory).
myObj.drawHistory()

17. Save the weights as pth file:

In [None]:
# Save only the model's state_dict, not the whole pipeline object.
# The file name encodes the setup used above: EfficientNetB0, 13 epochs, lr=0.001, momentum=0.9.
torch.save(myObj.Model.state_dict(), 'model_weights_EfficientNetB0_newTrainTestSplits_13epochs_001lr_9momentum.pth')

18. Download the weight file from colab:

In [None]:
# Colab-only: download the weight file written by the save step.
# The file name must match the one passed to torch.save.
from google.colab import files
files.download("model_weights_EfficientNetB0_newTrainTestSplits_13epochs_001lr_9momentum.pth")

In [None]:
# Colab-only: mount Google Drive at /content/drive.
# NOTE(review): no visible cell reads from or writes to the mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Plot training vs. validation accuracy per epoch.
# The epoch axis is derived from the recorded history instead of a hard-coded
# 21 entries: training above runs 13 epochs, and scatter() needs x and y of equal
# length. `numpy` is the name actually imported by this notebook (`np` is not).
# NOTE(review): `plt` is not imported explicitly in the visible cells; presumably
# it arrives through one of the star imports -- confirm.
trainEpochs = numpy.arange(1, len(myObj.trainAccu) + 1)
valEpochs = numpy.arange(1, len(myObj.valAccu) + 1)
plt.scatter(trainEpochs, myObj.trainAccu)
plt.scatter(valEpochs, myObj.valAccu, c="orange")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Val'])
# The model trained in this notebook is EfficientNetB0, not B2
plt.title('Train/Val Accuracy Vs Epoch For EfficientNetB0')