# Deep Convolutional Neural Network for Art Classification with PyTorch

## Imports

In [3]:
import os
import torch
import torchvision
import tarfile
from torchvision.datasets.utils import download_url
from torch.utils.data import random_split
import PIL
from PIL import Image
import cv2
import pathlib
import glob
from pathlib import Path
import numpy as np
import shutil
import random

In [4]:
project_name='CNN_classifier'

In [5]:
path_str = '/Users/alexandreberkovic/Desktop/Year_4/Masters'

In [6]:
path = Path(path_str)

In [7]:
os.listdir(path)

['Research',
 '.DS_Store',
 'classifier for art.pdf',
 'Dataset',
 'Master projects.xlsx',
 'Interim',
 'Repo']

In [8]:
# directory of image folders per mouvement
img_folders = Path(path_str+'/'+'Dataset/wikiart')

In [9]:
# remove DS_Store file
folders = list(os.listdir(img_folders))
folders.remove('.DS_Store')

## Create a subset of the dataset to play with during the CNN

In [96]:
def subset(path):
    for i in range(len(folders)):
        dirpath = os.path.join(path,folders[i])
        directory_length = int(0.1*len(list(os.listdir(dirpath))))
        filenames = random.sample(os.listdir(dirpath),directory_length)
        print('{} has {} images'.format(folders[i], directory_length))
#         print('Mouvement' + folders[i] "has" + str(directory_length) + 'images') 
        
        destDirectory = 'Dataset_subset/' + str(folders[i])
        if not os.path.exists(destDirectory):
            os.makedirs(destDirectory)
            
        else:
            for f in os.listdir(destDirectory):
                os.remove(os.path.join(destDirectory, f))
       
        for fname in filenames:
            srcpath = os.path.join(dirpath, fname)
            shutil.copy(srcpath, destDirectory)

In [95]:
subset(img_folders)

Mouvement Early_Renaissance has 139 images
Mouvement Analytical_Cubism has 11 images
Mouvement Mannerism_Late_Renaissance has 127 images
Mouvement Expressionism has 673 images
Mouvement Contemporary_Realism has 48 images
Mouvement Fauvism has 93 images
Mouvement Trial has 0 images
Mouvement Northern_Renaissance has 255 images
Mouvement Rococo has 208 images
Mouvement Ukiyo_e has 116 images
Mouvement Pop_Art has 148 images
Mouvement High_Renaissance has 134 images
Mouvement Minimalism has 133 images
Mouvement Art_Nouveau_Modern has 433 images
Mouvement Action_painting has 9 images
Mouvement Color_Field_Painting has 161 images
Mouvement Symbolism has 452 images
Mouvement Realism has 1073 images
Mouvement Romanticism has 701 images
Mouvement Cubism has 223 images
Mouvement Impressionism has 1306 images
Mouvement New_Realism has 31 images
Mouvement Baroque has 424 images
Mouvement Post_Impressionism has 645 images
Mouvement Abstract_Expressionism has 278 images
Mouvement Pointillism has 51

## Data exploration

### Image resizing functions

In [103]:
# directory of image folders per mouvement
subset_folders = Path(path_str+'/'+'Repo/Classifier/Dataset_subset')

In [106]:
def resize_upper(path,cnn_size):
    '''
    Resizes the images so that one side is 256 and the other is larger
    Crops it so that the output is 256x256
    '''
    for i in range(len(folders)):
        dirpath = os.path.join(path,folders[i])
        
        images = [file for file in os.listdir(dirpath) if file.endswith(('jpeg', 'png', 'jpg'))]
        name = folders[i]

        if not os.path.exists(os.path.join('Resized',name)):
            os.makedirs(os.path.join('Resized',name))

        for image in images:
            img = Image.open(Path(str(subset_folders)+'/'+name+'/'+image))

            if img.size[0] >= img.size[1] and img.size[1] > cnn_size:

                fixed_height = cnn_size
                height_percent = (fixed_height / float(img.size[1]))
                width_size = int((float(img.size[0]) * float(height_percent)))
                img = img.resize((width_size, fixed_height), PIL.Image.NEAREST)
                cropped = crop(img)
                cropped.save("Resized/"+name+"/"+image, optimize=True, quality=100)

            elif img.size[0] < img.size[1] and img.size[0] > cnn_size:
                fixed_width = cnn_size
                width_percent = (fixed_width / float(img.size[0]))
                height_size = int((float(img.size[1]) * float(width_percent)))
                img = img.resize((fixed_width, height_size), PIL.Image.NEAREST)
                cropped = crop(img)
                cropped.save("Resized/"+name+"/"+image, optimize=True, quality=100)



In [100]:
def resize_upper(path,cnn_size):
    '''
    Resizes the images so that one side is 256 and the other is larger
    Crops it so that the output is 256x256
    '''
    images = [file for file in os.listdir(path) if file.endswith(('jpeg', 'png', 'jpg'))]
    name = str(path).split('/')[-1]
    
    if not os.path.exists(name):
        os.makedirs(name)
    
    for image in images:
        img = Image.open(Path(str(img_folders)+'/'+image))
        
        if img.size[0] >= img.size[1] and img.size[1] > cnn_size:

            fixed_height = cnn_size
            height_percent = (fixed_height / float(img.size[1]))
            width_size = int((float(img.size[0]) * float(height_percent)))
            img = img.resize((width_size, fixed_height), PIL.Image.NEAREST)
            cropped = crop(img)
            cropped.save("Resized/"+name+"/"+image, optimize=True, quality=100)
    
        elif img.size[0] < img.size[1] and img.size[0] > cnn_size:
            fixed_width = cnn_size
            width_percent = (fixed_width / float(img.size[0]))
            height_size = int((float(img.size[1]) * float(width_percent)))
            img = img.resize((fixed_width, height_size), PIL.Image.NEAREST)
            cropped = crop(img)
            cropped.save("Resized/"+name+"/"+image, optimize=True, quality=100)


In [101]:
def resize_lower(path,cnn_size):
    '''
    Resizes the images so that one side is 256 and the other is smaller
    Fills blank space with 0s so that the output is 256x256
    ''' 
    images = [file for file in os.listdir(path) if file.endswith(('jpeg', 'png', 'jpg'))]
    name = str(path).split('/')[-1]
    
    if not os.path.exists(name):
        os.makedirs(name)
    
    for image in images:
        img = Image.open(Path(str(img_folders)+'/'+image))
        
        if img.size[1] >= img.size[0] and img.size[0] > cnn_size:

            fixed_height = cnn_size
            height_percent = (fixed_height / float(img.size[1]))
            width_size = int((float(img.size[0]) * float(height_percent)))
            img = img.resize((width_size, fixed_height), PIL.Image.NEAREST)
            filled = fill(img,(0, 0, 0))
            filled.save("Resized/"+name+"/"+image, optimize=True, quality=100)
    
        elif img.size[1] < img.size[0] and img.size[1] > cnn_size:
            fixed_width = cnn_size
            width_percent = (fixed_width / float(img.size[0]))
            height_size = int((float(img.size[1]) * float(width_percent)))
            img = img.resize((fixed_width, height_size), PIL.Image.NEAREST)
            filled = fill(img,(0, 0, 0))
            filled.save("Resized/"+name+"/"+image, optimize=True, quality=100)



In [102]:
def resize_compress(path,cnn_size):
    '''
    Resizes the images by compressing them
    Output is 256x256
    ''' 
    images = [file for file in os.listdir(path) if file.endswith(('jpeg', 'png', 'jpg'))]
    name = str(path).split('/')[-1]
    
    if not os.path.exists(name):
        os.makedirs(name)
    for image in images:
        img = Image.open(Path(str(img_folders)+'/'+image))
        if img.size[0] > cnn_size and img.size[1] > cnn_size:
            resized_image = img.resize((256,256))
            resized_image.save("Resized/"+name+"/"+image, optimize=True, quality=100)
        else:
            pass

### Helper functions used after resizing to uniform image dimensions

In [14]:
def crop(im):
    '''
    Crops the image when one side is 256 and the other is bigger
    Outputs a 256x256 centred image
    '''
    # Opens a image in RGB mode
#     im = Image.open(r"C:\Users\Admin\Pictures\network.png")
 
    # Setting the points for cropped image
    width, height = im.size
    if width == 256:
        left = 0
        right = 256
        top = height//2 - 128
        bottom = height//2 + 128

    elif height == 256:
        left = width//2 - 128
        right = width//2 + 128
        top = 0
        bottom = 256
        
    # Cropped image of above dimension
    # (It will not change original image)
    im1 = im.crop((left, top, right, bottom))
    return im1

In [15]:
def fill(pil_img, background_color):
    '''
    Fills the image with 0s when one side is 256 and the other is smaller
    Outputs a 256x256 centred image
    '''
    width, height = pil_img.size
    if width == height:
        return pil_img
    elif width > height:
        result = Image.new(pil_img.mode, (width, width), background_color)
        result.paste(pil_img, (0, (width - height) // 2))
        return result
    else:
        result = Image.new(pil_img.mode, (height, height), background_color)
        result.paste(pil_img, ((height - width) // 2, 0))
        return result

### Creating the usable dataset

In [107]:
resize_upper(subset_folders,256)

FileNotFoundError: [Errno 2] No such file or directory: 'Analytical_Cubism/georges-braque_the-bottle-of-rum-1912.jpg'