# Feature Extraction using Google's Inception V3 CNN
## Dataset: Best Artworks of All Time
(https://www.kaggle.com/ikarus777/best-artworks-of-all-time?select=images)

In [123]:
import csv
import numpy as np
import matplotlib.pyplot as plt

import cv2
from pathlib import Path
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.applications.inception_v3 import InceptionV3


#### Import and Preprocessed Data 
Define location, image dimensions and conduct train, test, and validation data partitions  

In [124]:
# Image folders
images_folder = 'C:/Users/Daniel P/Documents/,,Spring21/CSCE 489/Feature Extraction/resized/resized'

import splitfolders
splitfolders.ratio('resized/resized', output="output", seed=1337, ratio=(.8, 0.1,0.1)) 

# Image dimensions
width = height = 380
channels = 3

# Image pre-processing
batch_size = 128
from_rgb_rescale = 1.0/255

Copying files: 8683 files [00:46, 187.75 files/s]


#### Define train and validation directories *Modify based on location

In [125]:
train_folder = 'C:/Users/Daniel P/Documents/,,Spring21/CSCE 489/Feature Extraction/output/train/'
validation_folder = 'C:/Users/Daniel P/Documents/,,Spring21/CSCE 489/Feature Extraction/output/val/'

#### Tool to check if directory is valid 

In [126]:
import os.path
isdir = os.path.isdir(train_folder)
print(isdir)

True


#### Check list of all images in dataset
Useful to determine realtive location of images based on list position 

In [135]:
import os
path ='C:/Users/Daniel P/Documents/,,Spring21/CSCE 489/Feature Extraction/resized/resized'
list_of_files = open('labels.txt', 'a')

for root, dirs, files in os.walk(path):
    for file in files:
        list_of_files.write(file)
        list_of_files.write("\n")
# for name in list_of_files:
#     print(name)
list_of_files.close()

#### Generate training sets based on training image directories 

In [128]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

image_generator = ImageDataGenerator(rescale=from_rgb_rescale)

train_generator = image_generator.flow_from_directory(
    directory=train_folder, 
    target_size=(width, height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False
)

Found 6924 images belonging to 51 classes.


#### Load and display pretrained Inception V3 CNN model with provided dimensions

In [68]:
inception_v3_model = InceptionV3(
    include_top=False, 
    weights='imagenet',
    input_shape=(width, height, channels)
)

Copying files: 0 files [13:31, ? files/s]


Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [129]:
inception_v3_model.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 380, 380, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 189, 189, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 189, 189, 32) 96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 189, 189, 32) 0           batch_normalization[0][0]        
_______________________________________________________________________________________

In [130]:
inception_v3_model.compile(
    optimizer='adagrad',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

#### Predict features by feeding data to the pretrained network 

In [131]:
train_features = inception_v3_model.predict_generator(train_generator)

In [132]:
print('Train features shape {}'.format(train_features.shape))
print('Max {}'.format(train_features.max()))
print('Min {}'.format(train_features.min()))
print('Sample: {}'.format(train_features[0]))

Train features shape (6924, 10, 10, 2048)
Max 33.07126235961914
Min 0.0
Sample: [[[0.31765118 0.5884839  0.7345319  ... 1.1978512  1.2209369  3.1717079 ]
  [0.43529275 0.         0.         ... 1.2163258  1.2748712  2.404144  ]
  [0.         0.         0.         ... 0.7862179  1.2088115  0.9690614 ]
  ...
  [1.1201547  0.         0.         ... 0.22075297 0.         0.6866915 ]
  [0.62887084 0.6962633  0.         ... 0.8009587  0.         0.35312036]
  [0.         0.09878466 0.         ... 1.083804   0.         0.06248389]]

 [[0.         0.         1.5791489  ... 0.84773386 0.94211805 2.13814   ]
  [0.         0.         0.44718203 ... 0.91453725 0.9618644  1.6135615 ]
  [0.         0.         0.7848551  ... 0.6489707  0.88809997 0.67056364]
  ...
  [1.0930996  1.0236769  0.29629618 ... 0.09613347 0.         0.80714315]
  [0.22212264 0.49580273 0.01460232 ... 0.651039   0.         0.6459734 ]
  [0.03976159 0.701686   0.         ... 0.9408099  0.         0.3929589 ]]

 [[0.         0.

#### Place output model features in CSV file

In [134]:
features_list = [i for i in range(train_features.shape[-1])]

with open('train_art_features.csv', 'w') as f:
    writer = csv.writer(f)

    writer.writerow(features_list)
    for features in train_features:
        writer.writerow(features[0][0])