In [None]:
# For downloading dataset
from urllib.request import urlretrieve
import os

# For extracting dataset
import tarfile

# For reading images
import cv2

# Essentials :)
import numpy as np

# pretty printing python objects
import pprint

# for sorting dictionary by value
import operator

# for showing images inline
from matplotlib.pyplot import imshow 
%matplotlib inline 

# for making labels one-hot encoded
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# for splitting data into training and validation data
from sklearn.model_selection import train_test_split

# for CNN and NN models
from keras.models import Sequential, Model
from keras.layers import Conv2D, Input, Dropout, Activation, Dense, MaxPooling2D, Flatten, GlobalAveragePooling2D
from keras.optimizers import Adadelta
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.models import load_model

# For transfer learning
from keras.applications.inception_v3 import InceptionV3

# to save models
import json

# for saving environment of notebook
import dill

# for printing size each variable is using
import sys

In [None]:
# Google Drive locations of the three dataset archive parts, written without
# the ".rar" suffix (the extraction cell appends it).
links = [
    "/content/drive/My Drive/2D_images_dataset_FE_no_scale_P1",
    "/content/drive/My Drive/2D_images_dataset_FE_no_scale_P2",
    "/content/drive/My Drive/2D_images_dataset_FE_no_scale_P3",
]
# Keep the individual names available as well.
link1, link2, link3 = links

In [None]:
def return_images_per_category(data_directory):
    """Count the entries inside every category sub-directory of a dataset root.

    Args:
        data_directory: Path of a directory whose sub-directories are the
            object categories.

    Returns:
        A list of ``(category_name, entry_count)`` tuples sorted by count,
        largest first. Every entry of a category directory is counted,
        whatever its file type.

    Raises:
        OSError: If ``data_directory`` cannot be listed.
    """
    counts = []
    for category in os.listdir(data_directory):
        category_dir = os.path.join(data_directory, category)
        # Plain files sitting next to the category folders are not categories;
        # calling os.listdir on them would raise NotADirectoryError.
        if not os.path.isdir(category_dir):
            continue
        counts.append((category, len(os.listdir(category_dir))))
    # Stable sort: categories with equal counts keep their listing order.
    counts.sort(key=operator.itemgetter(1), reverse=True)
    return counts

In [None]:
!pip install patool



In [None]:
import patoolib

# Unpack each "<link>.rar" archive into the current working directory.
# Without an explicit output directory patool names the result after the
# archive and, when that name is already taken, appends a suffix (a second run
# produced "..._P11"). Skipping archives that were already unpacked keeps this
# cell safe to re-run.
for link in links:
    extracted_dir = os.path.basename(link)
    if os.path.isdir(extracted_dir):
        print("Already extracted, skipping:", extracted_dir)
        continue
    patoolib.extract_archive(link + ".rar")

patool: Extracting /content/drive/My Drive/2D_images_dataset_FE_no_scale_P1.rar ...
patool: running /usr/bin/unrar x -- "/content/drive/My Drive/2D_images_dataset_FE_no_scale_P1.rar"
patool:     with cwd='./Unpack_ybj2l9jh'
patool: ... /content/drive/My Drive/2D_images_dataset_FE_no_scale_P1.rar extracted to `2D_images_dataset_FE_no_scale_P11' (multiple files in root).
patool: Extracting /content/drive/My Drive/2D_images_dataset_FE_no_scale_P2.rar ...
patool: running /usr/bin/unrar x -- "/content/drive/My Drive/2D_images_dataset_FE_no_scale_P2.rar"
patool:     with cwd='./Unpack_z6hohgo8'
patool: ... /content/drive/My Drive/2D_images_dataset_FE_no_scale_P2.rar extracted to `2D_images_dataset_FE_no_scale_P21' (multiple files in root).
patool: Extracting /content/drive/My Drive/2D_images_dataset_FE_no_scale_P3.rar ...
patool: running /usr/bin/unrar x -- "/content/drive/My Drive/2D_images_dataset_FE_no_scale_P3.rar"
patool:     with cwd='./Unpack_g11e23yx'
patool: ... /content/drive/My Dr

In [None]:
# `link` may still hold the Drive archive path stem left over from the
# extraction loop, which is not a directory. The archives are unpacked into the
# working directory under the same base name, so fall back to that location.
extracted_dir = link if os.path.isdir(link) else os.path.basename(link)
return_images_per_category(extracted_dir)

In [None]:
# From here on the names point at the extracted dataset directories instead of
# the archives on Drive.
links = [
    "/content/2D_images_dataset_FE_no_scale_P1",
    "/content/2D_images_dataset_FE_no_scale_P2",
    "/content/2D_images_dataset_FE_no_scale_P3",
]
# Keep the individual names available as well.
link1, link2, link3 = links

In [None]:
# Count the images that will actually be loaded: only '.jpg' files, and never
# the 'BACKGROUND_Google' category. Counting every directory entry instead
# (hidden checkpoint folders, stray files) over-reports the dataset size and
# disagrees with the number of samples the loader produces.
total_count = 0
for link in links:
    for category, _ in return_images_per_category(link):
        if category == 'BACKGROUND_Google':
            continue
        category_dir = os.path.join(link, category)
        total_count += sum(1 for name in os.listdir(category_dir) if name.endswith('.jpg'))
print("Total number of images in training data : ", total_count)

Total number of images in training data :  8678


In [None]:
def get_images(object_category, data_directory):
    """List the paths of every entry inside one category directory.

    Args:
        object_category: Name of the category sub-directory.
        data_directory: Dataset root that contains the category directories.

    Returns:
        A list of paths (``data_directory/object_category/<entry>``) in sorted
        order, so repeated runs see the files in the same sequence. When
        ``data_directory`` does not exist a message is printed and an empty
        list is returned, which lets callers iterate over the result safely.

    Raises:
        OSError: If the category directory itself cannot be listed.
    """
    if not os.path.exists(data_directory):
        print("Data directory not found. Are you sure you downloaded and extracted dataset properly?")
        return []
    obj_category_dir = os.path.join(data_directory, object_category)
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    return [os.path.join(obj_category_dir, img) for img in sorted(os.listdir(obj_category_dir))]

def read_image(image_path, size=(300, 200)):
    """Read one image in colour and resize it to a fixed size.

    Args:
        image_path: Path of the image file to load.
        size: Target ``(width, height)`` passed to ``cv2.resize``. The default
            of 300x200 follows the original author's note that this is the
            average Caltech 101 image size.

    Returns:
        The resized image array as returned by ``cv2.resize``.

    Raises:
        ValueError: If ``cv2.imread`` could not load the file (it signals
            failure by returning ``None`` rather than raising).
    """
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        # Fail with the offending path instead of an opaque error in resize.
        raise ValueError("Could not read image: {}".format(image_path))
    return cv2.resize(img, size, interpolation=cv2.INTER_CUBIC)

In [None]:

def create_training_data(data_directory):
    """Load every '.jpg' image from the dataset directories into arrays.

    The directories scanned are the ones in the module-level ``links`` list;
    the 'BACKGROUND_Google' category is skipped.

    Args:
        data_directory: Unused. Kept so existing calls keep working.
            NOTE(review): consider iterating over this argument instead of the
            global ``links``.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a ``uint8`` array of shape
        ``(n_images, 200, 300, 3)`` and ``Y`` is a list holding the category
        name of each image, in the same order.
    """
    print("Preparing X and Y for dataset...")

    # First pass: collect the image paths and labels, so the pixel buffer can
    # be sized from the data instead of a hard-coded sample count.
    image_paths = []
    Y = []
    for link in links:
        for category, _ in return_images_per_category(link):
            if category == 'BACKGROUND_Google':
                continue
            for image in get_images(category, link):
                if not image.endswith('.jpg'):
                    # to escape hidden ipynb checkpoints and other unnecessary files
                    continue
                image_paths.append(image)
                Y.append(category)

    # Second pass: decode each image into its own row of the buffer.
    X = np.empty((len(image_paths), 200, 300, 3), dtype=np.uint8)
    for i, image in enumerate(image_paths):
        X[i] = read_image(image)

    print("Datasets constructed")
    return X, Y

In [None]:
# Build the image array and the matching label list, then report their sizes
# (one value per line).
X, Y = create_training_data(link)
print(X.shape, len(Y), sep="\n")

Preparing X and Y for dataset...
Datasets constructed
(8677, 200, 300, 3)
8677


In [None]:
# Learn the category-name -> integer mapping, then apply it to the labels.
label_encoder = LabelEncoder()
label_encoder.fit(Y)
Y_integer_encoded = label_encoder.transform(Y)

In [None]:
# One column per category known to the encoder; every category occurs in Y, so
# this equals the width to_categorical would infer on its own.
Y_one_hot = to_categorical(Y_integer_encoded, num_classes=len(label_encoder.classes_))
print(Y_one_hot.shape)


(8677, 101)


In [None]:
# Scale pixel values to [0, 1]. Converting once and dividing in place avoids
# the second full-size float32 temporary that `X.astype(np.float32) / 255`
# allocates; the resulting values are identical.
X_normalized = X.astype(np.float32)
X_normalized /= 255
print(X_normalized.shape)

(8677, 200, 300, 3)


In [None]:
# Hold out 25% of the samples for testing (fixed seed for repeatability).
X_train, X_test, Y_train, Y_test = train_test_split(X_normalized, Y_one_hot, test_size=0.25, random_state=42)

# Carve the last 21% of the remaining training samples off as validation data.
index = int((len(X_train)*21) / 100)
# Slice from an explicit positive position: with negative slicing an `index`
# of 0 would turn `[-0:]` into the whole array and `[:-0]` into an empty one.
split_at = len(X_train) - index
X_validation = X_train[split_at:]
Y_validation = Y_train[split_at:]
X_train = X_train[:split_at]
Y_train = Y_train[:split_at]
print(len(X_train))
print(len(X_test))
len(X_validation)

5141
2170


1366

In [None]:
from keras.applications.vgg16 import VGG16

# Pre-trained convolutional base without its fully connected classifier.
# NOTE(review): the images fed to this model are read with cv2 and scaled to
# [0, 1]; confirm this matches the preprocessing the ImageNet weights expect
# (see keras.applications.vgg16.preprocess_input).
base_model = VGG16(weights='imagenet', include_top=False)

# The softmax layer must have one unit per one-hot label column, so derive its
# width from the labels instead of hard-coding the class count.
num_classes = Y_one_hot.shape[1]

# New classification head stacked on the base model's output.
transfer_learning_arch = base_model.output
transfer_learning_arch = GlobalAveragePooling2D()(transfer_learning_arch)
transfer_learning_arch = Dense(1024, activation='relu')(transfer_learning_arch)
transfer_learning_arch = Dropout(0.4)(transfer_learning_arch)
transfer_learning_arch = Dense(512, activation='relu')(transfer_learning_arch)
transfer_learning_arch = Dropout(0.4)(transfer_learning_arch)
predictions = Dense(num_classes, activation='softmax')(transfer_learning_arch)

transfer_learning_model = Model(inputs=base_model.input, outputs=predictions)
# uncomment if Model is InceptionV3
# for layer in transfer_learning_model.layers[:280]:
#     layer.trainable = False
# for layer in transfer_learning_model.layers[280:]:
#     layer.trainable = True

## uncomment if Model is VGG16
# Freeze the first 19 layers and train everything after them.
# NOTE(review): 19 is presumably the layer count of the VGG16 base - verify.
for layer in transfer_learning_model.layers[:19]:
    layer.trainable = False
for layer in transfer_learning_model.layers[19:]:
    layer.trainable = True

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:

from keras.optimizers import SGD

# SGD with momentum is the optimizer actually used for training. The Adadelta
# instance that used to be created first was overwritten before use, so it has
# been dropped.
opt = SGD(lr=0.01, momentum=0.9, decay=0.01)
transfer_learning_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# Train the model, checking against the validation split after every epoch.
# The returned History object is left as the cell's displayed value.
transfer_learning_model.fit(
    X_train,
    Y_train,
    batch_size=10,
    epochs=10,
    verbose=1,
    validation_data=(X_validation, Y_validation),
)

Train on 5141 samples, validate on 1366 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f538df20d30>

In [None]:
# Score the trained model on the held-out test split; `results` holds the loss
# followed by the metrics the model was compiled with.
print('\n# Evaluate on test data')
results = transfer_learning_model.evaluate(
    X_test,
    Y_test,
    batch_size=5,
)
print('test loss, test acc:', results)


# Evaluate on test data
test loss, test acc: [1.7326678833963423, 0.5981566905975342]
