# NDSC-2019
## Image Classification Using Transfer Learning

<b>Prerequisite</b> 
You have run the Mobile_traindata.ipynb notebook and have the training and test data (images and CSV files) ready in the same directory as this notebook.

In [None]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
%matplotlib inline

from keras.preprocessing import image

In [None]:
import os
# Working directory of the notebook; the train/test CSV files are read relative to it.
currentdir = os.getcwd()

In [None]:
# Read the training metadata (first CSV column is used as the index).
train = pd.read_csv(os.path.join(currentdir, r'train_data.csv'), index_col=0)
# Work on the first 10000 rows only. Take an explicit copy so that adding
# columns to train1 later does not raise SettingWithCopyWarning or silently
# write to a temporary view of `train`.
train1 = train.iloc[:10000].copy()
train1.head()

In [None]:
# Declare the image dimensions fed to the network.
# For VGG16 and ResNet50 use 224x224, for InceptionResNetV2 use 299x299.
# We are using coloured images, so channels=3 (one each for R, G, B).
nrows = 224
ncolumns = 224
channels = 3  #change to 1 for grayscale image

In [None]:
# Convert an image file to a numpy array after resizing it to (nrows, ncolumns)
def func(path):
    """Load the image at `path` and return it as a scaled pixel array.

    The image is resized to the notebook-level dimensions (nrows, ncolumns)
    and its pixel values are divided by 255.

    Parameters
    ----------
    path : str
        Location of the image file on disk.

    Returns
    -------
    numpy.ndarray
        Array produced by `image.img_to_array`, scaled by 1/255.
    """
    # Use the dimensions declared for the network input; the previous
    # img_rows/img_cols names were never defined anywhere in the notebook.
    img = image.load_img(path, target_size=(nrows, ncolumns))
    img_array = image.img_to_array(img)
    img_array /= 255  # scale pixel values from 0-255 down to 0.0-1.0
    return img_array

In [None]:
# Load every training image from its path and keep the pixel array in a new column.
train1['img_array'] = train1['image_path'].apply(func)

print(train1.shape)
train1.head()

In [None]:
# our image is now represented by a NumPy array of shape (224, 224, 3),assuming TensorFlow "channels last" ordering of course
# but we need to expand the dimensions to (1, 224, 224, 3) so we can pass it through the network as a batch of one
# we'll also preprocess the image by subtracting the mean RGB pixel intensity from the ImageNet dataset

#image = np.expand_dims(image, axis=0)
#image = preprocess_input(image)  #uncomment and run again to see the difference, if it improves accuracy

If at this stage we inspect the shape of our image, you’ll notice the shape of the NumPy array is (224, 224, 3) — each image is 224 pixels tall, 224 pixels wide, and has 3 channels (one for each of the Red, Green, and Blue channels, respectively).

However, before we can pass a single image through our CNN for classification, we need to expand the dimensions to be (1, 224, 224, 3). This is done because when classifying images using Deep Learning and Convolutional Neural Networks, we often send images through the network in “batches” for efficiency. Thus, it’s actually quite rare to pass only one image at a time through the network.

We can then preprocess the image by subtracting the mean RGB pixel intensity computed from the ImageNet dataset. This is what the (currently commented-out) `preprocess_input` call on the last line of the cell above does.

In [None]:
# Load train data into arrays of shape (num_rows, img_height, img_width, num_channels)
from keras.utils import to_categorical

# Number of target classes; matches the 35-unit softmax output layer of the model.
num_classes = 35

# Stack the per-row image arrays by column name instead of by position, so the
# result does not depend on where 'img_array' sits among the columns.
x_train = np.array(train1['img_array'].tolist())
print(x_train.shape)

# Labels are taken from the second column, as before.
# NOTE(review): assumes the second column holds integer class ids in
# [0, num_classes) - confirm against train_data.csv.
y_train = train1.iloc[:, 1].values
y_train = to_categorical(y_train, num_classes)  # one-hot encode the labels
print(y_train.shape)

In [None]:
# Hold out the first 500 samples for validation and train on the remainder.
# Note: x_train/y_train are overwritten, so re-running this cell drops another 500 rows.
x_valid, x_train = x_train[:500], x_train[500:]
y_valid, y_train = y_train[:500], y_train[500:]

print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)

# Train and test the model

In [None]:
from keras.applications.vgg16 import VGG16

# Load the VGG16 convolutional base pre-trained on ImageNet, without its
# classifier head. input_shape must be given explicitly: without it the output
# has undefined spatial dimensions and a Flatten + Dense head cannot be built on top.
conv_base = VGG16(weights='imagenet', include_top=False,
                  input_shape=(nrows, ncolumns, channels))

In [None]:
# First check with VGG16, if accuracy is below 95% even after tuning parameters then use this model
# This works well on large dataset but overfits on small ones
# Uncomment the below two lines to run it

#from keras.applications.resnet50 import ResNet50

#conv_base = ResNet50(include_top=False, weights='imagenet', input_shape=(nrows, ncolumns, channels))

In [None]:
# If the data is extremely huge and want to improve accuracy more try this model
# This model is very heavy on computation power and will take hours and hours to train the model
# Uncomment the below two lines to run it

#from keras.applications import InceptionResNetV2

#conv_base = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(nrows, ncolumns, channels))

In [None]:
# Show the layers of the pre-trained convolutional base selected above.
conv_base.summary()

In [None]:
from keras import layers
from keras import models

# Stack a small classifier head on top of the pre-trained convolutional base.
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
# Output layer: 35 is the number of categories/classes that we have to predict.
# It has to be added to `model` itself, otherwise the network ends at the
# 256-unit layer and cannot be trained against the one-hot labels.
model.add(layers.Dense(35, activation='softmax'))

In [None]:
# Show the full model (convolutional base plus the layers added on top).
model.summary()

In [None]:
# We can also try the RMSprop optimizer with a learning rate of 0.0001.
# We use categorical_crossentropy loss because this is a multi-class
# classification problem with one-hot encoded labels.
from keras import optimizers

model.compile(loss='categorical_crossentropy', optimizer=optimizers.RMSprop(lr=2e-5), metrics=['acc'])

#model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=0.0001, momentum=0.9), metrics=['acc'])

#model.compile(loss='categorical_crossentropy', optimizer=optimizers.adam(lr=0.0001), metrics=['acc'])

# We can check the model with SGD(lr=0.0001, momentum=0.9) and adam(lr=0.0001) to compare accuracy. Uncomment one of the lines above for this.

In [None]:
# Train the model and keep the returned History object: the plotting cells
# read the per-epoch metrics from `history.history`.
history = model.fit(x_train, y_train, batch_size=64, epochs=10,
                    validation_data=(x_valid, y_valid), verbose=1)

In [None]:
# Save the model (optional): first the weights alone, then the whole model.
# NOTE(review): the weights filename is spelled 'wieghts' - keep it in sync
# with whatever code loads the file later.
model.save_weights('model_wieghts.h5')
model.save('model_keras.h5')

In [None]:
# Get the per-epoch metrics from the history object.
# `history` must be the object returned by model.fit.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

# Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Start a second figure so the loss curves are not drawn over the accuracy curves
plt.figure()
# Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Helper that smooths a curve so the plots look cleaner.
def smooth_plot(points, factor=0.7):
    """Return an exponentially smoothed copy of `points`.

    The first value is kept as is. Every following value is blended with the
    previous smoothed value: ``previous * factor + point * (1 - factor)``.

    Parameters
    ----------
    points : iterable of numbers
        The raw series, e.g. per-epoch accuracy values.
    factor : float, optional
        Weight given to the previous smoothed value (default 0.7).

    Returns
    -------
    list
        Smoothed values, one per input point (empty if `points` is empty).
    """
    remaining = iter(points)
    try:
        first = next(remaining)
    except StopIteration:
        # Nothing to smooth
        return []

    smoothed = [first]
    for value in remaining:
        smoothed.append(smoothed[-1] * factor + value * (1 - factor))
    return smoothed

In [None]:
# Plot the smoothed training and validation accuracy curves
plt.plot(epochs, smooth_plot(acc), 'b', label='Training accuracy')
plt.plot(epochs, smooth_plot(val_acc), 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (smoothed)')
plt.legend()
plt.show()

# Predict Test Data

In [None]:
# Load the test metadata; the first CSV column becomes the index.
test_csv_path = os.path.join(currentdir, 'test_data.csv')
test = pd.read_csv(test_csv_path, index_col=0)
test.shape

In [None]:
# Load every test image from its path and keep the pixel array in a new column.
test['img_array'] = test['image_path'].apply(func)

print(test.shape)
test.head()

In [None]:
# Prepare the test data: stack the per-row image arrays into one array.
# The column is selected by name rather than by position, so the result does
# not depend on how many other columns the test CSV contains.
x_test = np.array(test['img_array'].tolist())
print(x_test.shape)

In [None]:
predictions = model.predict(x_test)

# Keras' decode_predictions only understands the 1000-class ImageNet output,
# so the top predictions for our own classes are extracted with NumPy instead.
top_k = 3  # number of highest-probability classes to keep per sample

# Class indices sorted by descending probability, truncated to the top_k best
top_classes = np.argsort(predictions, axis=1)[:, ::-1][:, :top_k]
top_probs = np.take_along_axis(predictions, top_classes, axis=1)

# P holds one list per sample, each containing top_k (class_index, probability) tuples
P = [list(zip(classes.tolist(), probs.tolist()))
     for classes, probs in zip(top_classes, top_probs)]

In [None]:
# Calculate the loss and accuracy of the trained model on the test set.
# NOTE(review): y_test is not defined in any cell visible in this notebook.
# It has to be built from the test labels (one-hot encoded the same way as
# y_train) before this cell can run - confirm the test CSV actually has labels.

scores = model.evaluate(x_test, y_test, verbose=1)
# scores[0] is reported as the loss and scores[1] as the accuracy
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])