In [7]:
# Shell command run through IPython: recursively delete machine_hack/kmodel_dir/.
!rm -r machine_hack/kmodel_dir/

## Data preparation

In [1]:
import subprocess
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import cv2
import numpy as np
import os
import pandas as pd
from sklearn import model_selection, metrics, utils
import shutil
import random

In [2]:
# Load the training metadata; later cells read its 'breed' and 'image_id' columns.
train_df = pd.read_csv('dataset/train.csv')
# Disabled exploratory plot of the per-breed image counts:
# plt.figure(figsize=(1, 6))
# sns.countplot(train_df['breed'])
# # ax = sns.barplot(x="breed", y="Frequency", data=train_df['breed'])

In [3]:
# Breeds that have fewer than 200 training images, in value_counts() order.
# The counts are computed once instead of being recomputed for every index.
breed_counts = train_df['breed'].value_counts()
new_classes = breed_counts[breed_counts < 200].index.tolist()

In [4]:
# Keep only the rows of the selected breeds, grouped in new_classes order.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every
# call, so the per-breed slices are collected first and concatenated once.
breed_frames = [train_df[train_df['breed'] == breed] for breed in new_classes]
if breed_frames:
    # ignore_index=True renumbers the rows 0..n-1.
    final_df = pd.concat(breed_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep an empty frame with the same columns.
    final_df = train_df.iloc[0:0].reset_index(drop=True)

In [5]:
# Stratified 90/10 split by breed. random_state pins the shuffle so the train/valid
# folders built from this split are the same on every Restart & Run All.
train, valid = model_selection.train_test_split(
    final_df, test_size=.1, stratify=final_df['breed'], random_state=42)

In [6]:
!rm -r data/train
!rm -r data/valid
!mkdir data/train
!mkdir data/valid

In [7]:
# One sub-folder per breed under both data/train and data/valid.
# os.makedirs replaces the external `mkdir` process: it is portable, raises on a
# real failure instead of returning an ignored exit code, and tolerates re-runs.
for breed in final_df['breed'].value_counts().index:
    os.makedirs(os.path.join('data/train', breed), exist_ok=True)
    os.makedirs(os.path.join('data/valid', breed), exist_ok=True)

In [8]:
# Copy every training image into the folder named after its breed.
for image_id, breed in zip(train['image_id'], train['breed']):
    source_path = 'dataset/train/' + image_id + '.jpg'
    target_path = 'data/train/' + breed + '/' + image_id + '.jpg'
    shutil.copy(source_path, target_path)

In [9]:
# Copy every validation image into the folder named after its breed.
for image_id, breed in zip(valid['image_id'], valid['breed']):
    source_path = 'dataset/train/' + image_id + '.jpg'
    target_path = 'data/valid/' + breed + '/' + image_id + '.jpg'
    shutil.copy(source_path, target_path)

# Image classification: fouls or not

The images are retrieved using the Bing image downloader API, and the model is built with TensorFlow and TensorFlow Hub.

## Modelling

In [11]:
# Training callbacks: keep the best checkpoint (judged on 'val_accuracy') and write
# TensorBoard logs, both under model_dir/.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='model_dir/modelfinal.h5',
    monitor='val_accuracy',
    save_best_only=True,
)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='model_dir/logs')
callbacks = [checkpoint_callback, tensorboard_callback]

In [12]:
# (TF Hub module name, square input resolution in pixels) used by the hub model.
module_selection = ("mobilenet_v2_035_192", 192)
handle_base = module_selection[0]
pixels = module_selection[1]
MODULE_HANDLE = "https://tfhub.dev/google/imagenet/{}/feature_vector/4".format(handle_base)
IMAGE_SIZE = (pixels,) * 2
BATCH_SIZE = 32
print("Using {} with input size {}".format(MODULE_HANDLE, IMAGE_SIZE))

Using https://tfhub.dev/google/imagenet/mobilenet_v2_035_192/feature_vector/4 with input size (192, 192)


In [13]:
# Settings shared by both generators: multiply pixels by 1/255, resize to IMAGE_SIZE
# with bilinear interpolation, and batch by BATCH_SIZE.
datagen_kwargs = {"rescale": 1./255}
dataflow_kwargs = {
    "target_size": IMAGE_SIZE,
    "batch_size": BATCH_SIZE,
    "interpolation": "bilinear",
}

# Validation images are only rescaled, never augmented.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**datagen_kwargs)
valid_generator = valid_datagen.flow_from_directory(
    'data/valid/', shuffle=True, **dataflow_kwargs)

# Optional augmentation for the training images; when it is switched off the
# training data goes through the same plain generator as the validation data.
do_data_augmentation = False
augmentation_kwargs = {
    "rotation_range": 40,
    "horizontal_flip": True,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.2,
    "zoom_range": 0.2,
}
train_datagen = (
    tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_kwargs, **datagen_kwargs)
    if do_data_augmentation
    else valid_datagen
)

train_generator = train_datagen.flow_from_directory(
    'data/train/', shuffle=True, **dataflow_kwargs)

Found 311 images belonging to 28 classes.
Found 2793 images belonging to 28 classes.


### Transfer learning

In [23]:
# Whether the hub backbone's weights are updated during training.
do_fine_tuning = False

print("Building model with", MODULE_HANDLE)
model = tf.keras.Sequential()
# The input shape is declared explicitly so the model can be properly loaded by
# the TFLiteConverter.
model.add(tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE + (3,)))
model.add(hub.KerasLayer(MODULE_HANDLE, trainable=do_fine_tuning))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(rate=0.1))
# Classification head: one un-activated output per class, lightly L2-regularised.
model.add(tf.keras.layers.Dense(
    train_generator.num_classes,
    kernel_regularizer=tf.keras.regularizers.l2(0.00001),
))
model.build((None,) + IMAGE_SIZE + (3,))
model.summary()

Building model with https://tfhub.dev/google/imagenet/mobilenet_v2_035_192/feature_vector/4
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_3 (KerasLayer)   (None, 1280)              410208    
_________________________________________________________________
batch_normalization_2 (Batch (None, 1280)              5120      
_________________________________________________________________
dropout_3 (Dropout)          (None, 1280)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 28)                35868     
Total params: 451,196
Trainable params: 38,428
Non-trainable params: 412,768
_________________________________________________________________


In [24]:
# 'balanced' per-class weights computed from the training generator's labels.
# `classes` and `y` are passed by keyword: scikit-learn made them keyword-only, so the
# positional form raises a TypeError on current releases. Keywords work on old ones too.
class_weights = utils.class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes)
# Keras takes the weights as a {class_index: weight} dict.
class_weights = dict(enumerate(class_weights))
# class_weights

In [25]:
# `learning_rate` is the supported argument name; the `lr` alias is deprecated in
# TF 2.x and rejected by newer Keras optimizers.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.01),
    metrics=['accuracy'])

In [None]:
# Steps are the floor of samples / batch size, i.e. only whole batches are counted.
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

fit_options = dict(
    epochs=100,
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps,
    class_weight=class_weights,
    shuffle=True,
)
# Only the per-epoch metric history (a dict of lists) is kept.
hist = model.fit(train_generator, **fit_options).history

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100

In [None]:
# `hist` holds one value per epoch, so the x axis is labelled "Epoch" rather than
# "Training Steps"; a legend distinguishes the training curve from the validation one.
plt.figure()
plt.ylabel("Loss (training and validation)")
plt.xlabel("Epoch")
plt.ylim([0,5])
plt.plot(hist["loss"], label="training")
plt.plot(hist["val_loss"], label="validation")
plt.legend()

plt.figure()
plt.ylabel("Accuracy (training and validation)")
plt.xlabel("Epoch")
plt.ylim([0,1])
plt.plot(hist["accuracy"], label="training")
plt.plot(hist["val_accuracy"], label="validation")
plt.legend();

### Custom model

In [None]:
# Small CNN trained from scratch on train_generator / valid_generator.
#  * The input shape is taken from IMAGE_SIZE, the size those generators yield.
#    IMAGE_SHAPE is only defined in a later cell and describes a different resolution.
#  * The last layer is a plain Dense layer producing logits: the loss below is
#    configured with from_logits=True, so it must not receive sigmoid outputs.
c_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), input_shape=IMAGE_SIZE + (3,)),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(64, (3, 3)),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(128, (3, 3)),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # Flatten converts the 3D feature maps to 1D feature vectors for the dense head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(train_generator.num_classes),
])

# `learning_rate` replaces the deprecated `lr` optimizer argument.
c_model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.01),
    metrics=['accuracy'])

In [None]:
# Steps are the floor of samples / batch size, i.e. only whole batches are counted.
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

c_fit_options = dict(
    epochs=50,
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps,
    class_weight=class_weights,
)
# Only the per-epoch metric history (a dict of lists) is kept.
c_hist = c_model.fit(train_generator, **c_fit_options).history

### Keras application

In [None]:
batch_size = 32
# number of training epochs
epochs = 30
# (height, width, channels) of each image
IMAGE_SHAPE = (224, 224, 3)

# Validation images are only rescaled; training images are also randomly
# rotated, shifted, sheared, zoomed and channel-shifted.
valid_data_gen = ImageDataGenerator(rescale=1/255)
train_augmentation = dict(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.4,
    shear_range=0.2,
    zoom_range=0.2,
    channel_shift_range=0.4,
)
train_data_gen = ImageDataGenerator(**train_augmentation)

# Options common to both directory iterators.
keras_flow_options = dict(
    batch_size=batch_size,
    target_size=(IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
    shuffle=True,
)
# training dataset generator
train_image_generator = train_data_gen.flow_from_directory(
    directory='data/train/', **keras_flow_options)
# validation dataset generator
valid_image_generator = valid_data_gen.flow_from_directory(
    directory='data/valid/', **keras_flow_options)


In [None]:
# 'balanced' per-class weights computed from the training generator's labels.
# `classes` and `y` are passed by keyword: scikit-learn made them keyword-only, so the
# positional form raises a TypeError on current releases. Keywords work on old ones too.
k_class_weights = utils.class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_image_generator.classes),
    y=train_image_generator.classes)
# Keras takes the weights as a {class_index: weight} dict.
k_class_weights = dict(enumerate(k_class_weights))
# class_weights

In [None]:
input_shape = (224, 224, 3)
# include_top=False with global average pooling turns MobileNetV2 into a feature
# extractor returning one pooled feature vector per image. With the default
# include_top=True the "conv base" would instead emit the 1000 ImageNet class
# probabilities, which the new head would then have to learn from.
conv_base = tf.keras.applications.MobileNetV2(
    input_shape=input_shape, include_top=False, pooling='avg', weights='imagenet')

k_model = tf.keras.models.Sequential()
k_model.add(conv_base)
k_model.add(tf.keras.layers.BatchNormalization())
k_model.add(tf.keras.layers.Dropout(0.2))
k_model.add(tf.keras.layers.Dense(512, activation='relu'))
k_model.add(tf.keras.layers.Dropout(0.5))
# Un-activated output layer: the loss below expects logits.
k_model.add(tf.keras.layers.Dense(train_image_generator.num_classes))

# The model has six layers, so [:-4] selects the first two (the backbone and the
# BatchNormalization layer); only the layers after them stay trainable.
for layer in k_model.layers[:-4]:
    layer.trainable = False

# print the summary of the model architecture
# k_model.summary()
# CategoricalAccuracy compares the arg-max of the prediction with the one-hot label.
# tf.keras.metrics.Accuracy() checks element-wise equality of labels and raw outputs,
# which is meaningless here. It is named 'accuracy' so that 'val_accuracy' exists
# for callbacks that monitor it.
# `learning_rate` replaces the deprecated `lr` optimizer argument.
k_model.compile(
                optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
                metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
                         tf.keras.metrics.TopKCategoricalAccuracy()])

In [None]:
# Callbacks for the Keras-application model: keep the best checkpoint (judged on
# 'val_accuracy') and write TensorBoard logs, both under kmodel_dir/.
k_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='kmodel_dir/modelfinal.h5',
    monitor='val_accuracy',
    save_best_only=True,
)
k_tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='kmodel_dir/logs')
k_callbacks = [k_checkpoint_callback, k_tensorboard_callback]

In [None]:
# Round up so the last, partial batch is included. np.ceil returns a float, while
# Keras documents steps_per_epoch / validation_steps as integers, hence the int().
training_steps_per_epoch = int(np.ceil(train_image_generator.samples / batch_size))
validation_steps_per_epoch = int(np.ceil(valid_image_generator.samples / batch_size))
# train using the generators
k_hist = k_model.fit(train_image_generator,
              steps_per_epoch=training_steps_per_epoch,
              validation_data=valid_image_generator,
              validation_steps=validation_steps_per_epoch,
              epochs=epochs,
              class_weight=k_class_weights,
              verbose=1,
              callbacks=k_callbacks)

### Inferencing

In [None]:
class Predictions:
    """Run a trained model on single image files taken from one directory.

    Args:
        image_dir: directory that contains the image files.
        model: object exposing ``predict(batch)`` (e.g. a trained Keras model).
        test_df: stored on the instance as-is; not used by this class.
        x: image height in pixels fed to the model.
        y: image width in pixels fed to the model.
    """

    def __init__(self, image_dir, model, test_df, x, y):
        self.image_dir = image_dir
        self.model = model
        self.test_df = test_df
        self.x = x
        self.y = y

    def predict(self, image_name):
        """Return ``model.predict`` output for one image, as a batch of size one.

        The image is prepared like the training data: RGB channel order, resized
        to ``(x, y)`` and scaled by 1/255.

        Args:
            image_name: file name of the image inside ``image_dir``.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        self.image_name = image_name
        image_path = os.path.join(self.image_dir, self.image_name)
        test_img = cv2.imread(image_path)
        if test_img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: ' + image_path)
        # OpenCV decodes to BGR, whereas the Keras generators used for training yield RGB.
        test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
        # cv2.resize expects (width, height). np.resize is not an image resize: it
        # only repeats or truncates the flat pixel buffer to fill the new shape.
        test_img = cv2.resize(test_img, (self.y, self.x), interpolation=cv2.INTER_LINEAR)
        # Same 1/255 scaling as the ImageDataGenerators in this notebook.
        test_img = test_img.astype(np.float32) / 255.0
        tf_model_predictions = self.model.predict(test_img[np.newaxis, ...])

        return tf_model_predictions

In [None]:
#train_generator.class_indices
test_df = pd.read_csv('dataset/test.csv')
image_ids = list(test_df['image_id'])

# Build the predictor once, outside the loop, and with IMAGE_SIZE: `model` was built
# for that resolution, which the previously hard-coded 160x160 did not match.
prediction_module = Predictions('dataset/test/', model, 'test', IMAGE_SIZE[0], IMAGE_SIZE[1])

test_results = []
for image_id in image_ids:
    predits = prediction_module.predict(image_id + '.jpg')
    # 'breed' holds the index of the highest-scoring class for now.
    test_results.append({'image_id': image_id, 'breed': np.argmax(predits[0])})

In [None]:
# Collect the per-image prediction records into a table and show how many test
# images were assigned to each predicted class index.
test_results_df = pd.DataFrame(test_results)
test_results_df['breed'].value_counts()

In [None]:
# Invert class_indices ({breed name: class index}) into {class index: breed name}.
# Using the index stored in the mapping does not rely on dict iteration order, and
# the comprehension no longer shadows the built-in `id`.
new_id = {index: breed for breed, index in train_generator.class_indices.items()}

# Replace the predicted class indices with breed names.
test_results_df['breed'] = [new_id[x] for x in test_results_df['breed']]

In [None]:
# to_csv does not create missing folders, so make sure results/ exists first.
os.makedirs('results', exist_ok=True)
# `index` is a boolean option; False states the intent (no index column) explicitly.
test_results_df.to_csv('results/vishnu_submit.csv', index=False)

### Using OOPs

In [None]:
class Preprocessing:
    """Creates the training and validation image generators for two folders.

    Args:
        x, y: target image size, passed to ``flow_from_directory`` as ``(x, y)``.
        batch_size: number of images per batch.
        train_path: directory with one sub-folder per class (training images).
        valid_path: directory with one sub-folder per class (validation images).
    """

    def __init__(self, x, y, batch_size, train_path, valid_path):
        self.x, self.y = x, y
        self.batch_size = batch_size
        self.train_path, self.valid_path = train_path, valid_path

    def generator(self):
        """Return ``(train_generator, valid_generator)``.

        Training images are rescaled by 1/255 and randomly augmented; validation
        images are only rescaled. Both use categorical (one-hot) labels.
        """
        augmentation = dict(
            rescale=1./255,
            rotation_range=30,
            width_shift_range=0.2,
            height_shift_range=0.4,
            shear_range=0.2,
            zoom_range=0.2,
            channel_shift_range=0.4,
            fill_mode="nearest",
            cval=0.4,
            horizontal_flip=True,
            vertical_flip=True,
        )
        # Identical flow settings for both folders: resize to (x, y) and batch.
        flow_options = dict(
            target_size=(self.x, self.y),
            batch_size=self.batch_size,
            class_mode='categorical',
        )

        train_generator = ImageDataGenerator(**augmentation).flow_from_directory(
            self.train_path, **flow_options)
        valid_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
            self.valid_path, **flow_options)

        return train_generator, valid_generator
    
class Model:
    """Transfer-learning classifier built on top of a TF Hub module.

    Args:
        train_generator: training batches; must expose ``num_classes``,
            ``samples`` and ``batch_size``.
        valid_generator: validation batches; must expose ``samples`` and
            ``batch_size``.
        met: metric(s) handed to ``compile``.
        los: loss handed to ``compile``.
        model_link: TF Hub handle of the backbone.
        x, y: spatial input size the model is built with.
        class_weight: class weights handed to ``fit``.
        callbacks: callbacks handed to ``fit``.
    """

    def __init__(self, train_generator, valid_generator, met, los, model_link, x, y, class_weight, callbacks):
        self.train_generator = train_generator
        self.valid_generator = valid_generator
        self.met = met
        self.los = los
        self.model_link = model_link
        self.x = x
        self.y = y
        self.class_weight = class_weight
        self.callbacks = callbacks

    def compiler(self, dropout):
        """Build and compile the network: hub module -> dropout -> dense output.

        Args:
            dropout: dropout rate applied before the output layer.

        Returns:
            The compiled ``tf.keras.Sequential`` model.
        """
        self.dropout = dropout
        tl_model = tf.keras.Sequential([
                    hub.KerasLayer(self.model_link, trainable=True),
                    tf.keras.layers.Dropout(self.dropout),
                    tf.keras.layers.Dense(self.train_generator.num_classes,
                                          kernel_regularizer=tf.keras.regularizers.l2(0.0001))
        ])
        tl_model.build([None, self.x, self.y, 3])
        # `learning_rate` is the supported name; the `lr` alias is deprecated in
        # TF 2.x and rejected by newer Keras optimizers.
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        tl_model.compile(optimizer=optimizer, loss=self.los, metrics=self.met)

        return tl_model

    def train(self, epochs, model):
        """Fit ``model`` on the stored generators.

        Args:
            epochs: number of epochs to train for.
            model: compiled model exposing ``fit``.

        Returns:
            ``(model, history)`` where ``history`` is the ``.history`` attribute
            of the object returned by ``fit``.
        """
        self.epochs = epochs
        self.model = model
        # Round up so the final partial batch is included; int() because np.ceil
        # returns a float and Keras documents the step counts as integers.
        steps_per_epoch = int(np.ceil(self.train_generator.samples / self.train_generator.batch_size))
        val_steps_per_epoch = int(np.ceil(self.valid_generator.samples / self.valid_generator.batch_size))
        hist = self.model.fit(
                            self.train_generator,
                            epochs=self.epochs,
                            verbose=1,
                            steps_per_epoch=steps_per_epoch,
                            class_weight=self.class_weight,
                            callbacks=self.callbacks,
                            validation_data=self.valid_generator,
                            validation_steps=val_steps_per_epoch).history

        return self.model, hist
    
# CategoricalAccuracy compares the arg-max of the prediction with the one-hot label.
# tf.keras.metrics.Accuracy() checks element-wise equality of labels and raw outputs,
# which is meaningless for one-hot labels against logits. The metric is named
# 'accuracy' so that a 'val_accuracy' value is reported during validation.
met = tf.keras.metrics.CategoricalAccuracy(name='accuracy')
los = tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1)
# NOTE(review): this handle is the ImageNet *classification* module, whereas the
# transfer-learning section above uses a feature_vector module — confirm which is intended.
model_weights = 'https://tfhub.dev/google/imagenet/mobilenet_v2_035_224/classification/4'

In [None]:
# 224x224 inputs in batches of 8, read from the train/valid folders.
preprocess = Preprocessing(
    x=224, y=224, batch_size=8, train_path='data/train', valid_path='data/valid')
train_generator, valid_generator = preprocess.generator()

In [None]:
# Wire generators, metric, loss, hub handle, input size, class weights and callbacks
# into the wrapper, then build (dropout 0.4) and train (40 epochs) through it.
image_class_model = Model(
    train_generator=train_generator,
    valid_generator=valid_generator,
    met=met,
    los=los,
    model_link=model_weights,
    x=224,
    y=224,
    class_weight=class_weights,
    callbacks=callbacks,
)
tl_model = image_class_model.compiler(0.4)
img_model, img_hist = image_class_model.train(40, tl_model)

## Inferencing

In [None]:
class Predictions:
    """Run a trained model on single image files taken from one directory.

    Args:
        image_dir: directory that contains the image files.
        model: object exposing ``predict(batch)`` (e.g. a trained Keras model).
        test_df: stored on the instance as-is; not used by this class.
        x: image height in pixels fed to the model.
        y: image width in pixels fed to the model.
    """

    def __init__(self, image_dir, model, test_df, x, y):
        self.image_dir = image_dir
        self.model = model
        self.test_df = test_df
        self.x = x
        self.y = y

    def predict(self, image_name):
        """Return ``model.predict`` output for one image, as a batch of size one.

        The image is prepared like the training data: RGB channel order, resized
        to ``(x, y)`` and scaled by 1/255.

        Args:
            image_name: file name of the image inside ``image_dir``.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        self.image_name = image_name
        image_path = os.path.join(self.image_dir, self.image_name)
        test_img = cv2.imread(image_path)
        if test_img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: ' + image_path)
        # OpenCV decodes to BGR, whereas the Keras generators used for training yield RGB.
        test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
        # cv2.resize expects (width, height). np.resize is not an image resize: it
        # only repeats or truncates the flat pixel buffer to fill the new shape.
        test_img = cv2.resize(test_img, (self.y, self.x), interpolation=cv2.INTER_LINEAR)
        # Same 1/255 scaling as the ImageDataGenerators in this notebook.
        test_img = test_img.astype(np.float32) / 255.0
        tf_model_predictions = self.model.predict(test_img[np.newaxis, ...])

        return tf_model_predictions

### Visualizing images

In [None]:
import matplotlib.pyplot as plt
import cv2

In [None]:
# cv2.imread's second argument is an IMREAD_* read flag, not a colour-conversion code,
# so passing COLOR_BGR2RGB there leaves the image in BGR. Convert explicitly with
# cvtColor so matplotlib (which expects RGB) shows the true colours.
test_img = cv2.cvtColor(cv2.imread('dataset/test/2006370aad.jpg'), cv2.COLOR_BGR2RGB)
plt.imshow(test_img);

In [None]:
#train_generator.class_indices
# Identifiers of the test images, in the order they are listed in test.csv.
test_df = pd.read_csv('dataset/test.csv')
image_ids = test_df['image_id'].tolist()

In [None]:
# One predictor around img_model (224x224 inputs) serves every test image.
prediction_module = Predictions('dataset/test/', img_model, 'test', 224, 224)

test_results = []
for image_id in image_ids:
    predits = prediction_module.predict(image_id + '.jpg')
    # Keep the raw model output next to the index of the highest-scoring class.
    test_results.append({
        'id-preds': predits,
        'image_id': image_id,
        'breed': np.argmax(predits[0]),
    })

In [None]:
# Collect the per-image prediction records into a table and show how many test
# images were assigned to each predicted class index.
test_results_df = pd.DataFrame(test_results)
test_results_df['breed'].value_counts()

In [None]:
# Raw model output stored for the row labelled 6.
test_results_df.loc[6, 'id-preds']

In [None]:
# Replace each predicted class index in 'breed' with the value new_id stores for it.
test_results_df['breed'] = [new_id[x] for x in test_results_df['breed']]

In [None]:
# Write the submission table to vishnu_submit.csv in the current working directory.
test_results_df.to_csv('vishnu_submit.csv', index=None)

In [None]:
# Shell command run through IPython: archive the results folder recursively into results.zip.
!zip -r results.zip results

In [None]:
rm -r results/.ipynb_checkpoints/

In [None]:
# Invert class_indices ({breed name: class index}) into {class index: breed name}.
# Using the index stored in the mapping does not rely on dict iteration order, and
# the comprehension no longer shadows the built-in `id`.
new_id = {index: breed for breed, index in train_generator.class_indices.items()}