In [1]:
import sys
sys.path.append("..") ## resetting the path to the parent directory

In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.style.use('ggplot')

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
#keras = tf.keras
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [4]:
np.random.seed(101)

In [5]:
df = pd.read_csv('../data/cleaned_data.csv').drop('Unnamed: 0',axis=1)

In [6]:
#df.drop('Unnamed: 0',axis=1,inplace=True)

In [7]:
def filename_appender(df):
    df.fname = df.fname.apply(lambda x: '../img/' + x + '.jpg')
    return df

In [8]:
#df = filename_appender(df)

In [9]:
from sklearn.model_selection import train_test_split

X = df.drop('code',axis=1)

y = df.code

X_train, X_test, y_train,y_test = train_test_split(X,y, test_size=0.1)

X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, test_size=0.1)

In [10]:
# The format specification here left pads zeros on the number: 0004.
train_filenames = [fname for fname in X_train.fname]
val_filenames = [fname for fname in X_val.fname]
test_filenames = [fname for fname in X_test.fname]

In [11]:
df.to_csv('../data/cleaned_data.csv')

## Preparing datasets
### Finding and deleting corrupted images that Tensorflow cannot handle

In [12]:
def reset_indices(arr):
    arr.reset_index(drop=True,inplace=True)
    return arr

In [13]:
def load_and_preprocess_image(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [150, 150])
    image /= 255.0
    return image

def remove_corrupt_files(filenames,y_arr):
    imgs = []
    
    y_arr = reset_indices(y_arr)
    for i,filename in enumerate(filenames):        
        # Try reading, decoding, resizing and normalizing images
        # Only appends files that do not cause any errors to an 'imgs' list
        try:
            img = load_and_preprocess_image(filename)
            imgs.append(np.asarray(img))  

            # If error occurs, skip to the next file and remove the file from the y array using its index
        except:
            
            del y_arr[i]
            continue
              
    X = np.asarray(imgs)
    y = np.asarray(y_arr)
    
    return X, y

In [14]:
# X_train_f,y_train_f = remove_corrupt_files(train_filenames,y_train)
# X_val_f,y_val_f = remove_corrupt_files(val_filenames,y_val)
# X_test_f,y_test_f = remove_corrupt_files(test_filenames,y_test)

In [15]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [16]:
train_datagen = ImageDataGenerator(
        rotation_range=40,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')
test_datagen = ImageDataGenerator(rescale=1./255)
batch_size = 32

train_generator = train_datagen.flow_from_directory(
        '../imgs/train_folder',  # this is the target directory
        target_size=(224, 224),  # all images will be resized to 150x150
        batch_size=batch_size,
        color_mode="rgb",
        class_mode='categorical')  # since we use binary_crossentropy loss, we need binary labels

validation_generator = test_datagen.flow_from_directory(
        '../imgs/validation_folder',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical')


steps_per_epoch = 100
epoch  = 50
def my_gen(gen):
    i=0
    while i < steps_per_epoch * epoch:
        try:
            data, labels = next(gen)
            i+=1
            yield data, labels
        except:
            pass

Found 10254 images belonging to 12 classes.
Found 1140 images belonging to 12 classes.


## Training the tensorflow model

In [17]:
from tensorflow.keras import backend as K
img_width, img_height = 150, 150
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

In [18]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(input_shape),activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(12))
model.add(Activation('softmax'))

optimizer = SGD(lr=0.005, momentum=0.9)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
activation (Activation)      (None, 72, 72, 64)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
activation_1 (Activation)    (None, 34, 34, 128)       0

In [20]:
from tensorflow.keras import callbacks

filepath="../checkpoints/base_model_weights_improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5"


my_callbacks = [
    callbacks.EarlyStopping(monitor='val_loss', min_delta = 0.001, mode= 'min', patience=5),
    callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=0,
    min_delta=0.001, cooldown=1, min_lr=0, mode = 'min'),
    callbacks.ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
]

In [21]:
# filepath="../checkpoints/base-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5"
# checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
# callbacks_list = [checkpoint]

In [22]:
history = model.fit( my_gen(train_generator),
        steps_per_epoch=steps_per_epoch,
        epochs=epoch,
        validation_data=validation_generator,callbacks=my_callbacks)

Epoch 1/50


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-22-397ed871f397>", line 4, in <module>
    validation_data=validation_generator,callbacks=my_callbacks)
  File "/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 66, in _method_wrapper
    return method(self, *args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 848, in fit
    tmp_logs = train_function(iterator)
  File "/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 580, in __call__
    result = self._call(*args, **kwds)
  File "/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 644, in _call
    return self._stateless_fn(*args, **kwds)
  File "/opt/conda/lib/python3.7/site-packages/tensorflow/python/e

InvalidArgumentError:  Input to reshape is a tensor with 2768896 values, but the requested shape requires a multiple of 36992
	 [[node sequential/flatten/Reshape (defined at <ipython-input-22-397ed871f397>:4) ]] [Op:__inference_train_function_791]

Function call stack:
train_function


In [None]:
test_loss, test_acc = model.evaluate(X_test_f, y_test_f, verbose=0)

print('\nTest accuracy {:5.2f}%'.format(100*test_acc))

## Visualize the model

In [None]:
training_loss = history.history['loss']
test_loss = history.history['val_loss']

epoch_count = range(1,len(training_loss)+1)

plt.plot(epoch_count,training_loss,'r--')
plt.plot(epoch_count,test_loss,'b-')
plt.legend(['Training Loss', 'Validation Loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show();

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

epoch_count = range(1,len(training_loss)+1)

plt.plot(acc, 'r--', label='Training Accuracy')
plt.plot(val_acc, 'b-', label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy');
