# Business Problem

1. **Objective**
   - To classify an image into one of 3 categories: Driving License, Social Security Card, Other
   - Learn from the images and their properties to classify them into the respective categories more accurately


2. **Machine Learning Problem**
   - Develop a machine learning model based on a convolutional neural network to learn the properties of the 3 categories of images and predict accurately for new images


3. **Technology**
   - Python, Scikit-learn, tensorflow, keras, Numpy
   

4. **Decision making**
   - Select the model that performs best w.r.t. classification accuracy
   - Metrics: Accuracy
   

5. **Deployment**
   - Deploy model in a scalable way so that business decisions can be taken in near real time to classify images



**Dataset**<br>
driving_license images<br>
social_security images<br>
other images<br>

In [None]:
!pip install numpy==1.21.5
!pip install keras==2.7.0
!pip install tensorflow==2.7.0
!pip install matplotlib==3.5.1

In [None]:
import numpy as np
import pathlib
from tensorflow import keras
from tensorflow.keras import layers
import PIL
import tensorflow as tf
from tensorflow.keras.models import Sequential

In [None]:
from zipfile import ZipFile
import urllib.request
from io import BytesIO

# Location of the zipped dataset.
DATA_URL = 'https://s3.amazonaws.com/projex.dezyre.com/cnn-models-for-image-classification-in-python/materials/data.zip'

# Read the whole archive into memory and open it as a zip file. The `with`
# block closes the HTTP response as soon as the payload has been read,
# instead of leaving the connection open for the rest of the session.
with urllib.request.urlopen(DATA_URL) as folder:
    zipfile = ZipFile(BytesIO(folder.read()))

# List the archive members (shown as the cell output).
zipfile.namelist()

In [None]:
# Unpack every member of the downloaded archive into the local "input" directory.
zipfile.extractall('input')

In [None]:
# Directory that holds the training images.
data_dir = pathlib.Path("input/Training_data/")

In [None]:
# Count every entry located exactly one level below a sub-folder of data_dir.
image_count = sum(1 for _ in data_dir.glob('*/*'))
print(image_count)

In [None]:
# Materialise the paths found in each class folder so they can be indexed.
driving_license = [*data_dir.glob('driving_license/*')]
social_security = [*data_dir.glob('social_security/*')]
others = [*data_dir.glob('others/*')]

In [None]:
# Open one driving-license sample; as the last expression it becomes the cell output.
PIL.Image.open(str(driving_license[2]))

In [None]:
# Open one social-security sample; as the last expression it becomes the cell output.
PIL.Image.open(str(social_security[10]))

In [None]:
# Open one sample from the "others" class; as the last expression it becomes the cell output.
PIL.Image.open(str(others[140]))

In [None]:
# Batch size and target image size passed to every image_dataset_from_directory
# call and to load_img in this notebook.
batch_size = 32
img_height = 180
img_width = 180

In [None]:
# Training subset: 80% of the images under data_dir (validation_split=0.2).
# The seed and split fraction match the validation cell so both cells split
# the same file listing.
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

In [None]:
# Validation subset: the remaining 20% of the images under data_dir.
# Uses the same seed and split fraction as the training cell.
val_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

In [None]:
# Class names as exposed by the dataset; integer labels are used as indices
# into this list further down.
class_names = train_ds.class_names
print(class_names)

In [None]:
import matplotlib.pyplot as plt

# Show the first nine images of one training batch in a 3x3 grid, each
# titled with the class name looked up from its label.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        # Cast to uint8 so imshow gets integer pixel values.
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[labels[i]])
        plt.axis("off")

In [None]:
# Peek at a single batch to see the tensor shapes the dataset yields.
for image_batch, labels_batch in train_ds.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Cache both datasets and prefetch with an auto-tuned buffer size; only the
# training data is shuffled (shuffle buffer of 1000).
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
# Standalone rescaling layer (factor 1/255), used below to inspect normalised pixel values.
normalization_layer = layers.Rescaling(1./255)

In [None]:
# Apply the rescaling layer to the images of the training dataset (labels
# pass through unchanged) and inspect the value range of one image.
normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image))

In [None]:
num_classes = len(class_names)

# Baseline CNN: input rescaling, three Conv2D + MaxPooling2D stages with
# 16/32/64 filters, then a dense head.
stack = [layers.Rescaling(1./255, input_shape=(img_height, img_width, 3))]
for filters in (16, 32, 64):
    stack.append(layers.Conv2D(filters, 3, padding='same', activation='relu'))
    stack.append(layers.MaxPooling2D())
stack.extend([
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # No activation on the output layer: the loss is configured with
    # from_logits=True when the model is compiled.
    layers.Dense(num_classes),
])

model = Sequential(stack)

In [None]:
# Adam optimizer with sparse categorical cross-entropy computed on raw logits
# (from_logits=True), tracking accuracy.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
# Print the model's layer summary.
model.summary()

In [None]:
# Train the baseline model for 20 epochs, validating on val_ds.
# `history` is read by the plotting and metric-logging cells that follow.
epochs=20
history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs
)

## Login to TrueFoundry  🎉

1. An account with <a href="https://projectpro.truefoundry.com/signin">TrueFoundry</a> has been created with the same email address that you use to sign in to ProjectPro, and an email has been sent to you to set your password.
2. Please go to your inbox and follow the link to make sure you are logged into TrueFoundry before getting to the next cell. If you don't see the email in your inbox, please check your Spam folder. 

Note: If you are not able to sign in or did not receive an email, please send an email to nikunj@truefoundry.com with the following subject: "ProjectPro User: TrueFoundry Login Issue"

In [None]:
! pip install --upgrade mlfoundry

In [None]:
import mlfoundry as mlf

# Tracking server for this project; requires being signed in to TrueFoundry
# as described above.
TRACKING_URL = 'https://projectpro.truefoundry.com'
mlf_api = mlf.get_client(TRACKING_URL)

# Run that receives the baseline model's metrics, parameters and model.
mlf_run = mlf_api.create_run("cnn-project", "cnn")

In [None]:
# Per-epoch curves recorded by model.fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history rather than from the global
# `epochs`: that global is reassigned by a later training cell, and a stale
# or changed value would no longer match the length of the curves.
epochs_range = range(len(acc))

# Left panel: accuracy. Right panel: loss.
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# Log one metrics record per recorded epoch. Iterating over the curves
# themselves keeps the step count tied to what was actually trained.
for i, (epoch_acc, epoch_val_acc, epoch_loss, epoch_val_loss) in enumerate(
        zip(acc, val_acc, loss, val_loss)):
    metrics_dict = {
        "acc": epoch_acc,
        "val_acc": epoch_val_acc,
        "loss": epoch_loss,
        "val_loss": epoch_val_loss,
    }
    mlf_run.log_metrics(metrics_dict, step=i)

# Architecture parameters of the baseline model. "layer-4" is the width of
# the Dense(128) layer; the key spelling is kept so both runs stay comparable.
params_dict = {
    "layer-1-chnnels": 16,
    "layer-2-chnnels": 32,
    "layer-3-chnnels": 64,
    "layer-4-chnnels": 128,
}
mlf_run.log_params(params_dict)

mlf_run.log_model(model, "keras")

# End this run here: `mlf_run` is rebound to a new run for the augmented
# model later in the notebook, after which this run can no longer be ended.
mlf_run.end()

In [None]:
# Augmentation pipeline: random horizontal flip, random rotation (factor 0.1)
# and random zoom (factor 0.1). It declares the input shape because it is
# used as the first layer of the second model.
data_augmentation = keras.Sequential(
  [
    layers.RandomFlip("horizontal",
                      input_shape=(img_height,
                                  img_width,
                                  3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
  ]
)

In [None]:
# Run the augmentation pipeline nine times on the same training batch and
# plot the first image of each result in a 3x3 grid.
plt.figure(figsize=(10, 10))
for images, _ in train_ds.take(1):
    for i in range(9):
        augmented_images = data_augmentation(images)
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(augmented_images[0].numpy().astype("uint8"))
        plt.axis("off")

In [None]:
# Second model: same convolutional stack as the baseline, preceded by the
# augmentation pipeline and with a Dropout(0.2) layer before flattening.
# Rebinds `model`, replacing the baseline model.
model = Sequential([
  data_augmentation,
  layers.Rescaling(1./255),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.2),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  # No activation: the loss is configured with from_logits=True.
  layers.Dense(num_classes)
])

In [None]:
# Same optimizer, loss and metric as the baseline model.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
# Print the layer summary of the augmented model.
model.summary()

In [None]:
# Train the augmented model for 30 epochs. This rebinds both `epochs` and
# `history`, which the following plotting and logging cells read.
epochs = 30
history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs
)

In [None]:
#model.save("cnn-model.h5")

In [None]:
# model = tf.keras.models.load_model("output/cnn-model.h5")

In [None]:
# Per-epoch curves recorded by the most recent model.fit() call.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history rather than from the global
# `epochs`, so the plot always matches the curves even when cells are re-run
# out of order or `epochs` has been changed since training.
epochs_range = range(len(acc))

# Left panel: accuracy. Right panel: loss.
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# New run for the augmented model; rebinds `mlf_run`.
mlf_run = mlf_api.create_run("cnn-project", "cnn-with-augmentation")

In [None]:
# Log one metrics record per recorded epoch. Iterating over the curves
# themselves keeps the step count tied to what was actually trained.
for i, (epoch_acc, epoch_val_acc, epoch_loss, epoch_val_loss) in enumerate(
        zip(acc, val_acc, loss, val_loss)):
    metrics_dict = {
        "acc": epoch_acc,
        "val_acc": epoch_val_acc,
        "loss": epoch_loss,
        "val_loss": epoch_val_loss,
    }
    mlf_run.log_metrics(metrics_dict, step=i)

# Architecture parameters of the augmented model. "layer-4" is the width of
# the Dense(128) layer; the key spelling is kept so both runs stay comparable.
params_dict = {
    "layer-1-chnnels": 16,
    "layer-2-chnnels": 32,
    "layer-3-chnnels": 64,
    "layer-4-chnnels": 128,
    "dropout-prob": 0.2,
}
mlf_run.log_params(params_dict)

mlf_run.log_model(model, "keras")

# The run is deliberately left open here: the prediction section at the end
# of the notebook still logs sample images to `mlf_run` and ends it there.
# Ending it at this point would mean logging to an already finished run.

## Prediction

In [None]:
# Directory that holds the held-out test images.
test_data_dir = pathlib.Path("input/Testing_Data/")

In [None]:
# Count every entry located exactly one level below a sub-folder of test_data_dir.
image_count = sum(1 for _ in test_data_dir.glob('*/*'))
print(image_count)

In [None]:
# Materialise the test paths of each class folder so they can be sampled later.
test_driving_license = [*test_data_dir.glob('driving_license/*')]
test_social_security = [*test_data_dir.glob('social_security/*')]
test_others = [*test_data_dir.glob('others/*')]

In [None]:
# Load the complete test directory (no validation split) with the same
# image size and batch size as the training data.
test_ds = tf.keras.utils.image_dataset_from_directory(
  test_data_dir,
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

In [None]:
import matplotlib.pyplot as plt

# Show the first nine images of one test batch in a 3x3 grid, each titled
# with the class name looked up from its label.
plt.figure(figsize=(10, 10))
for images, labels in test_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        # Cast to uint8 so imshow gets integer pixel values.
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[labels[i]])
        plt.axis("off")

In [None]:
# Evaluate the current `model` (the augmented one, if all cells ran in order) on the test set.
model.evaluate(test_ds)

In [None]:
# Load one test image resized to the model's input size and add a leading
# batch axis so it can be passed to model.predict().
img = tf.keras.utils.load_img(
    "input/Testing_Data/others/111.jpg", target_size=(img_height, img_width)
)
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0) # Create a batch

In [None]:
# Run the model on the one-image batch and turn its logits into probabilities.
predictions = model.predict(img_array)
score = tf.nn.softmax(predictions[0])

# Report the most probable class together with its probability in percent.
best_index = np.argmax(score)
confidence = 100 * np.max(score)
message = "This image most likely belongs to {} with a {:.2f} percent confidence."
print(message.format(class_names[best_index], confidence))

In [None]:
def get_prediction(img_array):
    """Return the class name the global ``model`` predicts for an image batch.

    Args:
        img_array: Batch of images passed straight to ``model.predict``.
            Only the prediction for the first element of the batch is used.

    Returns:
        The entry of the global ``class_names`` whose softmax probability
        is the highest.
    """
    logits = model.predict(img_array)[0]
    probabilities = tf.nn.softmax(logits)
    return class_names[np.argmax(probabilities)]

In [None]:
import random

# Test file lists, one per class, in the same order as the class folders.
classes = [test_driving_license, test_social_security, test_others]
img_dict = {}

# Number of images to log per class.
SAMPLES_PER_CLASS = 3

img_num = 1
for class_files in classes:
    # Sample WITHOUT replacement so the same file is never logged twice, and
    # cap the sample size so a class with fewer files does not raise.
    sample_size = min(SAMPLES_PER_CLASS, len(class_files))
    for file_path in random.sample(class_files, sample_size):

        img = tf.keras.utils.load_img(
            file_path, target_size=(img_height, img_width)
        )
        img_array = tf.keras.utils.img_to_array(img)

        # The true label is the name of the folder containing the file.
        # Path.parent.name is independent of the OS path separator.
        actual = file_path.parent.name
        prediction = get_prediction(tf.expand_dims(img_array, 0))

        img_dict[f"image-{img_num}"] = mlf.Image(
            data_or_path=img_array.astype("uint8"),
            caption="CNN predictions",
            class_groups={
                "actuals": actual,
                "predictions": prediction,
            },
        )
        img_num += 1

# Attach the sampled images to the tracking run, then close the run.
mlf_run.log_images(img_dict)
mlf_run.end()