### [Story](https://www.pivotaltracker.com/story/show/187949220)
### [Source](https://medium.com/@prathammodi001/keras-cnn-tutorial-classifying-images-made-easy-fb55cc8892ec)

Create a CNN from scratch, applied to computer vision, to classify cats and dogs from a well-known image dataset published on [Kaggle](https://www.kaggle.com/c/dogs-vs-cats/overview).

## Dog Breed Classifier CNN

In [None]:
import os
# CUDA device configuration, set before TensorFlow is imported below.
# Devices are ordered by PCI bus ID; "-1" is not a valid device index, so this
# presumably hides every GPU and forces CPU-only execution (the GPU check in the
# next cell should then report that no GPU is available) - TODO confirm intent.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="-1" 

import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Report the TensorFlow version and whether any logical GPU device is visible.
print(tf.__version__)
gpu_devices = tf.config.list_logical_devices('GPU')
print("Yes,  GPU available" if gpu_devices else "No GPU available")

### Dataset 

In [None]:
# Load dataset
# Display options: show every column and every row when a DataFrame is rendered.
pd.set_option('display.max_columns', None)  # show all columns
# Use the canonical option name: 'display.max.rows' only resolved because
# pandas treats the option key as a regular expression.
pd.set_option('display.max_rows', None)  # show all rows

# One row per training image (the notebook uses the "id" and "breed" columns).
labels_csv = pd.read_csv("./dataset/labels.csv")
labels_csv.describe()

In [None]:
# EDA (exploratory data analysis): number of training images per breed
breed_counts = labels_csv["breed"].value_counts()
ax = breed_counts.plot.bar(figsize=(20, 10))

# Dashed red reference line at the mean number of images per breed
average_value = breed_counts.mean()
ax.axhline(average_value, color="red", linestyle="--", label="Average")

ax.set_xlabel("Breed")
ax.set_ylabel("Count")
ax.set_title("Breed Distribution")
# legend() must actually be called for the "Average" entry to be drawn;
# the trailing semicolon hides the Legend repr in the cell output.
ax.legend();


### Get images and their labels

In [None]:
from IPython.display import Image

In [None]:
# Test one image
for image in labels_csv["id"].head(1):
    display(Image(filename="./dataset/train/"+image+".jpg", width=300, height="300"))

In [None]:
# Get all images: build the file path of every training image listed in labels.csv
filenames = ["./dataset/train/" + image_id + ".jpg" for image_id in labels_csv["id"]]

# Preview only the first few paths instead of dumping the whole list
filenames[:10]

In [None]:
# Verify that the training folder holds exactly as many files as filenames collected
n_files_on_disk = len(os.listdir("./dataset/train"))
files_match = n_files_on_disk == len(filenames)
print(
    "Correct! Filenames match amount of files!"
    if files_match
    else "Error! Filenames do not match amount of files"
)

### Labels

In [None]:
# Breed of every image as a NumPy array, in the row order of labels.csv
labels = labels_csv.loc[:, "breed"].to_numpy()

labels

In [None]:
# Check for missing data: there must be as many image paths as labels
data_is_complete = len(labels) == len(filenames)
print("No missing Data!" if data_is_complete else "Missing data!!!")

In [None]:
# Distinct breed names (sorted by np.unique); each one is a position in the
# one-hot label vector. Using `labels` itself here would produce one entry per
# sample instead of one entry per breed.
unique_breeds = np.unique(labels)

# One boolean array per image: True only at the position of that image's breed
boolean_labels = [label == unique_breeds for label in labels]

# Sanity check: every image is assigned to exactly one breed
assert all(encoded.sum() == 1 for encoded in boolean_labels)

## Prepare Train, Valid and Test sets

In [None]:
from sklearn.model_selection import train_test_split

# Only the first NUM_IMAGES samples are used for the split below
NUM_IMAGES = 1000

# Features are the image paths, targets are the boolean label vectors
X, y = filenames, boolean_labels

# 80/20 train/validation split with a fixed seed
X_train, X_val, y_train, y_val = train_test_split(
    X[:NUM_IMAGES],
    y[:NUM_IMAGES],
    test_size=0.2,
    random_state=42,
)

# Sizes of the four resulting sets
tuple(len(part) for part in (X_train, y_train, X_val, y_val))

## Images to tensors

1. Take an image as input
2. Use Tensorflow to read the file and save it to a variable `image`
3. Turn the `image` (.jpeg) into Tensors
4. Normalize the `image`
5. Resize the `image` to 224x224
6. Return the processed image

In [None]:
# Test 1: load one image with matplotlib and inspect the shape of its array
sample_path = filenames[42]
image = plt.imread(sample_path)
image.shape

In [None]:
# Test 2: read the raw contents of one image file with TensorFlow
tensor = tf.io.read_file(filename=filenames[20])
tensor

In [None]:
# Test 3. Normalization: Turn image into numerical Tensor values: 0-255 (RGB)
tensor = tf.image.decode_jpeg(tensor, channels=3)
tensor

In [16]:
# Test 4: rescale the 0-255 RGB values to floats in the 0-1 range
tf.image.convert_image_dtype(image=tensor, dtype=tf.float32)

<tf.Tensor: shape=(375, 500, 3), dtype=float32, numpy=
array([[[0.4901961 , 0.5019608 , 0.45882356],
        [0.47450984, 0.48627454, 0.4431373 ],
        [0.44705886, 0.47058827, 0.42352945],
        ...,
        [0.37647063, 0.48627454, 0.25490198],
        [0.37254903, 0.48235297, 0.2509804 ],
        [0.38823533, 0.49803925, 0.26666668]],

       [[0.454902  , 0.4666667 , 0.42352945],
        [0.43921572, 0.46274513, 0.4156863 ],
        [0.427451  , 0.45098042, 0.4039216 ],
        ...,
        [0.36862746, 0.4784314 , 0.2392157 ],
        [0.36078432, 0.47058827, 0.23137257],
        [0.37254903, 0.48235297, 0.24313727]],

       [[0.43921572, 0.46274513, 0.42352945],
        [0.43137258, 0.46274513, 0.41960788],
        [0.427451  , 0.45882356, 0.4156863 ],
        ...,
        [0.36078432, 0.47450984, 0.22352943],
        [0.34509805, 0.45882356, 0.20784315],
        [0.34901962, 0.46274513, 0.21176472]],

       ...,

       [[0.8941177 , 0.89019614, 0.882353  ],
        [0.90

In [17]:
IMG_SIZE = 224


def preprocess_image(image_path, img_size=IMG_SIZE):
    """Load a JPEG file and return it as a normalized, square image tensor.

    Args:
        image_path: Path of the JPEG image to read.
        img_size: Height and width, in pixels, of the returned image.
            Defaults to IMG_SIZE.

    Returns:
        A float32 tensor of shape (img_size, img_size, 3) with values
        scaled to the 0-1 range.
    """
    # 1. Read the raw file contents
    image = tf.io.read_file(image_path)
    # 2. Decode the JPEG into a numerical tensor with 3 channels (RGB)
    image = tf.image.decode_jpeg(image, channels=3)
    # 3. Normalize: convert 0-255 values into 0-1 floats for each channel
    image = tf.image.convert_image_dtype(image, tf.float32)
    # 4. Resize to img_size x img_size (honours the parameter, not the global)
    image = tf.image.resize(image, size=[img_size, img_size])
    # 5. Return the processed image
    return image