# Load and Pre-Process Data

In [None]:
import tensorflow as tf
import seaborn as sns
import numpy as np
from PIL import Image
import glob
from collections import defaultdict
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
IMG_SIZE = (94, 125)  # (width, height), the order Image.resize expects

def pixels_from_path(file_path):
    """Load an image file and return its pixels as a NumPy array.

    The image is converted to RGB and resized to ``IMG_SIZE`` so that every
    returned array has the same shape and can be stacked into one batch.

    Args:
        file_path: Path of the image file to read.

    Returns:
        A ``numpy.ndarray`` of shape ``(IMG_SIZE[1], IMG_SIZE[0], 3)``.
    """
    # The context manager releases the file handle even if decoding fails.
    with Image.open(file_path) as im:
        # Grayscale, palette or RGBA files would otherwise produce arrays
        # with a channel count other than the 3 the models are built for.
        resized = im.convert("RGB").resize(IMG_SIZE)

    # Matrix of pixel RGB values
    return np.array(resized)

# Tally how often each (resized) image shape occurs among up to 1000 cat files.
shape_counts = defaultdict(int)

for i, cat in enumerate(glob.glob('cats/*')[:1000]):
    # Progress marker every 100 images.
    if not i % 100:
        print(i)
    img_shape = pixels_from_path(cat).shape
    shape_counts[str(img_shape)] += 1

# (shape, count) pairs ordered from most to least frequent: an ascending
# sort by count that is then flipped.
shape_items = sorted(shape_counts.items(), key=lambda item: item[1])[::-1]

# NOTE(review): validation_size and sample_size are not read by the loading
# code in this cell (it uses SAMPLE_SIZE and valid_size); they are kept in
# case other cells rely on them.
validation_size = 0.1
img_size = IMG_SIZE  # (width, height) that every image is resized to
num_channels = 3  # RGB
sample_size = 8192  # 2**13

SAMPLE_SIZE = 2048  # training images loaded per class
valid_size = 512  # validation images loaded per class


def _split_paths(pattern, n_train, n_valid):
    """Return disjoint (train_paths, valid_paths) for files matching pattern."""
    # glob gives no ordering guarantee; sorting makes the split reproducible.
    paths = sorted(glob.glob(pattern))
    train_paths = paths[:n_train]
    # Validation files come from the end of what is left after the training
    # slice, so the two sets cannot overlap even when few files exist.
    remaining = paths[n_train:]
    valid_paths = remaining[-n_valid:] if n_valid > 0 else []
    return train_paths, valid_paths


def _load_images(paths):
    """Load every path with pixels_from_path and stack the results."""
    return np.asarray([pixels_from_path(path) for path in paths])


cat_train_paths, cat_valid_paths = _split_paths('cats/*', SAMPLE_SIZE, valid_size)
dog_train_paths, dog_valid_paths = _split_paths('dogs/*', SAMPLE_SIZE, valid_size)

print("loading training cat images...")
cat_train_set = _load_images(cat_train_paths)

print("loading training dog images...")
dog_train_set = _load_images(dog_train_paths)

print("loading validation cat images...")
cat_valid_set = _load_images(cat_valid_paths)

print("loading validation dog images...")
dog_valid_set = _load_images(dog_valid_paths)

# Sanity check of the per-image array shape produced by the loader.
print("image array shape:", cat_train_set.shape[1:])

# Cats are labelled 1 and dogs 0. Label counts follow the number of images
# actually loaded, so they stay aligned when fewer files exist than requested.
x_train = np.concatenate([cat_train_set, dog_train_set])
labels_train = np.concatenate([np.ones(len(cat_train_set), dtype=int),
                               np.zeros(len(dog_train_set), dtype=int)])

x_valid = np.concatenate([cat_valid_set, dog_valid_set])
labels_valid = np.concatenate([np.ones(len(cat_valid_set), dtype=int),
                               np.zeros(len(dog_valid_set), dtype=int)])

# Baseline Implementation (Fully Connected Network)

In [None]:
# Baseline classifier: the image is flattened and fed through one hidden
# dense layer, followed by a single sigmoid output unit.
fc_size = 512
img_width, img_height = img_size[0], img_size[1]
total_pixels = img_width * img_height * 3

inputs = keras.Input(shape=(img_height, img_width, 3), name='ani_image')
x = layers.Flatten(name='flattened_img')(inputs)  # turn image to vector.
x = layers.Dense(fc_size, activation='relu', name='first_layer')(x)
outputs = layers.Dense(1, activation='sigmoid', name='class')(x)

model = keras.Model(inputs=inputs, outputs=outputs)

# Baseline Training

In [None]:
# `learning_rate` is the supported keyword; the older `lr` alias is
# deprecated and is rejected by newer Keras releases.
customAdam = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=customAdam,  # Optimizer
    # Loss function to minimize
    loss="mean_squared_error",
    # List of metrics to monitor
    metrics=["binary_crossentropy", "mean_squared_error"],
)

print('# Fit model on training data')

history = model.fit(
    x_train,
    labels_train,
    batch_size=32,
    shuffle=True,  # important since we loaded cats first, dogs second.
    epochs=3,
    validation_data=(x_valid, labels_valid),
)

# Output recorded from an earlier run (its sample counts may not match the
# current data split):
#Train on 4096 samples, validate on 2048 samples
#loss: 0.5000 - binary_crossentropy: 8.0590 - mean_squared_error: 0.5000 - val_loss: 0.5000 - val_binary_crossentropy: 8.0591 - val_mean_squared_error: 0.5000

# CNN Implementation and Training

In [None]:
# Convolutional classifier: conv -> max-pool -> flatten -> two hidden dense
# layers -> single sigmoid output unit.
fc_layer_size = 128
img_size = IMG_SIZE
img_width, img_height = img_size[0], img_size[1]

conv_inputs = keras.Input(shape=(img_height, img_width, 3), name='ani_image')
conv_layer = layers.Conv2D(24, kernel_size=3, activation='relu')(conv_inputs)
conv_layer = layers.MaxPool2D(pool_size=(2, 2))(conv_layer)
conv_x = layers.Flatten(name='flattened_features')(conv_layer)  # turn image to vector.

# The two hidden layers differ only in their name.
for dense_name in ('first_layer', 'second_layer'):
    conv_x = layers.Dense(fc_layer_size, activation='relu', name=dense_name)(conv_x)
conv_outputs = layers.Dense(1, activation='sigmoid', name='class')(conv_x)

conv_model = keras.Model(inputs=conv_inputs, outputs=conv_outputs)
  
# `learning_rate` is the supported keyword; the older `lr` alias is
# deprecated and is rejected by newer Keras releases.
customAdam = keras.optimizers.Adam(learning_rate=1e-6)
conv_model.compile(
    optimizer=customAdam,  # Optimizer
    # Loss function to minimize
    loss="binary_crossentropy",
    # List of metrics to monitor
    metrics=["binary_crossentropy", "mean_squared_error"],
)

# Output recorded from an earlier run:
#Epoch 5/5 loss: 1.6900 val_loss: 2.0413 val_mean_squared_error: 0.3688
print('# Fit model on training data')

history = conv_model.fit(
    x_train,
    labels_train,  # we pass it the labels
    # If the model is taking too long to train, make this bigger
    # If it is taking too long to load for the first epoch, make this smaller
    batch_size=32,
    shuffle=True,
    epochs=5,
    # We pass it validation data to
    # monitor loss and metrics
    # at the end of each epoch
    validation_data=(x_valid, labels_valid),
)
  
# One prediction per validation image; keep the single output column so the
# result is a flat vector.
preds = conv_model.predict(x_valid)[:, 0]
# Correlation coefficient between the predictions and the true labels.
np.corrcoef(preds, labels_valid)[0, 1]  # 0.15292172

# Analyze

In [None]:
sns.scatterplot(x=preds, y=labels_valid)

cat_quantity = sum(labels_valid)

# For each cut-off, print the share of true cats (label 1) among the
# validation images whose prediction exceeds that cut-off.
for i in range(1, 10):
    # i / 10 avoids float noise such as 0.30000000000000004 from .1 * i.
    threshold = i / 10
    print('threshold :' + str(threshold))
    selected = labels_valid[preds > threshold]
    if selected.shape[0] == 0:
        # Nothing was predicted above this cut-off, so the ratio is undefined.
        print('no predictions above threshold')
    else:
        print(sum(selected) / selected.shape[0])

# Helper Functions

In [None]:
def animal_pic(index):
    """Return the validation image stored at ``index`` as a PIL image."""
    pixel_array = x_valid[index]
    return Image.fromarray(pixel_array)

def cat_index(index):
    """Return the model's predicted cat probability for one validation image.

    Args:
        index: Position of the image in ``x_valid``.

    Returns:
        The single output value of ``conv_model`` for that image.
    """
    # predict() works on batches, so wrap the one image in a batch of size 1.
    # The requested index is used here; it was previously hard-coded to 124.
    batch = np.asarray([x_valid[index]])
    return conv_model.predict(batch)[0][0]

# Save Model

In [None]:
# Write the convolutional model to the path 'conv_model_big'.
# NOTE(review): recent Keras releases expect a '.keras' or '.h5' suffix on
# this path — confirm against the installed version.
conv_model.save('conv_model_big')

# An example output would be:

# Demo: score one validation image, then show the image itself.
index = 600

cat_probability = cat_index(index)
print("probability of being a cat: {}".format(cat_probability))

animal_pic(index)

# Interface

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3e480598-2102-4b3c-a7af-02aec7eef77c' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>