# Model Validations

## Setup Environment

In [48]:
from pathlib import Path, PurePath

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

In [2]:
# Zip archive that the dataset-preparation cell downloads.
DATASET_URL = "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip"
# Batch size handed to the dataset loaders.
BATCH_SIZE = 32
# Image size handed to the dataset loaders as `image_size`.
IMG_SIZE = (160, 160)

## Training Data

### Prepare Dataset

In [3]:
# Fetch the archive from DATASET_URL and ask Keras to extract it (extract=True).
data_file = Path(
    tf.keras.utils.get_file("cats_and_dogs.zip", origin=DATASET_URL, extract=True)
)

# NOTE(review): assumes the archive unpacks into a "cats_and_dogs_filtered"
# folder next to the downloaded file — confirm for the installed Keras version.
data_dir = data_file.parent.joinpath("cats_and_dogs_filtered")
train_dir = data_dir.joinpath("train")
validation_dir = data_dir.joinpath("validation")

# exist_ok=True makes these calls no-ops when the folders are already present.
data_dir.mkdir(exist_ok=True)
train_dir.mkdir(exist_ok=True)
validation_dir.mkdir(exist_ok=True)

Downloading data from https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip


In [4]:
# Build shuffled image batches from the files under train_dir.
train_dataset__untuned = image_dataset_from_directory(
    train_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Class names as exposed by the loaded dataset.
class_names = train_dataset__untuned.class_names

Found 2000 files belonging to 2 classes.


In [5]:
# Load the images under validation_dir. A fixed seed makes the shuffled order,
# and therefore the take/skip split below, repeatable across kernel restarts.
# NOTE(review): the loader may still reshuffle on every pass over the dataset,
# so the two splits are presumably not guaranteed to stay disjoint across
# separate iterations — confirm, and consider shuffle=False if that matters.
eval_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    validation_dir,
    shuffle=True,
    seed=42,  # arbitrary fixed value; any constant gives a repeatable order
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
)

# Split by batch count: the first fifth of the batches becomes the test set,
# the remaining batches become the validation set.
eval_cardinality = tf.data.experimental.cardinality(eval_dataset)
test_dataset__untuned = eval_dataset.take(eval_cardinality // 5)
validation_dataset__untuned = eval_dataset.skip(eval_cardinality // 5)

Found 1000 files belonging to 2 classes.


### Configure Dataset

In [6]:
# Add prefetching to each split; AUTOTUNE is passed as the prefetch buffer size.
train_dataset = train_dataset__untuned.prefetch(tf.data.experimental.AUTOTUNE)
validation_dataset = validation_dataset__untuned.prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset__untuned.prefetch(tf.data.experimental.AUTOTUNE)

## Predictions

In [32]:
# NOTE(review): NUM_BATCHES is not referenced by any cell visible in this
# notebook — confirm it is still needed.
NUM_BATCHES = 100

### Partially trained model

In [10]:
# Load the saved partially trained model; the path is relative to the
# kernel's working directory.
partial_model = tf.keras.models.load_model("./models/init_partial")



In [64]:
# Score every validation batch with the partially trained model, recording per
# image how confident the prediction was and whether it matched the label.
partial_model__confidences = []
partial_model__validations = []

for image_batch, label_batch in validation_dataset.as_numpy_iterator():
    # One forward pass per batch: Model.predict is designed for whole datasets
    # and is not intended to be called repeatedly inside a Python loop.
    model_output = np.ravel(partial_model.predict_on_batch(image_batch))
    # The raw model output is passed through a sigmoid before thresholding.
    predictions_sigmoid = tf.nn.sigmoid(model_output)
    # Distance from the 0.5 decision boundary, rescaled to the range [0, 1].
    predictions_confidence = tf.abs(predictions_sigmoid - 0.5) / 0.5
    predictions = tf.cast(tf.round(predictions_sigmoid), tf.int32).numpy()

    # Store NumPy arrays in both lists so np.concatenate sees a single type.
    partial_model__confidences.append(predictions_confidence.numpy())
    # 1 where the rounded prediction equals the label, 0 otherwise.
    partial_model__validations.append((predictions == label_batch).astype(int))

partial_model__confidence = np.concatenate(partial_model__confidences)
partial_model__validation = np.concatenate(partial_model__validations)

In [65]:
# Tabulate the partial model's per-image confidence and validation arrays.
partialmodel_data = pd.DataFrame(
    dict(
        confidence=partial_model__confidence,
        validation=partial_model__validation,
    )
)
partialmodel_data

Unnamed: 0,confidence,validation
0,0.136964,1
1,0.374706,1
2,0.553394,1
3,0.686658,1
4,0.617029,1
...,...,...
803,0.418096,1
804,0.315803,1
805,0.356133,1
806,0.611851,1


In [66]:
# Create the output folder first so the export also works on a fresh checkout,
# where "validations/" may not exist yet.
Path("validations").mkdir(parents=True, exist_ok=True)
partialmodel_data.to_csv("validations/partial_model.csv", index=False)

### Fully trained model

In [40]:
# Load the saved fully trained model; the path is relative to the kernel's
# working directory.
final_model = tf.keras.models.load_model("./models/final")



In [61]:
# Score every validation batch with the fully trained model, recording per
# image how confident the prediction was and whether it matched the label.
final_model__confidences = []
final_model__validations = []

for image_batch, label_batch in validation_dataset.as_numpy_iterator():
    # One forward pass per batch: Model.predict is designed for whole datasets
    # and is not intended to be called repeatedly inside a Python loop.
    model_output = np.ravel(final_model.predict_on_batch(image_batch))
    # The raw model output is passed through a sigmoid before thresholding.
    predictions_sigmoid = tf.nn.sigmoid(model_output)
    # Distance from the 0.5 decision boundary, rescaled to the range [0, 1].
    predictions_confidence = tf.abs(predictions_sigmoid - 0.5) / 0.5
    predictions = tf.cast(tf.round(predictions_sigmoid), tf.int32).numpy()

    # Store NumPy arrays in both lists so np.concatenate sees a single type.
    final_model__confidences.append(predictions_confidence.numpy())
    # 1 where the rounded prediction equals the label, 0 otherwise.
    final_model__validations.append((predictions == label_batch).astype(int))

final_model__confidence = np.concatenate(final_model__confidences)
final_model__validation = np.concatenate(final_model__validations)

In [62]:
# Tabulate the final model's per-image confidence and validation arrays.
finalmodel_data = pd.DataFrame(
    dict(
        confidence=final_model__confidence,
        validation=final_model__validation,
    )
)
finalmodel_data

Unnamed: 0,confidence,validation
0,0.996536,1
1,0.998405,1
2,0.999903,1
3,0.986727,1
4,0.561795,1
...,...,...
803,0.989044,1
804,0.998684,1
805,0.997855,1
806,0.995161,1


In [63]:
# Create the output folder first so the export also works on a fresh checkout,
# where "validations/" may not exist yet.
Path("validations").mkdir(parents=True, exist_ok=True)
finalmodel_data.to_csv("validations/final_model.csv", index=False)