In [1]:
from google.colab import drive
drive.mount('/content/drive')
!mkdir data
!wget -P data/ https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
!unzip -q data/data.zip

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
mkdir: cannot create directory ‘data’: File exists
--2024-12-02 22:37:08--  https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/405934815/e712cf72-f851-44e0-9c05-e711624af985?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241202%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241202T223708Z&X-Amz-Expires=300&X-Amz-Signature=5e8cd5f001a25dafeb480e528bab0ff410ae6cc239b747a66504603297391b72&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Ddata.zip&response-content-type=application%2Foctet-stream [following]
--2024

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Seed Python, NumPy and TensorFlow in one call so that weight initialization
# and generator shuffling are repeatable after "Restart Kernel -> Run All".
# NOTE: some GPU kernels may still be non-deterministic, so results can
# differ slightly between hardware setups.
SEED = 42
keras.utils.set_random_seed(SEED)

# define the model: one conv block followed by a small dense classifier
model = keras.Sequential([
    # input layer: 200x200 images with 3 channels
    layers.Input(shape=(200, 200, 3)),

    # convolutional layer: 32 filters of size 3x3 with ReLU activation
    layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        activation="relu"
    ),

    # max pooling layer with a 2x2 window
    layers.MaxPooling2D(pool_size=(2, 2)),

    # flatten layer to convert 2D feature maps to 1D vector
    layers.Flatten(),

    # dense hidden layer with 64 units
    layers.Dense(64, activation="relu"),

    # output layer for binary classification: a single sigmoid unit
    layers.Dense(1, activation="sigmoid")
])

# compile the model with SGD optimizer (with momentum); binary cross-entropy
# matches the single sigmoid output, accuracy is tracked as a metric
optimizer = keras.optimizers.SGD(learning_rate=0.002, momentum=0.8)
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"]
)


In [4]:
# display the summary of the model defined and compiled in the previous cell
model.summary()

In [5]:
# one generator shared by both splits: multiplies every pixel value by 1/255,
# no augmentation at this stage
image_generator = ImageDataGenerator(rescale=1./255)

# build the train and test directory iterators with identical settings;
# the train directory is read first, then the test directory
train_generator, test_generator = (
    image_generator.flow_from_directory(
        directory,
        target_size=(200, 200),
        batch_size=20,
        class_mode="binary",
        shuffle=True
    )
    for directory in ("data/train", "data/test")
)

# train for 10 epochs, validating on the test iterator after every epoch
history = model.fit(train_generator, validation_data=test_generator, epochs=10)


Found 800 images belonging to 2 classes.
Found 201 images belonging to 2 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 153ms/step - accuracy: 0.5228 - loss: 0.7152 - val_accuracy: 0.6169 - val_loss: 0.6505
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 153ms/step - accuracy: 0.6440 - loss: 0.6178 - val_accuracy: 0.6368 - val_loss: 0.6145
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 129ms/step - accuracy: 0.6947 - loss: 0.5744 - val_accuracy: 0.6418 - val_loss: 0.6112
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 150ms/step - accuracy: 0.7351 - loss: 0.5365 - val_accuracy: 0.6617 - val_loss: 0.6022
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 162ms/step - accuracy: 0.7549 - loss: 0.5189 - val_accuracy: 0.6667 - val_loss: 0.6149
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 114ms/step - accuracy: 0.7581 - loss: 0.5222 - val_accuracy: 0.6617 - val_loss: 0.5858
Epoch 7/10
[1m40/40[0m [32m━━━━━━━

In [6]:
# get the training history as a dictionary of per-epoch lists
# (keys: 'accuracy', 'loss', 'val_accuracy', 'val_loss')
history_dict = history.history
# bare expression so the notebook shows the dictionary as the cell output
history_dict

{'accuracy': [0.5512499809265137,
  0.6537500023841858,
  0.6899999976158142,
  0.7225000262260437,
  0.7412499785423279,
  0.7425000071525574,
  0.7524999976158142,
  0.7587500214576721,
  0.7749999761581421,
  0.8112499713897705],
 'loss': [0.6933026313781738,
  0.6105664968490601,
  0.5727519392967224,
  0.5507877469062805,
  0.534469485282898,
  0.5295429825782776,
  0.5014182925224304,
  0.481684148311615,
  0.482320100069046,
  0.43216511607170105],
 'val_accuracy': [0.6169154047966003,
  0.6368159055709839,
  0.641791045665741,
  0.6616915464401245,
  0.6666666865348816,
  0.6616915464401245,
  0.6915422677993774,
  0.6865671873092651,
  0.676616907119751,
  0.6666666865348816],
 'val_loss': [0.6505066156387329,
  0.6145282983779907,
  0.6112041473388672,
  0.6022021770477295,
  0.6149374842643738,
  0.585772693157196,
  0.5822759866714478,
  0.5852249264717102,
  0.5886169672012329,
  0.5738077163696289]}

In [7]:
# per-epoch training accuracy and loss, needed for the homework questions
train_acc, train_loss = history_dict['accuracy'], history_dict['loss']

In [8]:
# median of the per-epoch training accuracy values held in train_acc
np.median(train_acc)

0.7418749928474426

In [9]:
# standard deviation of the training loss over all the epochs held in train_loss
np.std(train_loss)

0.07045769475275268

In [10]:
# NOTE: in the earlier training cell `train_generator` / `test_generator` were
# directory iterators; here the same names are rebound to ImageDataGenerator
# instances, and the iterators are called `train_data` / `test_data`.

# training images: rescaled and randomly augmented
train_generator = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=50,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode='nearest'
)

# test images: rescaled only, no augmentation
test_generator = ImageDataGenerator(rescale=1./255)

# pair each generator with its directory and build both iterators with the
# same settings; the train directory is read first, then the test directory
train_data, test_data = (
    generator.flow_from_directory(
        directory,
        target_size=(200, 200),
        batch_size=20,
        class_mode="binary",
        shuffle=True
    )
    for generator, directory in (
        (train_generator, "data/train"),
        (test_generator, "data/test"),
    )
)

# keep training the already-fitted `model` (it is not re-created here)
# for 10 more epochs, now on the augmented data
history = model.fit(train_data, validation_data=test_data, epochs=10)

Found 800 images belonging to 2 classes.
Found 201 images belonging to 2 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 237ms/step - accuracy: 0.6366 - loss: 0.6573 - val_accuracy: 0.6915 - val_loss: 0.6106
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 267ms/step - accuracy: 0.6970 - loss: 0.5840 - val_accuracy: 0.6915 - val_loss: 0.6094
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 286ms/step - accuracy: 0.6940 - loss: 0.5983 - val_accuracy: 0.6866 - val_loss: 0.5642
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 280ms/step - accuracy: 0.6939 - loss: 0.6145 - val_accuracy: 0.6965 - val_loss: 0.5919
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 264ms/step - accuracy: 0.7058 - loss: 0.5786 - val_accuracy: 0.7114 - val_loss: 0.5644
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 233ms/step - accuracy: 0.6985 - loss: 0.5817 - val_accuracy: 0.7114 - val_loss: 0.5487
Epoch 7/10
[1m40/40[0m [32m━━━

In [11]:
# update history_dict from the `history` object currently in scope.
# NOTE: this rebinds the `history_dict` created after the first training run,
# so re-running the earlier train_acc / train_loss cells after this one would
# read the metrics of this later run instead.
history_dict = history.history

# per-epoch validation ("test") loss for all epochs
test_loss = history_dict['val_loss']
# validation accuracy of the final 5 epochs; the negative slice stays correct
# for any number of epochs (for a 10-epoch run it equals [5:10])
test_acc_last5e = history_dict['val_accuracy'][-5:]

In [13]:
# mean of the per-epoch test (validation) loss values held in test_loss
np.mean(test_loss)

0.5698466897010803

In [14]:
# average of the test (validation) accuracy values held in test_acc_last5e,
# i.e. the last 5 epochs of the run
np.mean(test_acc_last5e)

0.7114427804946899