In [None]:
# !wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/bee-wasp-data/data.zip
# !unzip data.zip

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten

from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator

np.random.seed(42)
tf.random.set_seed(42)

In [3]:
input_shape = (150, 150, 3)
kernel_size =  (3, 3)
filters = 32
pooling_size = (2, 2)
relu = 'relu'
learning_rate = 0.01

def make_model():
    inputs = tf.keras.Input(shape = input_shape)
    conv2d = Conv2D(filters=filters,kernel_size=kernel_size,activation=relu)(inputs)
    vectors = MaxPooling2D(pool_size=pooling_size)(conv2d)
    flatten = Flatten()(vectors)
    dense64 = Dense(64,activation=relu)(flatten)
    outputs = Dense(1,activation='sigmoid')(dense64)
    model = tf.keras.Model(inputs , outputs)

    optimizer = tf.keras.optimizers.SGD(learning_rate=0.002, momentum=0.8)
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

    model.compile(optimizer=optimizer,loss=loss,metrics=['accuracy'])

    return model

Question 1

Since we have a binary classification problem, what is the best loss function for us?

- `mean squared error`
- **binary crossentropy**
- `categorical crossentropy`
- `cosine similarity`

Question 2

What's the number of parameters in the convolutional layer of our model? You can use the summary method for that.

- 1
- 65
- **896**
- 11214912

In [4]:
model = make_model()
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 150, 150, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 74, 74, 32)        0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 175232)            0         
                                                                 
 dense (Dense)               (None, 64)                11214912  
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                             

In [5]:
train_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)

train_generator = train_gen.flow_from_directory('./data/train/', target_size=(150,150),class_mode='binary', batch_size=20,shuffle=True)
test_generator = test_gen.flow_from_directory('./data/test/', target_size=(150,150),class_mode='binary', batch_size=20,shuffle=True)

Found 3677 images belonging to 2 classes.
Found 918 images belonging to 2 classes.


In [6]:
train_generator.class_indices

{'bee': 0, 'wasp': 1}

In [7]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=test_generator
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Question 3

What is the median of training accuracy for all the epochs for this model?

- 0.20
- 0.40
- 0.60
- **0.80**

In [8]:
np.median(history.history['accuracy'])

0.7642099559307098

Question 4

What is the standard deviation of training loss for all the epochs for this model?

- 0.031
- 0.061
- **0.091**
- 0.131

In [9]:
np.std(history.history['loss'])

0.08813254056231913

In [10]:
gen_transformed = ImageDataGenerator(rescale=1./255,
                                     rotation_range=50,
                                     width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     zoom_range=0.1,
                                     horizontal_flip=True,
                                     fill_mode='nearest')

train_generator = train_gen.flow_from_directory('./data/train/', target_size=(150,150),class_mode='binary', batch_size=20,shuffle=True)
test_generator = test_gen.flow_from_directory('./data/test/', target_size=(150,150),class_mode='binary', batch_size=20,shuffle=True)

Found 3677 images belonging to 2 classes.
Found 918 images belonging to 2 classes.


In [11]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=test_generator
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Question 5

Let's train our model for 10 more epochs using the same code as previously.

`Note: make sure you don't re-create the model - we want to continue training the model we already started training.`

What is the mean of test loss for all the epochs for the model trained with augmentations?

- 0.18
- **0.48**
- 0.78
- 0.108

In [12]:
np.mean(history.history['val_loss'])

0.5878636956214904

Question 6

What's the average of test accuracy for the last 5 epochs (from 6 to 10) for the model trained with augmentations?

- 0.38
- 0.58
- **0.78**
- 0.98

In [13]:
np.average(history.history['val_accuracy'][5:])

0.7596949815750123