In [1]:
# !wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/bee-wasp-data/data.zip
# !unzip data.zip

In [2]:
import pandas as pd
import tensorflow as tf

from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2023-11-16 12:05:56.207782: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


*Model*

In [3]:
# Fix the Python, NumPy and TensorFlow seeds before anything stochastic runs,
# so that weight initialisation (and any later code drawing from those global
# RNGs) starts from the same state on every "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic, so metrics can
# differ slightly between runs even with the seed fixed — confirm if exact
# repeatability matters.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Small CNN for binary classification of 150x150 RGB images:
# one 3x3 convolution (32 filters) -> 2x2 max-pooling -> flatten ->
# 64-unit hidden layer -> single sigmoid output unit.
model = models.Sequential()
model.add(layers.InputLayer(input_shape=(150, 150, 3)))
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# One sigmoid unit pairs with the binary cross-entropy loss configured below.
model.add(layers.Dense(1, activation='sigmoid'))

# SGD with momentum; accuracy is tracked alongside the loss for each epoch.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.002, momentum=0.8),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy']
)

2023-11-16 12:05:57.833398: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-11-16 12:05:57.840687: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-11-16 12:05:57.841374: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-11-16 12:05:57.842352: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the approp

**Question 1**

Answer: `binary crossentropy`

**Question 2**

In [4]:
# Print the per-layer output shapes and parameter counts of the compiled model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 32)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 175232)            0         
                                                                 
 dense (Dense)               (None, 64)                11214912  
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 11,215,873
Trainable params: 11,215,873
Non-trainable params: 0
____________________________________________

*Generators and Training*

In [5]:
# Plain generators for the first run: the only transformation is multiplying
# every pixel value by 1/255 (no augmentation).
train_generator = ImageDataGenerator(rescale=1. / 255)
test_generator = ImageDataGenerator(rescale=1. / 255)

# Loading options shared by both directory iterators: binary labels,
# images resized to the model's 150x150 input, shuffled batches of 20.
flow_options = dict(
    class_mode='binary',
    target_size=(150, 150),
    batch_size=20,
    shuffle=True,
)

train_ds = train_generator.flow_from_directory('./data/train', **flow_options)
test_ds = test_generator.flow_from_directory('./data/test', **flow_options)

# Train for 10 epochs, evaluating on the test iterator after each one, then
# keep the per-epoch metrics as a DataFrame for the questions below.
history = model.fit(
    x=train_ds,
    validation_data=test_ds,
    epochs=10,
)
df_history = pd.DataFrame(data=history.history)

Found 3677 images belonging to 2 classes.
Found 918 images belonging to 2 classes.
Epoch 1/10


2023-11-16 12:06:00.180624: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2023-11-16 12:06:00.859162: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-16 12:06:00.859975: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-16 12:06:00.860012: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2023-11-16 12:06:00.860899: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-16 12:06:00.860984: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**Question 3**

In [6]:
# Median of the per-epoch training accuracy held in df_history,
# rounded to two decimals.
round(df_history.loc[:, 'accuracy'].median(), ndigits=2)

0.8

**Question 4**

In [7]:
# Sample standard deviation (ddof=1, the pandas default) of the per-epoch
# training loss held in df_history, rounded to four decimals.
round(df_history.loc[:, 'loss'].std(ddof=1), ndigits=4)

0.1102

*Data Augmentation*

In [8]:
# Augmentation settings applied to training images only: random rotation,
# small shifts and zoom, and horizontal flips; pixels exposed by a transform
# are filled using the 'nearest' mode. Pixel values are still multiplied by 1/255.
augmentation = dict(
    rescale=1. / 255,
    rotation_range=50,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_generator = ImageDataGenerator(**augmentation)

# Rebind train_ds to an iterator over the same training directory, now backed
# by the augmenting generator; loading options match the earlier iterator.
train_ds = train_generator.flow_from_directory(
    directory='./data/train',
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
    shuffle=True,
)

Found 3677 images belonging to 2 classes.


**Question 5**

In [9]:
# Fit the existing `model` object for another 10 epochs. The model is not
# re-created here, so (assuming cells run in order) training resumes from the
# weights left by the previous fit and uses the current, augmented train_ds.
# `history` and `df_history` are rebound, so they now hold only this run's metrics.
history = model.fit(
    x=train_ds,
    validation_data=test_ds,
    epochs=10,
)
df_history = pd.DataFrame(data=history.history)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Mean of the per-epoch validation loss held in df_history,
# rounded to two decimals.
round(df_history.loc[:, 'val_loss'].mean(), ndigits=2)

0.5

**Question 6**

In [11]:
# Mean of the 'accuracy' column over the last five epochs in df_history,
# rounded to two decimals.
# NOTE(review): 'accuracy' is the *training* metric, whereas the Question 5
# cell reads a validation metric ('val_loss'). If Question 6 asks for
# validation/test accuracy, the column should be 'val_accuracy' — confirm
# against the question text.
round(df_history['accuracy'].tail(5).mean(), ndigits=2)

0.79