# Projektabgabe: Machine Learning


In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import layers, models, optimizers, Sequential, regularizers
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split

sns.set()

Der `cats_vs_dogs`-Datensatz besteht aus 23.262 Bildern von Katzen und Hunden. Die Bilder sind gelabelt und in zwei Klassen aufgeteilt: Katzen und Hunde. Der ursprüngliche Datensatz enthielt außerdem 1.738 korrupte Bilder, die automatisch entfernt wurden.

`split=["train[:80%]", "train[80%:]"]` teilt den Datensatz in zwei Teile auf: Die ersten 80 % werden für das Training verwendet, die restlichen 20 % für die Validierung. `shuffle_files=True` sorgt dafür, dass die Eingabedateien (Shards) des Datensatzes beim Laden in zufälliger Reihenfolge gelesen werden.


In [None]:
# Load the dataset from TFDS: the first 80% of the "train" split becomes the
# training set, the remaining 20% the held-out set. `as_supervised=True`
# yields (image, label) pairs; `with_info=True` additionally returns metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'CatsVsDogs',
    split=["train[:80%]", "train[80%:]"],
    as_supervised=True,
    with_info=True,
    shuffle_files=True,
)

In [None]:
# Visual sanity check: show a 4x4 grid of examples from the training split.
ds_examples = tfds.visualization.show_examples(ds_train, ds_info, rows=4, cols=4)

In [None]:
def convert_to_gray(image, label):
    """Convert an RGB image to grayscale; the label is passed through unchanged.

    Intended for use with `Dataset.map` on (image, label) pairs.
    """
    gray_image = tf.image.rgb_to_grayscale(image)
    return gray_image, label

def normalize_img(image, label):
    """Cast the image to float32 and divide by 255; the label is passed through unchanged.

    For 8-bit pixel data this scales values into [0, 1]
    (assumes the input is uint8 - TODO confirm against the dataset).
    """
    image_f32 = tf.cast(image, tf.float32)
    return image_f32 / 255.0, label

def resize_img(image, label):
    """Resize the image to 128x128 pixels; the label is passed through unchanged.

    Uses `tf.image.resize` with its default settings.
    """
    target_size = (128, 128)
    resized_image = tf.image.resize(image, target_size)
    return resized_image, label

In [None]:
# Training input pipeline: grayscale -> float32 / 255 -> 128x128, then
# cache / shuffle / batch / prefetch.
#
# NOTE: this cell rebinds `ds_train`, so it is not idempotent. Re-run the cell
# that loads the dataset before re-running this one; otherwise the
# preprocessing is stacked a second time on already-processed images.
ds_train = ds_train.map(convert_to_gray, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_train = ds_train.map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_train = ds_train.map(resize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# Cache after the deterministic preprocessing so it only runs during the first epoch.
ds_train = ds_train.cache()
# Shuffle AFTER the cache so each epoch draws a new sample order; without this
# every epoch replays the exact same sequence of batches. Each buffered element
# is a 128x128x1 float32 image (64 KiB), so a 2048-element buffer costs ~128 MiB.
ds_train = ds_train.shuffle(2048)
ds_train = ds_train.batch(32)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
# Validation input pipeline: same preprocessing as the training data
# (grayscale -> float32 / 255 -> 128x128), but without shuffling.
#
# NOTE: this cell rebinds `ds_test`, so it is not idempotent. Re-run the cell
# that loads the dataset before re-running this one.
ds_test = ds_test.map(convert_to_gray, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_test = ds_test.map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_test = ds_test.map(resize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# The validation set is traversed once per epoch; cache the preprocessed images
# so the preprocessing is not repeated on every validation pass.
ds_test = ds_test.cache()
ds_test = ds_test.batch(64)
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)

Referenz: [Keras CNN Dog or Cat Classification (Kaggle)](https://www.kaggle.com/code/uysimty/keras-cnn-dog-or-cat-classification)

In [None]:
# CNN for binary classification of 128x128 single-channel images.
#
# Feature extractor: three stages with 32, 64 and 128 filters, each stage being
# Conv2D(3x3, ReLU) -> BatchNormalization -> MaxPool(2x2) -> Dropout(0.25).
feature_layers = []
for stage_index, n_filters in enumerate((32, 64, 128)):
    # Only the very first layer declares the input shape.
    conv_kwargs = {"input_shape": (128, 128, 1)} if stage_index == 0 else {}
    feature_layers += [
        layers.Conv2D(n_filters, (3, 3), activation='relu', **conv_kwargs),
        layers.BatchNormalization(),
        layers.MaxPool2D((2, 2)),
        layers.Dropout(0.25),
    ]

# Classifier head: one hidden dense layer, then a single sigmoid unit.
classifier_head = [
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid"),
]

model = Sequential(feature_layers + classifier_head)

# Binary cross-entropy matches the single sigmoid output unit.
model.compile(
    optimizer=optimizers.RMSprop(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"],
)

In [None]:
# Print a summary of the model's layers.
model.summary()

In [None]:
# Timestamp of this run (day, month, hour, minute); it is embedded in the
# checkpoint path below so that separate runs write to separate files.
# NOTE(review): the name `time` would shadow the stdlib module of the same name
# if that module were ever imported in this notebook.
time = format(datetime.now(), "%d%m-%H%M")

checkpoint_path = "checkpoints/cats-dogs-{}.ckpt".format(time)

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# NOTE(review): checkpointing and early stopping leave `monitor` at the Keras
# default, while the learning-rate schedule watches `val_accuracy` - confirm
# that this mix of metrics is intended.

# Write weights to `checkpoint_path` only when the monitored metric improves.
checkpoints = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_best_only=True,
)

# Stop after 10 epochs without improvement and restore the best weights seen.
stop = EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate after 2 epochs without a val_accuracy improvement,
# but never go below 1e-5.
lr_reduce = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=2,
    min_lr=0.00001,
    verbose=1,
)

callbacks = [checkpoints, stop, lr_reduce]

In [None]:
# Train for at most 20 epochs, validating on `ds_test` after every epoch.
# `use_multiprocessing` was dropped: it is documented as applying to generator /
# `keras.utils.Sequence` inputs only, so it has no effect on a `tf.data.Dataset`,
# and newer Keras releases no longer accept the argument.
history = model.fit(ds_train, validation_data=ds_test, epochs=20, callbacks=callbacks)

In [None]:
# Per-epoch training metrics as a DataFrame: one row per epoch, one column per metric.
history_df = pd.DataFrame(history.history)

# Bare last expression: Jupyter renders the frame as a rich table instead of plain text.
history_df

In [None]:
# Best validation accuracy over all recorded epochs, rounded to two decimals.
# NOTE(review): this number only labels the file; the weights being saved are
# whatever the model holds at this point - confirm they belong to that epoch.
val_accuracy_per_epoch = history_df["val_accuracy"]
highest = val_accuracy_per_epoch.max().round(2)

model.save("cats-dogs-{}-{}.h5".format(time, highest))

In [None]:
# Training vs. validation curves side by side: loss on the left, accuracy on the right.
fig, axs = plt.subplots(1, 2, figsize=(20, 7))

# (panel title / y-axis label, metric columns of `history_df` drawn in that panel)
panels = [
    ("Loss", ["loss", "val_loss"]),
    ("Accuracy", ["accuracy", "val_accuracy"]),
]

for ax, (panel_title, metric_names) in zip(axs, panels):
    for metric_name in metric_names:
        sns.lineplot(x=history_df.index, y=history_df[metric_name], ax=ax, label=metric_name)
    ax.set(title=panel_title, xlabel="Epochen", ylabel=panel_title)

# Ending on plt.show() keeps a stray `Text(...)` repr out of the cell output.
plt.show()