# Image Classification Project 6
Choose three classes from the Open Images Dataset. Train a neural net that is able to classify images into these three categories.



In [4]:
classes = ['Cat', 'Dog', 'Person']
print(classes)

['Cat', 'Dog', 'Person']


## Dataset
https://storage.googleapis.com/openimages/web/visualizer/index.html?type=detection

## Base model
VGG 19

In [5]:
# imports
import numpy as np
import matplotlib.pyplot as plt
from keras.applications import VGG19
from keras.layers import Dense, Flatten, Conv2D, LeakyReLU
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
import tensorflow as tf
import fiftyone as fo
import fiftyone.zoo as foz

#check if GPU is available
print(tf.reduce_sum(tf.random.normal([1000, 1000])))
print(tf.config.list_physical_devices('GPU'))



tf.Tensor(25.663765, shape=(), dtype=float32)
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Task
1. Preparation: Split dataset into a 70/30 Train/test split


In [6]:
ratio_train_test = 0.3
num_samples = 1500
# Customize where zoo datasets are downloaded
dataset_name = "open-images-v7"
seed = 42
splits = ('train', 'test', 'validation')



# load cat data
chosen_class = classes[0]

if fo.dataset_exists(chosen_class):
    dataset = fo.load_dataset(chosen_class)

dataset = foz.load_zoo_dataset(
    name=dataset_name,
    splits=splits,
    label_types=["classifications"],
    classes=chosen_class,
    max_samples=num_samples,
    seed=seed,
    shuffle=True,
    dataset_name="cdp-dataset",
    only_matching=True,
)
print(dataset)

# load dog data
chosen_class = classes[1]

if fo.dataset_exists(chosen_class):
    dataset = fo.load_dataset(chosen_class)

dataset_dog = foz.load_zoo_dataset(
    name=dataset_name,
    splits=splits,
    label_types=["classifications"],
    classes=chosen_class,
    max_samples=num_samples,
    seed=seed,
    shuffle=True,
    dataset_name=chosen_class,
    dataset_path=chosen_class,
    only_matching=True,
)

dataset.merge_samples(dataset_dog)

# load person data
chosen_class = classes[2]

if fo.dataset_exists(chosen_class):
    dataset = fo.load_dataset(chosen_class)

dataset_person = foz.load_zoo_dataset(
    name=dataset_name,
    splits=splits,
    label_types=["classifications"],
    classes=chosen_class,
    max_samples=num_samples,
    seed=seed,
    shuffle=True,
    dataset_name=chosen_class,
    dataset_path=chosen_class,
    only_matching=True,
)

dataset.merge_samples(dataset_person)

print(fo.list_datasets())


Downloading split 'train' to 'C:\Users\Michael\fiftyone\open-images-v7\train' if necessary
Downloading 'https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv' to 'C:\Users\Michael\fiftyone\open-images-v7\train\metadata\image_ids.csv'
 100% |██████|    4.8Gb/4.8Gb [1.1m elapsed, 0s remaining, 70.6Mb/s]      
Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to 'C:\Users\Michael\fiftyone\open-images-v7\train\metadata\classes.csv'
Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to 'C:\Users\Michael\AppData\Local\Temp\tmpeyrxl_gq\metadata\hierarchy.json'
Downloading 'https://storage.googleapis.com/openimages/v5/train-annotations-human-imagelabels-boxable.csv' to 'C:\Users\Michael\fiftyone\open-images-v7\train\labels\classifications.csv'
Downloading 1500 images
 100% |█████████████████| 1500/1500 [43.4s elapsed, 0s remaining, 26.5 files/s]      
Downloading split '

In [12]:
print(dataset.info.keys())
print(fo.list_datasets())
fo.delete_dataset("Dog")
fo.delete_dataset("Person")

session = fo.launch_app(dataset.view())

dict_keys(['hierarchy', 'classes_map'])
['Dog', 'Person', 'cdp-dataset']


In [15]:
export_dir = "datasets/cdp--tfrecords"
label_field = "Classifications"

dataset_or_view = fo.load_dataset("cdp-dataset")

# Export the dataset as tfrecords

tf_records = fo.Dataset.from_dir(
    export_dir,
    dataset_type=fo.types.TFObjectDetectionDataset,
    label_field=label_field,
    dataset_name="cdp-dataset-",
    only_matching=True,



Directory 'datasets/cdp--tfrecords' already exists; export will be merged with existing files


ValueError: No compatible field(s) of type <class 'fiftyone.core.labels.Classification'> found

In [None]:

# Define parameters for the loader
img_height = 224
img_width = 224


# Load the training data
train_datagen = ImageDataGenerator(
    rescale=1. / 255,


)

train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=(img_height, img_width),
    class_mode='categorical',
    subset='training')  # set as training data

test_generator = train_datagen.flow_from_directory(
    test_path,
    target_size=(img_height, img_width),
    class_mode='categorical',
    subset='training')  # set as training data

# Load the validation data
validation_generator = train_datagen.flow_from_directory(
    val_path,
    target_size=(img_height, img_width),
    class_mode='categorical',
    subset='validation')  # set as validation data



In [None]:
# Get the class labels and the number of images per class from the generators
class_labels = train_generator.class_indices
n_images_per_class = train_generator.classes.shape[0] // len(class_labels)

# Plot the datasets
fig, axes = plt.subplots(1, 2, figsize=(10, 5))

# Plot the training dataset
train_generator.class_indices = class_labels
class_counts_train = train_generator.classes
class_counts_train = [class_counts_train[class_counts_train == i].shape[0] for i in range(len(class_labels))]
axes[0].bar(class_labels.keys(), class_counts_train)
axes[0].set_title('Training Dataset')

# Plot the validation dataset
validation_generator.class_indices = class_labels
class_counts_val = validation_generator.classes
class_counts_val = [class_counts_val[class_counts_val == i].shape[0] for i in range(len(class_labels))]
axes[1].bar(class_labels.keys(), class_counts_val)
axes[1].set_title('Validation Dataset')

# Set the labels and show the plot
plt.xlabel('Classes')
plt.ylabel('Number of Images')
plt.tight_layout()
plt.show()


2. Train a VGG19 network from scratch (randomly initialized weights) and estimate the testset accuracy.

In [None]:
# Load a VGG19 model with random initialized weights
base_model = VGG19(weights=None, include_top=False, input_shape=(img_height, img_width, 3))

# Create a new model
plain_model = Sequential()
# Add the base model as the first layer
plain_model.add(base_model)
# Flatten the output of the base model
plain_model.add(Flatten())
# Add the final output layer with softmax activation
plain_model.add(Dense(len(classes), activation='softmax'))

# Compile the model
plain_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
plain_history = plain_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10)

plain_model.save("models/plain_model_raw_data.h5")

3. Use an imagenet pretrained VGG19 network, train the model and estimate the testset accuracy.

In [None]:
# Load a VGG19 model with random initialized weights
pre_trained_model = VGG19(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Create a new model
trained_model = Sequential()
# Add the base model as the first layer
trained_model.add(pre_trained_model)
# Flatten the output of the base model
trained_model.add(Flatten())
# Add the final output layer with softmax activation
trained_model.add(Dense(len(classes), activation='softmax'))

# Compile the model
trained_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
trained_history  = trained_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10)

trained_model.save("models/trained_model_raw_data.h5")

#### The differences in loss and accuracy of the plain and pre trained network over the first 10 epochs

In [None]:
# Extract loss and accuracy values for both models
plain_loss = plain_history.history['loss']
plain_accuracy = plain_history.history['accuracy']
trained_loss = trained_history.history['loss']
trained_accuracy = trained_history.history['accuracy']

# Plot the loss curves
plt.figure(figsize=(12, 6))
plt.plot(range(1, 11), plain_loss, 'b-', label='Plain Model')
plt.plot(range(1, 11), trained_loss, 'r-', label='Pre-trained Model')
plt.title('Loss Curves')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot the accuracy curves
plt.figure(figsize=(12, 6))
plt.plot(range(1, 11), plain_accuracy, 'b-', label='Plain Model')
plt.plot(range(1, 11), trained_accuracy, 'r-', label='Pre-trained Model')
plt.title('Accuracy Curves')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

4. Data cleansing: Remove “bad” images from the dataset. Which did you remove? How many? Discuss results.

5. Add data augmentation: 
    - Random flip
    - Random contrast
    - Random translation

In [None]:
def custom_preprocessing(image):
    # Generate a random contrast factor
    contrast_factor = np.random.uniform(0.8, 1.2)
    # Apply contrast adjustment
    image = image * contrast_factor
    image = tf.clip_by_value(image, 0.0, 1.0)  # Clip values to the [0, 1] range
    return image

In [None]:
# Define parameters for the loader
batch_size = 10
img_height = 224
img_width = 224

# Load the training data
train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        validation_split=0.3,
        horizontal_flip=True,  # Apply random flip
        vertical_flip=True,  # Apply random flip
        width_shift_range=0.2,  # Apply random translation
        height_shift_range=0.2,  # Apply random translation
        preprocessing_function=custom_preprocessing # Apply random contrast
)

train_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training')  # set as training data

# Load the validation data
validation_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation')  # set as validation data

#### Train both models again

In [None]:
# Load a VGG19 model with random initialized weights
base_model = VGG19(weights=None, include_top=False, input_shape=(img_height, img_width, 3))

# Create a new model
plain_model = Sequential()
# Add the base model as the first layer
plain_model.add(base_model)
# Flatten the output of the base model
plain_model.add(Flatten())
# Add the final output layer with softmax activation
plain_model.add(Dense(len(classes), activation='softmax'))

# Compile the model
plain_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
plain_history = plain_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10)

plain_model.save("models/plain_model_augmentated_data.h5")

In [None]:
# Load a VGG19 model with random initialized weights
pre_trained_model = VGG19(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Create a new model
trained_model = Sequential()
# Add the base model as the first layer
trained_model.add(pre_trained_model)
# Flatten the output of the base model
trained_model.add(Flatten())
# Add the final output layer with softmax activation
trained_model.add(Dense(len(classes), activation='softmax'))

# Compile the model
trained_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
trained_history  = trained_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10)

trained_model.save("models/trained_model_augmentated_data.h5")

In [None]:
# Extract loss and accuracy values for both models
plain_loss = plain_history.history['loss']
plain_accuracy = plain_history.history['accuracy']
trained_loss = trained_history.history['loss']
trained_accuracy = trained_history.history['accuracy']

# Plot the loss curves
plt.figure(figsize=(12, 6))
plt.plot(range(1, 11), plain_loss, 'b-', label='Plain Model')
plt.plot(range(1, 11), trained_loss, 'r-', label='Pre-trained Model')
plt.title('Loss Curves')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot the accuracy curves
plt.figure(figsize=(12, 6))
plt.plot(range(1, 11), plain_accuracy, 'b-', label='Plain Model')
plt.plot(range(1, 11), trained_accuracy, 'r-', label='Pre-trained Model')
plt.title('Accuracy Curves')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

6. Rebuild VGG19. After layer block4_conv4 (25, 25, 512):
    - Add inception layer with dimensionality reduction (no of output filters should be 512, choose own values for the filter dimensionality reduction in 1x1 layers)
    - Add conv layer (kernel 1x1,  filters 1024, padding valid, stride 1, activation leaky relu)
    - Add conv layer (kernel 3x3,  filters 1024, padding same, stride 1, activation relu)
    - Freeze conv2 layers and before

In [None]:
# Load a VGG19 model with random initialized weights
base_model = VGG19(weights=None, include_top=False, input_shape=(img_height, img_width, 3))

# Create a new model
plain_model = Sequential()

# Add layers until block4_conv4
for layer in base_model.layers:
    plain_model.add(layer)
    if layer.name == 'block4_conv4':
        break

# Freeze the layers before block2_conv2
freeze = True
for layer in plain_model.layers:
    if layer.name == 'block2_conv2':
        freeze = False
    layer.trainable = not freeze

# Add the inception layer with dimensionality reduction
plain_model.add(Conv2D(512, (1, 1), activation='relu'))
plain_model.add(Conv2D(256, (1, 1), activation='relu'))
plain_model.add(Conv2D(512, (3, 3), padding='same', activation='relu'))

# Add the conv layer with kernel 1x1, filters 1024, padding valid, stride 1, activation leaky relu
plain_model.add(Conv2D(1024, (1, 1), padding='valid', strides=1, activation=LeakyReLU(alpha=0.1)))

# Add the conv layer with kernel 3x3, filters 1024, padding same, stride 1, activation relu
plain_model.add(Conv2D(1024, (3, 3), padding='same', strides=1, activation='relu'))

# Flatten the output of the last added layer
plain_model.add(Flatten())

# Add the final output layer with softmax activation
plain_model.add(Dense(len(classes), activation='softmax'))

# Compile the model
plain_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
plain_history = plain_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10)

plain_model.save("models/plain_Rebuild_VGG19_model_augmentated_data.h5")

In [None]:
# Load a VGG19 model with random initialized weights
pre_trained_model = VGG19(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Create a new model
trained_model = Sequential()

# Add layers until block4_conv4
for layer in pre_trained_model.layers:
    trained_model.add(layer)
    if layer.name == 'block4_conv4':
        break

# Freeze the layers before block2_conv2
freeze = True
for layer in trained_model.layers:
    if layer.name == 'block2_conv2':
        freeze = False
    layer.trainable = not freeze

# Add the inception layer with dimensionality reduction
trained_model.add(Conv2D(512, (1, 1), activation='relu'))
trained_model.add(Conv2D(256, (1, 1), activation='relu'))
trained_model.add(Conv2D(512, (3, 3), padding='same', activation='relu'))

# Add the conv layer with kernel 1x1, filters 1024, padding valid, stride 1, activation leaky relu
trained_model.add(Conv2D(1024, (1, 1), padding='valid', strides=1, activation=LeakyReLU(alpha=0.1)))

# Add the conv layer with kernel 3x3, filters 1024, padding same, stride 1, activation relu
trained_model.add(Conv2D(1024, (3, 3), padding='same', strides=1, activation='relu'))

# Flatten the output of the last added layer
trained_model.add(Flatten())

# Add the final output layer with softmax activation
trained_model.add(Dense(len(classes), activation='softmax'))

# Compile the model
trained_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
trained_history  = trained_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10)

trained_model.save("models/trained_Rebuild_VGG19_model_augmentated_data.h5")

In [None]:
# Extract loss and accuracy values for both models
plain_loss = plain_history.history['loss']
plain_accuracy = plain_history.history['accuracy']
trained_loss = trained_history.history['loss']
trained_accuracy = trained_history.history['accuracy']

# Plot the loss curves
plt.figure(figsize=(12, 6))
plt.plot(range(1, 11), plain_loss, 'b-', label='Plain Model')
plt.plot(range(1, 11), trained_loss, 'r-', label='Pre-trained Model')
plt.title('Loss Curves')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot the accuracy curves
plt.figure(figsize=(12, 6))
plt.plot(range(1, 11), plain_accuracy, 'b-', label='Plain Model')
plt.plot(range(1, 11), trained_accuracy, 'r-', label='Pre-trained Model')
plt.title('Accuracy Curves')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

7. Test a few of your own images and present the results

8. Answer the following questions:
    - What accuracy can be achieved? What is the accuracy of the train vs. test set?
    - On what infrastructure did you train it? What is the inference time?
    - What are the number of parameters of the model?
    - Which categories are most likely to be confused by the algorithm? Show results in a confusion matrix.