# Training models

The following notebook is the one used on Google Cloud Platform to train the classification models.

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator, img_to_array, array_to_img
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Flatten, GlobalAveragePooling2D, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.optimizers import SGD, Adam, Adadelta
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import pickle
#from google.cloud import storage

# Data Preparation

The cell below is specific to Google Cloud Platform.

In [3]:

# Copy every object under one bucket "folder" into the matching local class
# directory. Only the Organic class is fetched here; presumably the cell was
# re-run with other prefixes for the remaining classes -- TODO confirm.

# Imported here because the top-of-notebook import is commented out; without
# it `storage` is undefined and this cell fails with a NameError.
from google.cloud import storage

storage_client = storage.Client()
bucket = storage_client.get_bucket("compiled-dataset")

my_prefix = "Compiled Dataset/Organic/"   # remote "folder" inside the bucket
filename = "dataset/Organic/"             # local destination directory

# download_to_filename() writes to the given path as-is, so the destination
# directory has to exist before the first download.
os.makedirs(filename, exist_ok=True)

# delimiter="/" restricts the listing to objects directly under the prefix.
blobs = bucket.list_blobs(prefix=my_prefix, delimiter="/")

for blob in blobs:
    if blob.name == my_prefix:
        # The zero-byte placeholder object for the folder itself: nothing to download.
        continue
    # Swap only the leading remote prefix for the local directory.
    file_name = blob.name.replace(my_prefix, filename, 1)
    blob.download_to_filename(file_name)




In [2]:
# Count the entries in every class folder of the local dataset and show the
# overall number of files as the cell output.
def _n_entries(class_dir):
    """Return how many directory entries dataset/<class_dir> contains."""
    return len(os.listdir("dataset/" + class_dir))


plastic_box = _n_entries("Plastic Box")
plastic_bottle = _n_entries("Plastic Bottle")
plastic_bag = _n_entries("Plastic Bag")
plastic_round_container = _n_entries("Plastic Round Container")
cardboard = _n_entries("Cardboard")
drinking_carton = _n_entries("Drinking Carton")
glass_bottle = _n_entries("Glass Bottle")
glass_cup = _n_entries("Glass Cup")
metal_can = _n_entries("Metal Can")
paper = _n_entries("Paper")
chips_bag = _n_entries("Chips Bag")
organic = _n_entries("Organic")

total = sum((
    plastic_box, plastic_bottle, plastic_bag, plastic_round_container,
    cardboard, drinking_carton, glass_bottle, glass_cup,
    metal_can, paper, chips_bag, organic,
))
total

31749

In [3]:
# Report the per-class file counts, one "label: count" line per class.
per_class_report = (
    ("plastic box:", plastic_box),
    ("plastic bottle:", plastic_bottle),
    ("plastic bag:", plastic_bag),
    ("plastic round container:", plastic_round_container),
    ("cardboard:", cardboard),
    ("drinking carton:", drinking_carton),
    ("glass bottle:", glass_bottle),
    ("glass cup:", glass_cup),
    ("metal can:", metal_can),
    ("paper:", paper),
    ("organic:", organic),
    ("chips bag:", chips_bag),
)
for class_label, class_count in per_class_report:
    print(class_label, class_count)

plastic box: 2346
plastic bottle: 2479
plastic bag: 2751
plastic round container: 2705
cardboard: 2781
drinking carton: 2789
glass bottle: 2523
glass cup: 2548
metal can: 2782
paper: 2436
organic: 2859
chips bag: 2750


In [2]:
# Delete every ".ipynb_checkpoints" folder below the working directory.
# Done in Python rather than with "rm -rf `find ...`": the backtick form splits
# paths on whitespace, and class folders such as "dataset/Plastic Box" contain
# spaces, so checkpoints inside them were never removed.
import shutil

for parent_dir, child_dirs, _ in os.walk("."):
    if ".ipynb_checkpoints" in child_dirs:
        # ignore_errors mirrors the "-f" flag of the original shell command.
        shutil.rmtree(os.path.join(parent_dir, ".ipynb_checkpoints"), ignore_errors=True)
        # Prune the entry so os.walk does not descend into the deleted folder.
        child_dirs.remove(".ipynb_checkpoints")

In [3]:
# Root folder that holds one sub-directory per class.
data_path = "dataset"

# Class name -> integer label. The names are listed in index order, so the
# position in the tuple is the label.
material_labels = {
    class_name: class_index
    for class_index, class_name in enumerate((
        'Cardboard',
        'Chips Bag',
        'Drinking Carton',
        'Glass Bottle',
        'Glass Cup',
        'Metal Can',
        'Organic',
        'Paper',
        'Plastic Bag',
        'Plastic Bottle',
        'Plastic Box',
        'Plastic Round Container',
    ))
}


In [4]:
# Input pipeline shared by the DenseNet169 and VGG19 sections below.
# Pixels are scaled to [0, 1] and 10% of the images are held out for validation.
DataGen = ImageDataGenerator(
    rescale = 1./255,
    validation_split = 0.1
)

# target_size is set explicitly: flow_from_directory otherwise resizes to its
# default of 256x256, which does not match the input_shape=(224,224,3) that
# both backbones are built with.
train_datagen = DataGen.flow_from_directory(
    data_path,
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    subset = "training")
validation_datagen = DataGen.flow_from_directory(
    data_path,
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    subset = "validation")

Found 28580 images belonging to 12 classes.
Found 3169 images belonging to 12 classes.


In [5]:
# Show the class-name -> index mapping of each subset; the two lines printed
# should be identical.
for split_iterator in (train_datagen, validation_datagen):
    print(split_iterator.class_indices)

{'Cardboard': 0, 'Chips Bag': 1, 'Drinking Carton': 2, 'Glass Bottle': 3, 'Glass Cup': 4, 'Metal Can': 5, 'Organic': 6, 'Paper': 7, 'Plastic Bag': 8, 'Plastic Bottle': 9, 'Plastic Box': 10, 'Plastic Round Container': 11}
{'Cardboard': 0, 'Chips Bag': 1, 'Drinking Carton': 2, 'Glass Bottle': 3, 'Glass Cup': 4, 'Metal Can': 5, 'Organic': 6, 'Paper': 7, 'Plastic Bag': 8, 'Plastic Bottle': 9, 'Plastic Box': 10, 'Plastic Round Container': 11}


In [6]:
# Report how many images ended up in each subset.
for caption, split_iterator in (
    ("Training Images:", train_datagen),
    ("Validation Images:", validation_datagen),
):
    print(caption, split_iterator.samples)

Training Images: 28580
Validation Images: 3169


# DenseNet169

In [20]:
from tensorflow.keras.applications import DenseNet169

# ImageNet-pretrained DenseNet169 without its classification top, used as a
# feature extractor for 224x224 RGB inputs.
densenet = DenseNet169(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

In [21]:
# Freeze every backbone layer so only the new head is trained.
for backbone_layer in densenet.layers:
    backbone_layer.trainable = False

# New head: global average pooling followed by a 12-way softmax classifier.
base_output = GlobalAveragePooling2D()(densenet.output)
final_output = Dense(12, activation="softmax")(base_output)
final_output

<tf.Tensor 'dense_1/Softmax:0' shape=(None, 12) dtype=float32>

In [22]:
# Join the frozen DenseNet169 backbone and the new softmax head into one model.
my_densenet_model = Model(
    inputs=densenet.input,
    outputs=final_output,
)

In [23]:
# Delete every ".ipynb_checkpoints" folder below the working directory.
# Done in Python rather than with "rm -rf `find ...`": the backtick form splits
# paths on whitespace, and class folders such as "dataset/Plastic Box" contain
# spaces, so checkpoints inside them were never removed.
import shutil

for parent_dir, child_dirs, _ in os.walk("."):
    if ".ipynb_checkpoints" in child_dirs:
        # ignore_errors mirrors the "-f" flag of the original shell command.
        shutil.rmtree(os.path.join(parent_dir, ".ipynb_checkpoints"), ignore_errors=True)
        # Prune the entry so os.walk does not descend into the deleted folder.
        child_dirs.remove(".ipynb_checkpoints")

In [24]:
# Stop training once val_loss has gone 10 epochs without improving.
early_stopping = EarlyStopping(patience=10, monitor="val_loss")

# Write the model to disk only on epochs that improve on the best one so far.
model_save = ModelCheckpoint(
    "densenet169_model.h5",
    verbose=1,
    save_best_only=True,
)

In [25]:
# Train the DenseNet169 head for up to 100 epochs; the callbacks handle early
# stopping and checkpointing.
my_densenet_model.compile(
    loss="categorical_crossentropy",
    optimizer="Adam",
    metrics=["accuracy"],
)
history_densenet169 = my_densenet_model.fit(
    train_datagen,
    epochs=100,
    # Number of full 32-image batches per pass over the training subset.
    steps_per_epoch=train_datagen.samples // 32,
    validation_data=validation_datagen,
    callbacks=[early_stopping, model_save],
)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.31151, saving model to densenet169_model.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.31151 to 0.28365, saving model to densenet169_model.h5
Epoch 3/100
Epoch 00003: val_loss did not improve from 0.28365
Epoch 4/100
Epoch 00004: val_loss did not improve from 0.28365
Epoch 5/100
Epoch 00005: val_loss improved from 0.28365 to 0.28247, saving model to densenet169_model.h5
Epoch 6/100
Epoch 00006: val_loss improved from 0.28247 to 0.28008, saving model to densenet169_model.h5
Epoch 7/100
Epoch 00007: val_loss did not improve from 0.28008
Epoch 8/100
Epoch 00008: val_loss did not improve from 0.28008
Epoch 9/100
Epoch 00009: val_loss did not improve from 0.28008
Epoch 10/100
Epoch 00010: val_loss did not improve from 0.28008
Epoch 11/100
Epoch 00011: val_loss did not improve from 0.28008
Epoch 12/100
Epoch 00012: val_loss did not improve from 0.28008
Epoch 13/100
Epoch 00013: val_loss did not improve from 0.28008
Epoch 14/100


In [34]:
# Persist the DenseNet169 per-epoch metrics dict for later analysis.
with open('./trainHistoryDensenet169', 'wb') as history_file:
    pickle.dump(history_densenet169.history, history_file)

# VGG19

In [30]:
from tensorflow.keras.applications import VGG19

# ImageNet-pretrained VGG19 without its top; pooling="avg" applies global
# average pooling to the backbone output.
vgg19 = VGG19(weights="imagenet", include_top=False, pooling="avg", input_shape=(224, 224, 3))

In [31]:
# Freeze the pretrained backbone; only the dense head below is trained.
for backbone_layer in vgg19.layers:
    backbone_layer.trainable = False

# Head: two 4096-unit ReLU layers followed by a 12-way softmax.
fc1 = Dense(4096, activation="relu", name="fc1")(vgg19.output)
fc2 = Dense(4096, activation="relu", name="fc2")(fc1)
predictions = Dense(12, activation="softmax", name="predictions")(fc2)

my_vgg19_model = Model(
    inputs=vgg19.input,
    outputs=predictions,
)

In [32]:
# Print the layer-by-layer architecture and parameter counts of the assembled model.
my_vgg19_model.summary()

Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)      

In [33]:
# Callbacks: stop after 10 epochs without val_loss improvement, and write the
# model to disk only on epochs that improve on the best one so far.
early_stopping = EarlyStopping(patience=10, monitor="val_loss")
model_save = ModelCheckpoint(
    "vgg19_model.h5",
    verbose=1,
    save_best_only=True,
)

# Train the VGG19 head for up to 100 epochs.
my_vgg19_model.compile(
    loss="categorical_crossentropy",
    optimizer="Adam",
    metrics=["accuracy"],
)
history_vgg19 = my_vgg19_model.fit(
    train_datagen,
    epochs=100,
    # Number of full 32-image batches per pass over the training subset.
    steps_per_epoch=train_datagen.samples // 32,
    validation_data=validation_datagen,
    callbacks=[early_stopping, model_save],
)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.79075, saving model to vgg19_model.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.79075 to 0.64977, saving model to vgg19_model.h5
Epoch 3/100
Epoch 00003: val_loss did not improve from 0.64977
Epoch 4/100
Epoch 00004: val_loss improved from 0.64977 to 0.64609, saving model to vgg19_model.h5
Epoch 5/100
Epoch 00005: val_loss improved from 0.64609 to 0.62257, saving model to vgg19_model.h5
Epoch 6/100
Epoch 00006: val_loss improved from 0.62257 to 0.60952, saving model to vgg19_model.h5
Epoch 7/100
Epoch 00007: val_loss did not improve from 0.60952
Epoch 8/100
Epoch 00008: val_loss improved from 0.60952 to 0.57948, saving model to vgg19_model.h5
Epoch 9/100
Epoch 00009: val_loss improved from 0.57948 to 0.57225, saving model to vgg19_model.h5
Epoch 10/100
Epoch 00010: val_loss improved from 0.57225 to 0.54091, saving model to vgg19_model.h5
Epoch 11/100
Epoch 00011: val_loss did not improve from 0.54091
Epoch 12/100
Epoch 00

In [35]:
# Persist the VGG19 per-epoch metrics dict for later analysis.
with open('./trainHistoryVGG19', 'wb') as history_file:
    pickle.dump(history_vgg19.history, history_file)

# InceptionResNetV2

In [7]:
# Rebuild the generators at 299x299, the input size the InceptionResNetV2
# backbone below is created with. This rebinds train_datagen and
# validation_datagen from the earlier sections.
DataGen = ImageDataGenerator(validation_split=0.1, rescale=1./255)

train_datagen = DataGen.flow_from_directory(
    data_path,
    subset="training",
    class_mode="categorical",
    target_size=(299, 299),
    batch_size=32,
)
validation_datagen = DataGen.flow_from_directory(
    data_path,
    subset="validation",
    class_mode="categorical",
    target_size=(299, 299),
    batch_size=32,
)

Found 28580 images belonging to 12 classes.
Found 3169 images belonging to 12 classes.


In [6]:
from tensorflow.keras.applications import InceptionResNetV2

# ImageNet-pretrained InceptionResNetV2 without its classification top, for
# 299x299 RGB inputs.
inceptionresnet = InceptionResNetV2(
    weights="imagenet",
    input_shape=(299, 299, 3),
    include_top=False,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [9]:
# Freeze the pretrained backbone; only the new head is trained.
for backbone_layer in inceptionresnet.layers:
    backbone_layer.trainable = False

# Head: global average pooling followed by a 12-way softmax classifier.
base_output = GlobalAveragePooling2D()(inceptionresnet.output)
predictions = Dense(12, activation="softmax", name="predictions")(base_output)

my_inceptionresnet_model = Model(
    inputs=inceptionresnet.input,
    outputs=predictions,
)

In [11]:
# Callbacks: stop after 10 epochs without val_loss improvement, and write the
# model to disk only on epochs that improve on the best one so far.
early_stopping = EarlyStopping(patience=10, monitor="val_loss")
model_save = ModelCheckpoint(
    "inceptionresnet_model.h5",
    verbose=1,
    save_best_only=True,
)

# Train the InceptionResNetV2 head for up to 100 epochs.
my_inceptionresnet_model.compile(
    loss="categorical_crossentropy",
    optimizer="Adam",
    metrics=["accuracy"],
)
history_inceptionresnet = my_inceptionresnet_model.fit(
    train_datagen,
    epochs=100,
    # Number of full 32-image batches per pass over the training subset.
    steps_per_epoch=train_datagen.samples // 32,
    validation_data=validation_datagen,
    callbacks=[early_stopping, model_save],
)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.31439, saving model to inceptionresnet_model.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.31439 to 0.27194, saving model to inceptionresnet_model.h5
Epoch 3/100
Epoch 00003: val_loss improved from 0.27194 to 0.26025, saving model to inceptionresnet_model.h5
Epoch 4/100
Epoch 00004: val_loss did not improve from 0.26025
Epoch 5/100
Epoch 00005: val_loss improved from 0.26025 to 0.25946, saving model to inceptionresnet_model.h5
Epoch 6/100
Epoch 00006: val_loss improved from 0.25946 to 0.25592, saving model to inceptionresnet_model.h5
Epoch 7/100
Epoch 00007: val_loss did not improve from 0.25592
Epoch 8/100
Epoch 00008: val_loss did not improve from 0.25592
Epoch 9/100
Epoch 00009: val_loss did not improve from 0.25592
Epoch 10/100
Epoch 00010: val_loss did not improve from 0.25592
Epoch 11/100
Epoch 00011: val_loss did not improve from 0.25592
Epoch 12/100
Epoch 00012: val_loss did not improve from 0.25592
Epoch 13/100
Ep

In [12]:
# Persist the InceptionResNetV2 per-epoch metrics dict for later analysis.
with open('./trainHistoryInceptionResNet', 'wb') as history_file:
    pickle.dump(history_inceptionresnet.history, history_file)

# NASNetLarge

In [4]:
# Data preparation for NASNetLarge: 331x331 inputs in batches of 16
# (presumably a smaller batch to fit the larger images in memory -- TODO confirm).
DataGen = ImageDataGenerator(validation_split=0.1, rescale=1./255)

train_datagen_nasnet = DataGen.flow_from_directory(
    data_path,
    subset="training",
    class_mode="categorical",
    target_size=(331, 331),
    batch_size=16,
)
validation_datagen_nasnet = DataGen.flow_from_directory(
    data_path,
    subset="validation",
    class_mode="categorical",
    target_size=(331, 331),
    batch_size=16,
)

Found 28580 images belonging to 12 classes.
Found 3169 images belonging to 12 classes.


In [5]:
from tensorflow.keras.applications import NASNetLarge

# ImageNet-pretrained NASNetLarge without its classification top, for
# 331x331 RGB inputs.
nasnetlarge = NASNetLarge(weights="imagenet", include_top=False, input_shape=(331, 331, 3))

In [6]:
# Freeze the pretrained backbone; only the new head is trained.
for backbone_layer in nasnetlarge.layers:
    backbone_layer.trainable = False

# Head: global average pooling followed by a 12-way softmax classifier.
base_output = GlobalAveragePooling2D()(nasnetlarge.output)
predictions = Dense(12, activation="softmax")(base_output)

my_nasnetlarge_model = Model(
    inputs=nasnetlarge.input,
    outputs=predictions,
)

In [7]:
# Callbacks: stop after 10 epochs without val_loss improvement, and write the
# model to disk only on epochs that improve on the best one so far.
early_stopping = EarlyStopping(patience=10, monitor="val_loss")
model_save = ModelCheckpoint(
    "nasnetlarge_model.h5",
    verbose=1,
    save_best_only=True,
)

# Train the NASNetLarge head for up to 100 epochs.
my_nasnetlarge_model.compile(
    loss="categorical_crossentropy",
    optimizer="Adam",
    metrics=["accuracy"],
)
history_nasnetlarge = my_nasnetlarge_model.fit(
    train_datagen_nasnet,
    epochs=100,
    # Number of full 16-image batches per pass over the training subset.
    steps_per_epoch=train_datagen_nasnet.samples // 16,
    validation_data=validation_datagen_nasnet,
    callbacks=[early_stopping, model_save],
)


Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.27129, saving model to nasnetlarge_model.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.27129 to 0.26862, saving model to nasnetlarge_model.h5
Epoch 3/100
Epoch 00003: val_loss did not improve from 0.26862
Epoch 4/100
Epoch 00004: val_loss did not improve from 0.26862
Epoch 5/100
Epoch 00005: val_loss did not improve from 0.26862
Epoch 6/100
Epoch 00006: val_loss did not improve from 0.26862
Epoch 7/100
Epoch 00007: val_loss did not improve from 0.26862
Epoch 8/100
Epoch 00008: val_loss did not improve from 0.26862
Epoch 9/100
Epoch 00009: val_loss did not improve from 0.26862
Epoch 10/100
Epoch 00010: val_loss did not improve from 0.26862
Epoch 11/100
Epoch 00011: val_loss did not improve from 0.26862
Epoch 12/100
Epoch 00012: val_loss did not improve from 0.26862


In [8]:
# Persist the NASNetLarge per-epoch metrics dict for later analysis.
filename = "trainHistoryNASNetLarge"

# The context manager closes the file even if pickling raises; the previous
# manual open()/close() pair leaked the handle in that case.
with open(filename, 'wb') as outfile:
    pickle.dump(history_nasnetlarge.history, outfile)