In [1]:
#Data Prep Stage
import numpy as np
import shutil


In [2]:
#make sure the directory is clean 
try:
    shutil.rmtree("../Project3_data/cnn-split/train")
    shutil.rmtree("../Project3_data/cnn-split/test")
except:
    pass

In [3]:
#create directories for each class in the train and test directories.
import os
# ensure directories exist
from pathlib import Path


Path("../Project3/cnn-split/train/no_damage").mkdir(parents=True, exist_ok=True)
Path("../Project3/cnn-split/train/damage").mkdir(parents=True, exist_ok=True)

Path("../Project3/cnn-split/test/no_damage").mkdir(parents=True, exist_ok=True)
Path("../Project3/cnn-split/test/damage").mkdir(parents=True, exist_ok=True)

In [4]:
all_no_damage = os.listdir('../Project3/data_all_modified/no_damage')
all_damage = os.listdir('../Project3/data_all_modified/damage')

In [5]:
import random

train_no_damage = random.sample(all_no_damage, int(len(all_no_damage)*0.8))
print("train no_damage image count: ", len(train_no_damage))
test_no_damage = [ p for p in all_no_damage if p not in train_no_damage]
print("test no_damage image count: ", len(test_no_damage))
# ensure no overlap:
overlap = [p for p in train_no_damage if p in test_no_damage]
print("len of overlap: ", len(overlap))


train_damage = random.sample(all_damage, int(len(all_damage)*0.8))
print("train damage image count: ", len(train_damage))
test_damage = [ p for p in all_damage if p not in train_damage]
print("test damage image count: ", len(test_damage))
# ensure no overlap:
overlap = [p for p in train_damage if p in test_damage]
print("len of overlap: ", len(overlap))

train no_damage image count:  5721
test no_damage image count:  1431
len of overlap:  0
train damage image count:  11336
test damage image count:  2834
len of overlap:  0


In [6]:
# ensure to copy the images to the directories
for p in train_no_damage:
    shutil.copyfile(os.path.join('../Project3/data_all_modified/no_damage', p), os.path.join('../Project3/cnn-split/train/no_damage', p) )

for p in test_no_damage:
    shutil.copyfile(os.path.join('../Project3/data_all_modified/no_damage', p), os.path.join('../Project3/cnn-split/test/no_damage', p) )

for p in train_damage:
    shutil.copyfile(os.path.join('../Project3/data_all_modified/damage', p), os.path.join('../Project3/cnn-split/train/damage', p) )

for p in test_damage:
    shutil.copyfile(os.path.join('../Project3/data_all_modified/damage', p), os.path.join('../Project3/cnn-split/test/damage', p) )

In [7]:
# check counts:
print("Files in train/no_damage: ", len(os.listdir("../Project3/cnn-split/train/no_damage")))
print("Files in train/damage: ", len(os.listdir("../Project3/cnn-split/train/damage")))

print("Files in test/no_damage: ", len(os.listdir("../Project3/cnn-split/test/no_damage")))
print("Files in test/damage: ", len(os.listdir("../Project3/cnn-split/test/damage")))

Files in train/no_damage:  6870
Files in train/damage:  13599
Files in test/no_damage:  2580
Files in test/damage:  5097


In [10]:
#Train preprocessing
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
train_data_dir = '../Project3/cnn-split/train'

batch_size = 32
# target image size
img_height = 150
img_width = 150

# note that subset="training", "validation", "both", and dictates which dataset is returned
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
train_data_dir,
validation_split=0.2,
subset="both",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size
)
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image,label:(rescale(image),label))
val_rescale_ds = val_ds.map(lambda image,label:(rescale(image),label))

Found 20469 files belonging to 2 classes.
Using 16376 files for training.
Using 4093 files for validation.


In [11]:
#Test preprocessing
test_data_dir = '../Project3/cnn-split/test/'

batch_size = 2

# this is what was used in the paper --
img_height = 150
img_width = 150

# note that subset="training", "validation", "both", and dictates what is returned
test_ds = tf.keras.utils.image_dataset_from_directory(
test_data_dir,
seed=123,
image_size=(img_height, img_width),
)

# approach 1: manually rescale data --
rescale = Rescaling(scale=1.0/255)
test_rescale_ds = test_ds.map(lambda image,label:(rescale(image),label))

Found 7677 files belonging to 2 classes.


In [16]:
#Imports
from keras import layers
from keras import models
import pandas as pd
from keras import optimizers

In [19]:
# Intializing a A dense (i.e., fully connected) ANN sequential model for Part 2 A
model_ann = models.Sequential()

model_ann.add(layers.Flatten(input_shape=(img_width, img_height, 3)))

model_ann.add(layers.Dense(512, activation='relu'))  
model_ann.add(layers.Dense(256, activation='relu'))  
model_ann.add(layers.Dense(1, activation='sigmoid'))  

# Compile model
# RMSprop (Root Mean Square Propagation) is commonly used in training deep neural networks.
model_ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Generating the summary of the model
model_ann.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 67500)             0         
                                                                 
 dense_1 (Dense)             (None, 512)               34560512  
                                                                 
 dense_2 (Dense)             (None, 256)               131328    
                                                                 
 dense_3 (Dense)             (None, 1)                 257       
                                                                 
Total params: 34692097 (132.34 MB)
Trainable params: 34692097 (132.34 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [20]:
#fit the model from image generator
history = model_ann.fit(
            train_rescale_ds,
            epochs=10,
            validation_data=val_rescale_ds
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [21]:
#Testing accuracy of the high density model
test_loss, test_accuracy = model_ann.evaluate(test_rescale_ds, verbose=0)
test_accuracy

0.7545916438102722

In [23]:
#persistance of the model for future use
model_ann.save("ANN.keras")

In [29]:
#lenet-5 model

model_lenet5 = models.Sequential()

# Layer 1: Convolutional layer with 6 filters of size 3x3, followed by average pooling
model_lenet5.add(layers.Conv2D(6, kernel_size=(3, 3), activation='relu', input_shape=(150,150,3)))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

# Layer 2: Convolutional layer with 16 filters of size 3x3, followed by average pooling
model_lenet5.add(layers.Conv2D(16, kernel_size=(3, 3), activation='relu'))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

# Flatten the feature maps to feed into fully connected layers
model_lenet5.add(layers.Flatten())


# Layer 3: Fully connected layer with 120 neurons
model_lenet5.add(layers.Dense(120, activation='relu'))

# Layer 4: Fully connected layer with 84 neurons
model_lenet5.add(layers.Dense(84, activation='relu'))

# Output layer: Fully connected layer with num_classes neurons (e.g., 3 )
model_lenet5.add(layers.Dense(2, activation='softmax'))

# Compile model
model_lenet5.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Generating the summary of the model
model_lenet5.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_17 (Conv2D)          (None, 148, 148, 6)       168       
                                                                 
 average_pooling2d_2 (Avera  (None, 74, 74, 6)         0         
 gePooling2D)                                                    
                                                                 
 conv2d_18 (Conv2D)          (None, 72, 72, 16)        880       
                                                                 
 average_pooling2d_3 (Avera  (None, 36, 36, 16)        0         
 gePooling2D)                                                    
                                                                 
 flatten_6 (Flatten)         (None, 20736)             0         
                                                                 
 dense_18 (Dense)            (None, 120)              

In [30]:
#fit the model from image generator
history = model_lenet5.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=10,
            validation_data=val_rescale_ds
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [32]:
#Testing accuracy of the high density model
test_loss, test_accuracy = model_lenet5.evaluate(test_rescale_ds, verbose=0)
test_accuracy

0.8991793394088745

In [33]:
#persistance of the model for future use
model_lenet5.save("lenet5.keras")

In [43]:
#Model From Paper
# Intializing a sequential model
model_cnn2 = models.Sequential()

model_cnn2.add(layers.Conv2D(32, (3, 3), activation='relu', padding="same", input_shape=(150,150,3)))

# Adding max pooling to reduce the size of output of first conv layer
model_cnn2.add(layers.MaxPooling2D((2, 2), padding = 'same'))

model_cnn2.add(layers.Conv2D(64, (3, 3), activation='relu', padding="same"))
model_cnn2.add(layers.MaxPooling2D((2, 2), padding = 'same'))

model_cnn2.add(layers.Conv2D(128, (3, 3), activation='relu', padding="same"))
model_cnn2.add(layers.MaxPooling2D((2, 2), padding = 'same'))

model_cnn2.add(layers.Conv2D(128, (3, 3), activation='relu', padding="same"))
model_cnn2.add(layers.MaxPooling2D((2, 2), padding = 'same'))

# flattening the output of the conv layer after max pooling to make it ready for creating dense connections
model_cnn2.add(layers.Flatten())

#Adding dropout for regularization
model_cnn2.add(layers.Dropout(.5))

# Adding a fully connected dense layer with 512 neurons
model_cnn2.add(layers.Dense(512, activation='relu'))

# Adding a fully connected dense layer with one neuron for binary classification
model_cnn2.add(layers.Dense(1, activation='sigmoid'))

# Compile model
# RMSprop (Root Mean Square Propagation) is commonly used in training deep neural networks.
model_cnn2.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# Generating the summary of the model
model_cnn2.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_43 (Conv2D)          (None, 150, 150, 32)      896       
                                                                 
 max_pooling2d_39 (MaxPooli  (None, 75, 75, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_44 (Conv2D)          (None, 75, 75, 64)        18496     
                                                                 
 max_pooling2d_40 (MaxPooli  (None, 38, 38, 64)        0         
 ng2D)                                                           
                                                                 
 conv2d_45 (Conv2D)          (None, 38, 38, 128)       73856     
                                                                 
 max_pooling2d_41 (MaxPooli  (None, 19, 19, 128)     

In [44]:
#fit the model from and training with less epochs since the paper claims these are the best parameters
#i.e. testing the accuracy when trained with less epochs
history = model_cnn2.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=5,
            validation_data=val_rescale_ds
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [45]:
#Testing accuracy of the high density model
test_loss, test_accuracy = model_cnn2.evaluate(test_rescale_ds, verbose=0)
test_accuracy

0.6921453475952148

In [46]:
#persistance of the model for future use
model_cnn2.save("CNN_2.keras")