In [1]:
#libraries for creating neural networks
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Activation, Dense, BatchNormalization, Conv2D, MaxPool2D, Dropout, GlobalMaxPool2D, Conv2DTranspose
from tensorflow.keras.layers import concatenate, add
from keras.layers.core import Lambda, RepeatVector, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import binary_crossentropy, MeanIoU, categorical_crossentropy
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

from tqdm import tqdm_notebook, tnrange
from itertools import chain
from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

#libraries to help visualize data
import matplotlib.pyplot as plt

import os
import glob
import random

Using TensorFlow backend.


In [2]:
#read in data
#original img width and height: 1080 x 720 px
train_folder = "HighwayDriving/Train"
test_folder = "HighwayDriving/Test"
os.chdir(train_folder) #get into folder
print(os.getcwd())

#counter for number of images
imgsListTrain = []
imgsListTest = []
labelTrain = []
labelTest = []
imgDirectoryTrain = []
imgDirectoryTest = []

#get list of images (train folder)
for (root, dirs, files) in os.walk("./", topdown=True): #get filenames and folders in current directory
#     print("Folders")
#     print(root)
#     print("Subdirectories")
#     print(dirs)
#     print("Files")
#     print(files)
    
    #save subdirectories (so that img paths can be made later)
    if root == "./":
        imgDirectoryTrain = dirs
    
    #count image files here
    index = root.find("image")
    if index != -1:
        for img in files:
            imgsListTrain.append(img)
    
    #obtain labels of image files
    label = root.find("label")
    if label != -1:
        for ids in files: 
            labelTrain.append(ids)

# print(imgDirectoryTrain)
# print(len(imgsListTrain))
# print(imgsListTrain)
# print(labelTrain)

os.chdir("../")
os.chdir("../")

#do the same for the test images
os.chdir(test_folder)
print(os.getcwd())

for (root, dirs, files) in os.walk("./", topdown=True):
    #save subdirectories (so that test img paths can be made later)
    if root == "./":
        imgDirectoryTest = dirs
    
    #count image files here
    index = root.find("image")
    if index != -1:
        for img in files:
            imgsListTest.append(img)
    
    #obtain labels of image files
    label = root.find("label")
    if label != -1:
        for ids in files: 
            labelTest.append(ids)

# print(imgDirectoryTest)
# print(len(imgsListTest))
# print(imgsListTest)
# print(labelTest)

os.chdir("../")
os.chdir("../")


C:\Users\truon\CSCI 4931-DL\PA3\HighwayDriving\Train
C:\Users\truon\CSCI 4931-DL\PA3\HighwayDriving\Test


In [3]:
#convert images into np array and process it
#original image was 1080 x 720, but need to resize image to have equal width and height
img_width = 256
img_height= 256
channels = 10 #since the mask is color coded with multiple classes (10 channels for 10 classes)
#source: https://stackoverflow.com/questions/51590843/keras-multi-class-semantic-segmentation-label

X = np.zeros((len(imgsListTrain), img_width, img_height, channels), dtype=np.float32)
Y = np.zeros((len(imgsListTrain), img_width, img_height, channels), dtype=np.float32)

Xtest = np.zeros((len(imgsListTest), img_width, img_height, channels), dtype=np.float32)
Ytest = np.zeros((len(imgsListTest), img_width, img_height, channels), dtype=np.float32)

train_path = os.getcwd() + "\\" + train_folder
# print(train_path)
test_path = os.getcwd() + "\\" + test_folder
# print(test_path)

counter = 0
i = 0
#load imgs
for n, imgName in tqdm_notebook(enumerate(imgsListTrain), total=len(imgsListTrain)):
    #load images
#     print(train_path+"\\"+imgDirectoryTrain[i]+"\image\\"+imgName)
    img = load_img(train_path+"\\"+imgDirectoryTrain[i]+"\image\\"+imgName, color_mode = "grayscale")
    x_img = img_to_array(img)
    x_img = resize(x_img, (img_width, img_height, channels), mode = 'constant', preserve_range = True)
    #load masks
#     print(train_path+"\\"+imgDirectoryTrain[i]+"\label\\"+labelTrain[n])
    mask = img_to_array(load_img(train_path+"\\"+imgDirectoryTrain[i]+"\label\\"+labelTrain[n], color_mode = "rgb")) #need to fix
    mask = resize(mask, (img_width, img_height, channels), mode = 'constant', preserve_range = True)
    #save images
    X[n] = x_img/255.0
    Y[n] = mask/255.0
    counter += 1
    if counter >= 60:
        counter = 0
        i += 1

i = 0
counter = 0
#do the same for the test dataset
for n, imgName in tqdm_notebook(enumerate(imgsListTest), total=len(imgsListTest)):
    #load test images
#     print(test_path+"\\"+imgDirectoryTest[i]+"\image\\"+imgName)
    x_test = load_img(test_path+"\\"+imgDirectoryTest[i]+"\image\\"+imgName, color_mode = "grayscale")
    x_test = img_to_array(x_test)
    x_test = resize(x_test, (img_width, img_height, channels), mode = 'constant', preserve_range = True)
    #load masks
#     print(test_path+"\\"+imgDirectoryTest[i]+"\label\\"+labelTest[n])
    testMask = img_to_array(load_img(test_path+"\\"+imgDirectoryTest[i]+"\label\\"+labelTest[n], color_mode = "rgb"))
    testMask = resize(testMask, (img_width, img_height, channels), mode = 'constant', preserve_range = True)
    #save test images
    Xtest[n] = x_test/255.0
    Ytest[n] = testMask/255.0
    counter += 1
    if counter >= 60:
        counter = 0
        i += 1

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for n, imgName in tqdm_notebook(enumerate(imgsListTrain), total=len(imgsListTrain)):


HBox(children=(FloatProgress(value=0.0, max=900.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for n, imgName in tqdm_notebook(enumerate(imgsListTest), total=len(imgsListTest)):


HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))




In [4]:
#create semantic segmentation model
#model referenced from lecture 23 semantic segmentation and u-net
#I guess the number of filters should be divisible by the img_width/height? I tried changing the img_size to (300, 300) with the same neural network parameters below and it did not work...
num_filters = 16
inputShape = Input((img_width, img_height, channels), name='img')
dropout_rate = 0.1
kernel = 3
pool = 2

#convolution path
C1 = Conv2D(filters = num_filters, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(inputShape)
C1 = BatchNormalization(axis = -1, epsilon = 0.001)(C1)
C1 = Activation('relu')(C1)
C1 = Conv2D(filters = num_filters, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C1)
C1 = BatchNormalization(axis = -1, epsilon = 0.001)(C1)
C1 = Activation('relu')(C1)
P1 = MaxPool2D(pool_size = (pool, pool), strides = 2)(C1)
P1 = Dropout(dropout_rate)(P1)

C2 = Conv2D(filters = num_filters * 2, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(P1)
C2 = BatchNormalization(axis = -1, epsilon = 0.001)(C2)
C2 = Activation('relu')(C2)
C2 = Conv2D(filters = num_filters * 2, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C2)
C2 = BatchNormalization(axis = -1, epsilon = 0.001)(C2)
C2 = Activation('relu')(C2)
P2 = MaxPool2D(pool_size = (pool, pool), strides = 2)(C2)
P2 = Dropout(dropout_rate)(P2)

C3 = Conv2D(filters = num_filters * 4, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(P2)
C3 = BatchNormalization(axis = -1, epsilon = 0.001)(C3)
C3 = Activation('relu')(C3)
C3 = Conv2D(filters = num_filters * 4, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C3)
C3 = BatchNormalization(axis = -1, epsilon = 0.001)(C3)
C3 = Activation('relu')(C3)
P3 = MaxPool2D(pool_size = (pool, pool), strides = 2)(C3)
P3 = Dropout(dropout_rate)(P3)

C4 = Conv2D(filters = num_filters * 8, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(P3)
C4 = BatchNormalization(axis = -1, epsilon = 0.001)(C4)
C4 = Activation('relu')(C4)
C4 = Conv2D(filters = num_filters * 8, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C4)
C4 = BatchNormalization(axis = -1, epsilon = 0.001)(C4)
C4 = Activation('relu')(C4)
P4 = MaxPool2D(pool_size = (pool, pool), strides = 2)(C4)
P4 = Dropout(dropout_rate)(P4)

C5 = Conv2D(filters = num_filters * 16, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(P4)
C5 = BatchNormalization(axis = -1, epsilon = 0.001)(C5)
C5 = Activation('relu')(C5)
C5 = Conv2D(filters = num_filters * 16, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C5)
C5 = BatchNormalization(axis = -1, epsilon = 0.001)(C5)
C5 = Activation('relu')(C5)

#transpose of convolution path
U1 = Conv2DTranspose(num_filters * 8, kernel_size = (kernel, kernel), strides = (2,2), padding = 'same')(C5)
U1 = concatenate([U1, C4]) 
U1 = Dropout(dropout_rate)(U1)
C6 = Conv2D(filters = num_filters * 8, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(U1)
C6 = BatchNormalization(axis = -1, epsilon = 0.001)(C6)
C6 = Activation('relu')(C6)
C6 = Conv2D(filters = num_filters * 8, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C6)
C6 = BatchNormalization(axis = -1, epsilon = 0.001)(C6)
C6 = Activation('relu')(C6)

U2 = Conv2DTranspose(num_filters * 4, kernel_size = (kernel, kernel), strides = (2,2), padding = 'same')(C6)
U2 = concatenate([U2, C3]) 
U2 = Dropout(dropout_rate)(U2)
C7 = Conv2D(filters = num_filters * 4, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(U2)
C7 = BatchNormalization(axis = -1, epsilon = 0.001)(C7)
C7 = Activation('relu')(C7)
C7 = Conv2D(filters = num_filters * 4, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C7)
C7 = BatchNormalization(axis = -1, epsilon = 0.001)(C7)
C7 = Activation('relu')(C7)

U3 = Conv2DTranspose(num_filters * 2, kernel_size = (kernel, kernel), strides = (2,2), padding = 'same')(C7)
U3 = concatenate([U3, C2])
U3 = Dropout(dropout_rate)(U3)
C8 = Conv2D(filters = num_filters * 2, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(U3)
C8 = BatchNormalization(axis = -1, epsilon = 0.001)(C8)
C8 = Activation('relu')(C8)
C8 = Conv2D(filters = num_filters * 2, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C8)
C8 = BatchNormalization(axis = -1, epsilon = 0.001)(C8)
C8 = Activation('relu')(C8)

U4 = Conv2DTranspose(num_filters, kernel_size = (kernel, kernel), strides = (2,2), padding = 'same')(C8)
U4 = concatenate([U4, C1])
U4 = Dropout(dropout_rate)(U4)
C9 = Conv2D(filters = num_filters, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(U4)
C9 = BatchNormalization(axis = -1, epsilon = 0.001)(C9)
C9 = Activation('relu')(C9)
C9 = Conv2D(filters = num_filters, kernel_size = (kernel, kernel), kernel_initializer = "he_normal", padding = 'same')(C9)
C9 = BatchNormalization(axis = -1, epsilon = 0.001)(C9)
C9 = Activation('relu')(C9)

output = Conv2D(channels, (1,1), activation='softmax')(C9)
model = Model(inputs=[inputShape], outputs = [output])

In [5]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
img (InputLayer)                [(None, 512, 512, 10 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 512, 512, 16) 1456        img[0][0]                        
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 512, 512, 16) 64          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 512, 512, 16) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [6]:
#compile model
#might need to change to categorical bc there are 10 classes that each pixel needs to be compared against instead of 2 classes
model.compile(optimizer=Adam(learning_rate=0.05), loss="categorical_crossentropy", metrics=["accuracy", MeanIoU(num_classes=10)])

In [7]:
#callbacks
callback = [
    EarlyStopping(patience=10, verbose=1),
    ReduceLROnPlateau(factor=0.1, patience=5, min_lr=0.00001, verbose=1),
    ModelCheckpoint("weights.{epoch:2d}-{loss:.2f}-{accuracy:.2f}.h5", verbose=1, save_best_only=True, save_weights_only=True)
]

In [8]:
#fit model
#canonly do small epochs due to hardware and system constraints (only have a CPU...)
result = model.fit(X, Y, batch_size=20, epochs=25, callbacks=callback, validation_data=(Xtest, Ytest))

Epoch 1/25

Epoch 00001: val_loss improved from inf to 5.64125, saving model to weights. 1-6.19-0.25.h5
Epoch 2/25

Epoch 00002: val_loss improved from 5.64125 to 5.58610, saving model to weights. 2-5.75-0.41.h5
Epoch 3/25

Epoch 00003: val_loss improved from 5.58610 to 5.58216, saving model to weights. 3-5.73-0.41.h5
Epoch 4/25

Epoch 00004: val_loss improved from 5.58216 to 5.58190, saving model to weights. 4-5.73-0.41.h5
Epoch 5/25

Epoch 00005: val_loss improved from 5.58190 to 5.58163, saving model to weights. 5-5.73-0.37.h5
Epoch 6/25

Epoch 00006: val_loss improved from 5.58163 to 5.58108, saving model to weights. 6-5.73-0.20.h5
Epoch 7/25

Epoch 00007: val_loss did not improve from 5.58108
Epoch 8/25

Epoch 00008: val_loss did not improve from 5.58108
Epoch 9/25

Epoch 00009: val_loss did not improve from 5.58108
Epoch 10/25

Epoch 00010: val_loss did not improve from 5.58108
Epoch 11/25

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.005000000074505806.

Epoch 0001

In [9]:
#evaluate training dataset
model.evaluate(X, Y, verbose = 1)



[5.729398727416992, 0.1812700778245926, 0.4442262351512909]

In [10]:
#evaluate test dataset
model.evaluate(Xtest, Ytest, verbose=1)



[5.581194877624512, 0.24844634532928467, 0.4482772648334503]

In [9]:
#predict on test dataset
model.predict(Xtest, verbose=1)



array([[[[0.5056102 ],
         [0.57418364],
         [0.5562329 ],
         ...,
         [0.57107383],
         [0.56944746],
         [0.5062948 ]],

        [[0.55256474],
         [0.651553  ],
         [0.6355519 ],
         ...,
         [0.6492327 ],
         [0.64825344],
         [0.60192287]],

        [[0.5689202 ],
         [0.6339838 ],
         [0.65526193],
         ...,
         [0.64520335],
         [0.6403639 ],
         [0.59991205]],

        ...,

        [[0.3783669 ],
         [0.44725323],
         [0.44575724],
         ...,
         [0.4446098 ],
         [0.44987226],
         [0.39309818]],

        [[0.37229466],
         [0.41879094],
         [0.43857375],
         ...,
         [0.43886802],
         [0.43810332],
         [0.3703441 ]],

        [[0.37446016],
         [0.37404954],
         [0.39615324],
         ...,
         [0.42120552],
         [0.39332092],
         [0.3510139 ]]],


       [[[0.505183  ],
         [0.5733023 ],
         [0.55