### Libraries

In [None]:
# Mount Google Drive at /content/drive; the dataset is read from
# /content/drive/MyDrive further down, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load Libraries
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from keras import layers
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.applications import vgg16
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.models import Sequential
from PIL import Image
%matplotlib inline

In [None]:
# Load images
def load_im(root_dir='/content/drive/MyDrive/data/split_data', resize=(224, 224)):
    """Load every readable image found in a class folder below ``root_dir``.

    The class comes from the name of the folder that directly contains the
    file ('normal' -> 0, 'pothole' -> 1, 'speedbump' -> 2). The split tag comes
    from the folder path relative to ``root_dir``: 'train' if it contains
    "train", otherwise 'test' if it contains "test", otherwise
    'unknown_split'.

    An image, its label and its split tag are always appended together, so the
    three returned lists have the same length and line up index by index.

    Args:
        root_dir: Directory that is walked recursively.
        resize: Target size passed to ``cv2.resize``.

    Returns:
        Tuple ``(input_im, input_label, data_split)``: list of resized images,
        list of integer labels, list of split tags.
    """
    class_ids = {'normal': 0, 'pothole': 1, 'speedbump': 2}
    input_im, input_label, data_split = [], [], []

    # Loop in folders
    for dirname, _, filenames in os.walk(root_dir):
        photo_class = os.path.basename(dirname)
        if photo_class not in class_ids:
            # Files here have no label; loading them would leave the image list
            # longer than the label list.
            if filenames:
                print('Skipping folder with unknown class:', dirname)
            continue

        # Look only at the part of the path below root_dir so that a "train" or
        # "test" substring in a parent directory cannot decide the split.
        rel_dir = os.path.relpath(dirname, root_dir)
        if "train" in rel_dir:
            split = 'train'
        elif "test" in rel_dir:
            split = 'test'
        else:
            split = 'unknown_split'

        for filename in filenames:
            photo_path = os.path.join(dirname, filename)
            try:
                read_im = cv2.imread(photo_path)
                if read_im is None:
                    raise ValueError('image could not be decoded')
                resized = cv2.resize(read_im, resize)
            except Exception:
                # Best effort: report the file that failed and keep going.
                print(photo_path)
                continue
            input_im.append(resized)
            input_label.append(class_ids[photo_class])
            data_split.append(split)

    # return list of images, another list of corresponding labels, and the data split information
    return input_im, input_label, data_split

# Read the whole dataset once; paths printed below are files that failed to load.
input_im, input_label, data_split = load_im()

/content/drive/MyDrive/data/split_data/train/normal/226.jpg


In [None]:
def train_test_split(input_im, input_label, data_split):
    """Partition images and labels using the per-sample split tags.

    Samples tagged 'train' go to the training arrays, samples tagged 'test' go
    to the test arrays; any other tag is left out of both.

    Returns:
        ``(train_x, test_x, train_y, test_y)`` as produced by ``np.take``
        along axis 0.
    """
    train_rows, test_rows = [], []
    for row, part in enumerate(data_split):
        if part == 'train':
            train_rows.append(row)
        elif part == 'test':
            test_rows.append(row)

    # Images first, then labels, each gathered by row position
    train_x = np.take(input_im, train_rows, axis=0)
    test_x = np.take(input_im, test_rows, axis=0)
    train_y = np.take(input_label, train_rows, axis=0)
    test_y = np.take(input_label, test_rows, axis=0)

    return train_x, test_x, train_y, test_y

# Use existing splits: the train/test membership comes from the folder-derived
# tags in data_split, not from a random shuffle.
train_x, test_x, train_y, test_y = train_test_split(input_im, input_label, data_split)

In [None]:
def append_im(input_im, input_label, im_iterator):
    """Append the images produced by ``im_iterator`` to an existing data set.

    ``im_iterator`` is read by position (``len`` and ``[i]``); each item is a
    batch of images stacked along axis 0. Batches are cast to ``uint8``.
    Labels for the new images are copied from ``input_label`` in order: the
    k-th image yielded overall gets ``input_label[k]``. This assumes the
    iterator yields samples in their original order (no shuffling).

    All batches are collected first and concatenated once, instead of
    re-copying the growing array on every iteration.

    Args:
        input_im: Existing images, first axis is the sample axis.
        input_label: Labels aligned with ``input_im``.
        im_iterator: Indexable sequence of image batches.

    Returns:
        ``(input_im_n, input_label_n)``: originals followed by the new images
        and their labels.

    Raises:
        IndexError: if the iterator yields more images than there are labels.
    """
    new_batches, new_labels = [], []
    seen = 0  # number of images consumed so far == index of the next label
    for i in range(len(im_iterator)):
        batch = np.asarray(im_iterator[i]).astype('uint8')
        labels = input_label[seen:seen + len(batch)]
        if len(labels) != len(batch):
            raise IndexError('im_iterator yielded more images than there are labels')
        new_batches.append(batch)
        new_labels.append(np.asarray(labels))
        seen += len(batch)

    if not new_batches:
        # Nothing to add: hand back copies, as a zero-iteration loop would.
        return input_im.copy(), input_label.copy()

    input_im_n = np.concatenate([np.asarray(input_im)] + new_batches, axis=0)
    input_label_n = np.concatenate([np.asarray(input_label)] + new_labels, axis=0)
    return input_im_n, input_label_n

 ### PCA

In [None]:
# Flipping: run the training images through a horizontal-flip generator, one
# image per batch and in original order, and append the results (with the
# matching labels) to the training set.
# NOTE(review): per the Keras docs horizontal_flip=True flips at random, so
# some appended images may be unflipped duplicates — confirm.
flip_data_generator = ImageDataGenerator(horizontal_flip = True)
im_iterator = flip_data_generator.flow(train_x, batch_size = 1, shuffle = False)
input_im_n, input_label_n = append_im(train_x, train_y, im_iterator)

# Reshape
# Image dimensions are read from train_x before it is overwritten below.
nx, ny, nz = train_x.shape[1], train_x.shape[2], train_x.shape[3]
# Keep the un-flattened image arrays under separate names.
train_x_nn, test_x_nn = input_im_n, test_x
# Flatten each image to one row and scale by 1/255.
# This rebinds train_x/test_x/train_y/test_y: re-running this cell without
# re-running the split cell fails, because train_x is then 2-D and has no shape[3].
train_x = input_im_n.reshape((input_im_n.shape[0], nx * ny * nz)) / 255
test_x = test_x.reshape((test_x.shape[0], nx * ny * nz)) / 255
# Labels become column vectors of shape (n, 1).
train_y = input_label_n.reshape((input_label_n.shape[0], 1))
test_y = test_y.reshape((test_y.shape[0], 1))

In [None]:
# Dimensionality reduction - PCA
# Fitted on the flattened training images only.
# NOTE(review): no n_components is passed, so presumably no components are
# dropped here — confirm whether a variance cut-off was intended.
im_pca = PCA()
im_pca.fit(train_x)
# Running total of the explained-variance ratio, component by component
variance_explained_list = im_pca.explained_variance_ratio_.cumsum()
print(variance_explained_list)

[0.24292623 0.37603674 0.42919601 ... 1.         1.         1.        ]


In [None]:
# Project both sets with the PCA that was fitted on the training data
test_x_pca = im_pca.transform(test_x)
train_x_pca = im_pca.transform(train_x)

In [None]:
# SVM
def svm_grid_search(C, kernel, train_x, train_y, cv=5):
    """Pick the SVM regularization value with the best cross-validated accuracy.

    Each candidate is scored with ``cv``-fold cross-validation on the training
    data, so the score is measured on samples the model was not fitted on.
    Scoring a model on the data it was fitted on rewards over-fitting and
    always favours the largest ``C``.

    NOTE(review): if ``train_x`` holds augmented copies of the same image, a
    copy can land in a different fold than its original — confirm whether
    grouped folds are needed.

    Args:
        C: Sequence of candidate regularization values.
        kernel: Kernel name passed to ``svm.SVC``.
        train_x: Training features, one row per sample.
        train_y: Training labels as a NumPy array; flattened with ``ravel()``.
        cv: Cross-validation setting forwarded to ``cross_val_score``.

    Returns:
        The element of ``C`` with the highest mean accuracy (first one on ties).
    """
    # Imported here so the cell stays runnable without touching the import cell.
    from sklearn.model_selection import cross_val_score

    labels = train_y.ravel()
    accuracy_score_list = []

    for c in C:
        svmClassifier = svm.SVC(C = c, kernel = kernel)
        # Mean accuracy over the held-out folds
        fold_scores = cross_val_score(svmClassifier, train_x, labels, cv = cv, scoring = 'accuracy')
        accuracy = fold_scores.mean()
        accuracy_score_list.append(accuracy)
        print('Regularization parameters: ', c, 'Accuracy', accuracy)

    max_accuracy_id = accuracy_score_list.index(max(accuracy_score_list))
    return C[max_accuracy_id]

# Candidate C values 0.1, 0.2, ..., 2.9 (0.1 * i for i in 1..29) with an RBF kernel
C, kernel = [0.1 * i for i in range(1, 30)], 'rbf'
opt_C = svm_grid_search(C, kernel, train_x_pca, train_y)

Regularization parameters:  0.1 Accuracy 0.7670807453416149
Regularization parameters:  0.2 Accuracy 0.8198757763975155
Regularization parameters:  0.30000000000000004 Accuracy 0.8490683229813665
Regularization parameters:  0.4 Accuracy 0.8801242236024844
Regularization parameters:  0.5 Accuracy 0.9024844720496894
Regularization parameters:  0.6000000000000001 Accuracy 0.9180124223602485
Regularization parameters:  0.7000000000000001 Accuracy 0.9322981366459627
Regularization parameters:  0.8 Accuracy 0.94472049689441
Regularization parameters:  0.9 Accuracy 0.9577639751552796
Regularization parameters:  1.0 Accuracy 0.9645962732919254
Regularization parameters:  1.1 Accuracy 0.9720496894409938
Regularization parameters:  1.2000000000000002 Accuracy 0.9770186335403727
Regularization parameters:  1.3 Accuracy 0.9838509316770186
Regularization parameters:  1.4000000000000001 Accuracy 0.9850931677018634
Regularization parameters:  1.5 Accuracy 0.9881987577639751
Regularization parameters:

In [None]:
# Test set
# Refit on the full (PCA-projected) training set with the selected C, then
# report accuracy on the held-out test set.
svmClassifier = svm.SVC(C = opt_C, kernel = kernel)
svmClassifier.fit(train_x_pca, train_y.ravel())
pred_y = svmClassifier.predict(test_x_pca)
accuracy = accuracy_score(test_y, pred_y)
print(accuracy)

0.7326732673267327


In [None]:
# Logistic Regression
def Logistic():
    """Return an uncompiled Sequential model made of one 3-unit softmax Dense layer."""
    softmax_layer = Dense(3, activation="softmax")
    return Sequential([softmax_layer])

# Build and compile the model
logistic_model = Logistic()
logistic_model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

# Sanity-check the inputs by shape instead of dumping the full arrays
print(train_x.shape)
print(train_y.shape)

from sklearn.preprocessing import OneHotEncoder
# One-hot encode the labels with an explicit class list (0 = normal,
# 1 = pothole, 2 = speedbump) so there are always three columns, in that
# order, matching the 3-unit output layer. The encoder's default sparse
# output is converted with .toarray(), which avoids the `sparse=` keyword
# that newer scikit-learn releases no longer accept.
encoder = OneHotEncoder(categories = [[0, 1, 2]])
one_hot = encoder.fit_transform(train_y).toarray()

# Train on the flattened, scaled images
logistic_model.fit(train_x, one_hot, batch_size = 32, epochs = 10, verbose = 1)


[[0.16470588 0.21960784 0.14901961 ... 0.57254902 0.57254902 0.51764706]
 [0.69411765 0.7372549  0.75294118 ... 0.19607843 0.17254902 0.16862745]
 [0.4745098  0.48627451 0.50196078 ... 0.3372549  0.31372549 0.31764706]
 ...
 [0.70588235 0.48235294 0.30588235 ... 0.74117647 0.7372549  0.77254902]
 [0.29411765 0.52941176 0.69803922 ... 0.3254902  0.30588235 0.28627451]
 [0.64705882 0.37647059 0.21960784 ... 0.59215686 0.65882353 0.81176471]]
[[1]
 [1]
 [1]
 ...
 [0]
 [0]
 [0]]




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f7848bd12d0>

In [None]:
# Test set
from sklearn.preprocessing import OneHotEncoder
# Encode the test labels against the fixed class list (0 = normal,
# 1 = pothole, 2 = speedbump) rather than whatever classes happen to occur in
# test_y; otherwise a class missing from the test set would shift the columns
# relative to the model's outputs. .toarray() turns the encoder's default
# sparse result into a dense array without relying on the `sparse=` keyword.
encoder = OneHotEncoder(categories = [[0, 1, 2]])
one_hot = encoder.fit_transform(test_y).toarray()

print(logistic_model.metrics_names)
print(logistic_model.evaluate(test_x, one_hot, verbose = 0))



['loss', 'accuracy']
[5.4310479164123535, 0.5247524976730347]
