In [1]:
# ! pip install scikit-image
# ! pip install plotly==5.15.0

In [1]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
from skimage.io import imread
from skimage.transform import resize, rescale
import plotly.express as px

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [4]:
import glob as glob

In [6]:

input_dir = '/Users/autumn_yngoc/Downloads/Marine Fog/False Bay May-Jul 2022 photos'
categories = ['no_fog', 'fog'] # no_fog as 0, and fog as 1

img_list = [] 
rescaled_list = []
data_list = [] # feature array
label_list = [] # target array

# Loop through the no_fog and fog folders
for category_idx, category in enumerate(categories):
    # Find all the image files
    files = glob.glob(os.path.join(input_dir, category, '*.JPG'))
    for file in files:
        # Read each image file into a 3D array of numbers
        img = imread(file)
        img_list.append(img)
        # img = resize(img, (15, 15))
        # Rescale each image into 5% resolution to reduce runtime
        rescaled_img = rescale(img, 0.05, channel_axis = 2)
        rescaled_list.append(rescaled_img)
        # flatten the 3D numpy arrays into a 1D array, and append it to the feature list
        data_list.append(rescaled_img.flatten()) 
        # Append the category index (0 for 'no_fog', and 1 for 'fog') to the target list
        label_list.append(category_idx)

In [7]:
test_img = img_list[0]
print(test_img.size)
print(test_img.shape)
print(test_img[0][0])

15116544
(1944, 2592, 3)
[ 87  99 137]


In [8]:
test_img = rescaled_list[0]
print(test_img.size)
print(test_img.shape)
print(test_img[0][0])

37830
(97, 130, 3)
[0.33054676 0.3906554  0.54170343]


In [None]:
px.imshow(rescaled_list[10])

In [7]:
# Convert the lists to arrays
data = np.asarray(data_list)
labels = np.asarray(label_list)

In [8]:
# Split 70/30 train/test set
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, shuffle=True, stratify=labels)

In [19]:
classifier = SVC()

In [20]:
parameters = [{'gamma': [0.1, 0.01, 0.001, 0.0001], 'C': [1, 10]}]

In [21]:
grid_search = GridSearchCV(classifier, parameters)

grid_search.fit(x_train, y_train)

In [22]:
best_estimator = grid_search.best_estimator_
best_estimator

In [23]:
y_test_pred = best_estimator.predict(x_test)

score = accuracy_score(y_test_pred, y_test)

print('{}% of samples were correctly classified'.format(str(score * 100)))

tn_test, fp_test, fn_test, tp_test = confusion_matrix(y_test, y_test_pred).ravel()
print("Test: ", tn_test, fp_test, fn_test, tp_test)

100.0% of samples were correctly classified
Test:  4 0 0 4


In [24]:
y_test_pred

array([1, 0, 1, 0, 1, 1, 0, 0])

In [25]:
pickle.dump(best_estimator, open('./model.p', 'wb'))

In [26]:
# Use the model to classify all images in a directory
# Output a pandas dataframe with image file name corresponding with the predicted no_fog/fog label

pickled_model = pickle.load(open('./model.p', 'rb'))
input_dir = '/Users/autumn_yngoc/Downloads/Marine Fog/False Bay May-Jul 2022 photos/photos'
files = glob.glob(os.path.join(input_dir, '*.JPG'))

x = []
names = []

for file in files:
    name = file.split('/')[-1]
    names.append(name)
    img = imread(file)
    rescaled_img = rescale(img, 0.05, channel_axis = 2).flatten()
    # Append the rescaled and flattened image array to a list 
    x.append(rescaled_img)    

# Convert the list to a numpy feature array
X = np.asarray(x)
# Get the predicted target array
y_pred = pickled_model.predict(X)

dict = {"Image file" : names, "Predicted label" : y_pred}
df = pd.DataFrame(dict)

In [27]:
df[df['Predicted label'] == 1]

Unnamed: 0,Image file,Predicted label
9,WSCT4439.JPG,1
13,WSCT4411.JPG,1
21,WSCT5531.JPG,1
42,WSCT4968.JPG,1
48,WSCT4997.JPG,1
...,...,...
2045,WSCT5529.JPG,1
2112,WSCT5474.JPG,1
2114,WSCT5306.JPG,1
2143,WSCT5528.JPG,1


In [29]:
df[df['Predicted label'] == 1]['Image file']

9       WSCT4439.JPG
13      WSCT4411.JPG
21      WSCT5531.JPG
42      WSCT4968.JPG
48      WSCT4997.JPG
            ...     
2045    WSCT5529.JPG
2112    WSCT5474.JPG
2114    WSCT5306.JPG
2143    WSCT5528.JPG
2153    WSCT4408.JPG
Name: Image file, Length: 75, dtype: object

In [31]:
df.to_csv('FalseBay_MayJul2022_PredictedFog_SVC.csv')