In [1]:
import os
import sys
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

In [2]:
root = "./boat-types-recognition/images/"

In [3]:
## Original Data Size
print("Calculating original data size ...")

count = 0
heights=[]
widths=[]
for path, subdirs, files in os.walk(root):
    for name in files:
        if name.find(".DS_Store") == -1:
            img_path = os.path.join(path, name)
            img = Image.open(img_path).convert('L')
            w,h = img.size
            widths.append(w)
            heights.append(h)
            count += 1
            sys.stdout.write("Progress calculating: {:.2%}\r".format(count/(1462)))
            sys.stdout.flush()
            
min_height = min(heights)
min_width = min(widths)
total = count

Calculating original data size ...
Progress calculating: 100.00%

In [4]:
print("Number of image: ", total)
print("Min image height: ", min_height)
print("Min image width: ", min_width)

Number of image:  1462
Min image height:  261
Min image width:  309


In [5]:
resize_W,resize_H = 100, 100

In [6]:
#Generating same size, gray picture

count = 0
images = []
labels = []
index = 0
for path, subdirs, files in os.walk(root):
    for name in files:
        if name.find(".DS_Store") == -1:
            img_path = os.path.join(path, name)
            img = Image.open(img_path).convert('L')
            label = index
            img_new = img.resize((resize_W,resize_H))
            img_array = np.array(img_new).reshape(-1)
            images.append(img_array)
            labels.append(label)
            count += 1
            sys.stdout.write("Progress calculating: {:.2%}\r".format(count/(1462)))
            sys.stdout.flush()
    index += 1
            
X = np.array(images)
Y = np.array(labels)            

Progress calculating: 100.00%

In [7]:
X.shape

(1462, 10000)

In [8]:
## Split Test / Train
print("Spliting Test / Train sets ...")

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

Spliting Test / Train sets ...


In [9]:
## Data Size (after spliting)
print("X train shape: ", X_train.shape)
print("Y train size: ", y_train.size)
print("X test shape: ", X_test.shape)
print("Y test size: ", y_test.size)

X train shape:  (1023, 10000)
Y train size:  1023
X test shape:  (439, 10000)
Y test size:  439


In [14]:
# Set the parameter candidates
parameter_candidates = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

# Create a classifier with the parameter candidates
clf = GridSearchCV(estimator=svm.SVC(), param_grid=parameter_candidates, n_jobs=-1)

# Train the classifier on training data
clf.fit(X_train, y_train)

# Print out the results 
print('Best "score" for training data:', clf.best_score_)
print('Best "c":',clf.best_estimator_.C)
print('Best "kernel":',clf.best_estimator_.kernel)
print('Best "gamma":',clf.best_estimator_.gamma)



Best "score" for training data: 0.33724340175953077
Best "c": 1
Best "kernel": rbf
Best "gamma": 0.001


In [10]:
# Create the SVC model 
print("Training......")
svc_model = svm.SVC(gamma=0.001, C=1., kernel='rbf')

# Fit the data to the SVC model
svc_model.fit(X_train, y_train)

Training......


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [11]:
# Apply the classifier to the test data, and view the accuracy score
svc_model.score(X_test, y_test)  

# Train and score a new classifier with the grid search parameters
svm.SVC(C=1, kernel='rbf', gamma=0.001).fit(X_train, y_train).score(X_test, y_test)

0.32574031890660593

In [12]:
# Predict the label of "x test"
print("Testing.....")
y_hat = svc_model.predict(X_test)

Testing.....


In [13]:
## Evaluations 
from sklearn.metrics import accuracy_score, auc, precision_score, recall_score, f1_score, roc_curve
print("accuracy: " + str(accuracy_score(y_test, y_hat)))
print("precision: " + str(precision_score(y_test, y_hat, average=None)))
print("recall: " + str(recall_score(y_test, y_hat, average=None)))
print("F1: " + str(f1_score(y_test, y_hat, average=None)))

accuracy: 0.32574031890660593
precision: [0.         0.         0.         0.         0.         0.
 0.         0.         0.32648402]
recall: [0. 0. 0. 0. 0. 0. 0. 0. 1.]
F1: [0.         0.         0.         0.         0.         0.
 0.         0.         0.49225473]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
