In [10]:
import os
import sys
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import NMF

In [2]:
root = "./boat-types-recognition/images/"

In [3]:
## Original Data Size
print("Calculating original data size ...")

count = 0
heights=[]
widths=[]
for path, subdirs, files in os.walk(root):
    for name in files:
        if name.find(".DS_Store") == -1:
            img_path = os.path.join(path, name)
            img = Image.open(img_path).convert('L')
            w,h = img.size
            widths.append(w)
            heights.append(h)
            count += 1
            sys.stdout.write("Progress calculating: {:.2%}\r".format(count/(1462)))
            sys.stdout.flush()
            
min_height = min(heights)
min_width = min(widths)
total = count

Calculating original data size ...
Progress calculating: 100.00%

In [4]:
print("Number of image: ", total)
print("Min image height: ", min_height)
print("Min image width: ", min_width)

Number of image:  1462
Min image height:  261
Min image width:  309


In [5]:
resize_W,resize_H = 100, 100

In [6]:
#Generating same size, gray picture

count = 0
images = []
labels = []
index = 0
for path, subdirs, files in os.walk(root):
    for name in files:
        if name.find(".DS_Store") == -1:
            img_path = os.path.join(path, name)
            img = Image.open(img_path).convert('L')
            label = index
            img_new = img.resize((resize_W,resize_H))
            img_array = np.array(img_new).reshape(-1)
            images.append(img_array)
            labels.append(label)
            count += 1
            sys.stdout.write("Progress calculating: {:.2%}\r".format(count/(1462)))
            sys.stdout.flush()
    index += 1
            
X = np.array(images)
Y = np.array(labels)            

Progress calculating: 100.00%

In [7]:
X.shape

(1462, 10000)

In [8]:
## Split Test / Train
print("Spliting Test / Train sets ...")

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

Spliting Test / Train sets ...


In [9]:
## Data Size (after spliting)
print("X train shape: ", X_train.shape)
print("Y train size: ", y_train.size)
print("X test shape: ", X_test.shape)
print("Y test size: ", y_test.size)

X train shape:  (1023, 10000)
Y train size:  1023
X test shape:  (439, 10000)
Y test size:  439


In [11]:
## NMF
print("NMF ...")
components = 1500
nmf = NMF(n_components=components, init='random', random_state=0, tol=0.1, verbose=True)

W = nmf.fit_transform(X_train)
H = nmf.components_
X_train_reduced = W

NMF ...
violation: 1.0
violation: 0.1341189081075392
violation: 0.08871852123358978
Converged at iteration 3


In [17]:
print("X train reduced shape:",X_train_reduced.shape)

X train reduced shape: (1023, 1500)


In [18]:
# Set the parameter candidates
parameter_candidates = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

# Create a classifier with the parameter candidates
clf = GridSearchCV(estimator=svm.SVC(), param_grid=parameter_candidates, n_jobs=-1)

# Train the classifier on training data
clf.fit(X_train_reduced, y_train)

# Print out the results 
print('Best "score" for training data:', clf.best_score_)
print('Best "c":',clf.best_estimator_.C)
print('Best "kernel":',clf.best_estimator_.kernel)
print('Best "gamma":',clf.best_estimator_.gamma)



Best "score" for training data: 0.34017595307917886
Best "c": 1
Best "kernel": linear
Best "gamma": auto_deprecated


In [27]:
# Create the SVC model 
print("Training......")
svc_model = svm.SVC(gamma=0.001,C=1., kernel='rbf')

# Fit the data to the SVC model
svc_model.fit(X_train_reduced, y_train)

Training......


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [28]:
# Apply the classifier to the test data, and view the accuracy score
X_test_reduced = nmf.transform(X_test)
svc_model.score(X_test_reduced, y_test)  

# Train and score a new classifier with the grid search parameters

svc_model.fit(X_train_reduced, y_train).score(X_test_reduced, y_test)

violation: 1.0
violation: 0.6374588555027088
violation: 0.43190837045946995
violation: 0.3407747177170469
violation: 0.29555411521196157
violation: 0.2706012719071585
violation: 0.24983628643802416
violation: 0.2340132671995518
violation: 0.22212428763270806
violation: 0.21057149482547494
violation: 0.20483732728045925
violation: 0.1974717514789586
violation: 0.19140476379524676
violation: 0.18616905816639465
violation: 0.17981271213413177
violation: 0.17528718401191468
violation: 0.16971510501575082
violation: 0.16439409425990867
violation: 0.16044604881954663
violation: 0.15707910055734584
violation: 0.15224498591415364
violation: 0.14799075808236578
violation: 0.1428400498984003
violation: 0.1386914865898034
violation: 0.13460241284405688
violation: 0.1313990214208069
violation: 0.12790180653357663
violation: 0.12570381257938684
violation: 0.12169585515017423
violation: 0.11875355238478563
violation: 0.11548756484147656
violation: 0.11257563106406573
violation: 0.11014323723972869
v

0.32574031890660593

In [29]:
# Predict the label of "x test"
print("Testing.....")

y_hat = svc_model.predict(X_test_reduced)

Testing.....


In [31]:
## Evaluations 
from sklearn.metrics import accuracy_score, auc, precision_score, recall_score, f1_score, roc_curve
print("accuracy: " + str(accuracy_score(y_test, y_hat)))
print("precision: " + str(precision_score(y_test, y_hat, average=None)))
print("recall: " + str(recall_score(y_test, y_hat, average=None)))
print("F1: " + str(f1_score(y_test, y_hat, average=None)))

accuracy: 0.32574031890660593
precision: [0.         0.         0.         0.         0.         0.
 0.         0.         0.32574032]
recall: [0. 0. 0. 0. 0. 0. 0. 0. 1.]
F1: [0.         0.         0.         0.         0.         0.
 0.         0.         0.49140893]


In [40]:
# Create the SVC model 
print("Training......")
svc_model = svm.SVC(gamma=0.001,C=100., kernel='linear')

# Fit the data to the SVC model
svc_model.fit(X_train_reduced, y_train)

Training......


SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [41]:
# Apply the classifier to the test data, and view the accuracy score
svc_model.score(X_test_reduced, y_test)  

# Train and score a new classifier with the grid search parameters

svc_model.fit(X_train_reduced, y_train).score(X_test_reduced, y_test)

0.23234624145785876

In [43]:
# Create the SVC model 
print("Training......")
svc_model = svm.SVC(gamma=0.001,C=100., kernel='poly')

# Fit the data to the SVC model
svc_model.fit(X_train_reduced, y_train)

Training......


SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [44]:
# Apply the classifier to the test data, and view the accuracy score
svc_model.score(X_test_reduced, y_test)  

# Train and score a new classifier with the grid search parameters

svc_model.fit(X_train_reduced, y_train).score(X_test_reduced, y_test)

0.1252847380410023