In [1]:
import os
import sys
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import NMF

In [2]:
root = "./boat-types-recognition/cropped/"

In [3]:
## Original Data Size
print("Calculating original data size ...")

count = 0
heights=[]
widths=[]
for path, subdirs, files in os.walk(root):
    for name in files:
        if name.find(".DS_Store") == -1:
            img_path = os.path.join(path, name)
            img = Image.open(img_path).convert('L')
            w,h = img.size
            widths.append(w)
            heights.append(h)
            count += 1
            sys.stdout.write("Progress calculating: {:.2%}\r".format(count/(1585)))
            sys.stdout.flush()
            
min_height = min(heights)
min_width = min(widths)
total = count

Calculating original data size ...
Progress calculating: 100.00%

In [4]:
print("Number of image: ", total)
print("Min image height: ", min_height)
print("Min image width: ", min_width)

Number of image:  1585
Min image height:  10
Min image width:  24


In [5]:
resize_W,resize_H = 100, 100

In [6]:
#Generating same size, gray picture

count = 0
images = []
labels = []
index = 0
for path, subdirs, files in os.walk(root):
    for name in files:
        if name.find(".DS_Store") == -1:
            img_path = os.path.join(path, name)
            img = Image.open(img_path).convert('L')
            label = index
            img_new = img.resize((resize_W,resize_H))
            img_array = np.array(img_new).reshape(-1)
            images.append(img_array)
            labels.append(label)
            count += 1
            sys.stdout.write("Progress calculating: {:.2%}\r".format(count/(1585)))
            sys.stdout.flush()
    index += 1
            
X = np.array(images)
Y = np.array(labels)            

Progress calculating: 100.00%

In [7]:
X.shape

(1585, 10000)

In [8]:
## Split Test / Train
print("Spliting Test / Train sets ...")

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

Spliting Test / Train sets ...


In [9]:
## Data Size (after spliting)
print("X train shape: ", X_train.shape)
print("Y train size: ", y_train.size)
print("X test shape: ", X_test.shape)
print("Y test size: ", y_test.size)

X train shape:  (1109, 10000)
Y train size:  1109
X test shape:  (476, 10000)
Y test size:  476


In [10]:
## NMF
print("NMF ...")
components = 900
nmf = NMF(n_components=components, init='random', random_state=0, tol=0.1, verbose=True)

W = nmf.fit_transform(X_train)
H = nmf.components_
X_train_reduced = W

NMF ...
violation: 1.0
violation: 0.17725105161515078
violation: 0.10240500848096648
violation: 0.08024020895333475
Converged at iteration 4


In [11]:
print("X train reduced shape:",X_train_reduced.shape)

X train reduced shape: (1109, 900)


In [31]:
# Create the SVC model 
print("Training......")
svc_model = svm.SVC(gamma='auto',kernel='rbf',C=1)

# Fit the data to the SVC model
svc_model.fit(X_train_reduced, y_train)
svc_model.score(X_train_reduced, y_train)  

Training......


0.9386834986474302

In [13]:
# testing data nmf
print("NMF for testing data")
X_test_reduced = nmf.transform(X_test)

NMF for testing data
violation: 1.0
violation: 0.6687254595313054
violation: 0.4757744714883664
violation: 0.38517593230034164
violation: 0.340384658787855
violation: 0.30729175949742704
violation: 0.28626036402394195
violation: 0.2704654156741913
violation: 0.2554374581908326
violation: 0.24314997911677394
violation: 0.2293887104629931
violation: 0.22016936085486394
violation: 0.21248416723738967
violation: 0.20504293641288568
violation: 0.19647357723827902
violation: 0.1869608392625761
violation: 0.17802632260198673
violation: 0.17225657378749495
violation: 0.16420539799157083
violation: 0.1574442261025966
violation: 0.15186582379322905
violation: 0.1450884172093351
violation: 0.1382348803207456
violation: 0.1331490616978996
violation: 0.12689018250249653
violation: 0.122626983021143
violation: 0.11833398063357801
violation: 0.11436864808916397
violation: 0.11002611595672332
violation: 0.10609239894429649
violation: 0.10299624391389198
violation: 0.09939152499664568
Converged at iter

In [32]:
#Testing
print("Testing.....")
y_hat = svc_model.predict(X_test_reduced)
svc_model.score(X_test_reduced, y_test)  

Testing.....


0.3907563025210084

In [15]:
## Evaluations 
from sklearn.metrics import accuracy_score, auc, precision_score, recall_score, f1_score, roc_curve
print("accuracy: " + str(accuracy_score(y_test, y_hat)))
print("precision: " + str(precision_score(y_test, y_hat, average=None)))
print("recall: " + str(recall_score(y_test, y_hat, average=None)))
print("F1: " + str(f1_score(y_test, y_hat, average=None)))

accuracy: 0.4054621848739496
precision: [0.         0.         0.         0.         0.4893617  0.
 0.25714286 0.         0.41346154]
recall: [0.         0.         0.         0.         0.46938776 0.
 0.2195122  0.         0.76331361]
F1: [0.         0.         0.         0.         0.47916667 0.
 0.23684211 0.         0.53638254]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [16]:
for i in range(10):
    print(np.count_nonzero(y_test == i))

0
4
65
24
16
98
7
82
11
169


In [17]:
for i in range(10):
    print(np.count_nonzero(y_hat == i))

0
0
0
0
0
94
0
70
0
312


In [18]:
for i in range(10):
    print(np.count_nonzero(y_train == i))

0
26
155
43
22
224
22
236
26
355
