In [45]:
import cv2
import numpy as np
import os
import pandas as pd
import csv
import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans
from sklearn.neural_network import MLPClassifier

def plot_gray(image):
    %matplotlib qt
    plt.figure(figsize=(20,15))
    return plt.imshow(image, cmap='Greys_r')

In [46]:
samples_path = './samples/василий/'
image_names = os.listdir(samples_path)

classes_list = ['василий', 'не_василий']

In [47]:
X_list = []
for i, class_ in enumerate(classes_list):
    image_names = os. listdir('./samples/' + class_)
    for image_name in images_names:
        image_path = './samples/' + class_ + '/'+ image_name
        image_class = i
        class_name = class_
        X_list.append((image_class, class_name, image_path))

In [48]:
samples_path = './samples/василий/'
image_names = os.listdir(samples_path)

classes_list = ['василий', 'не_василий']

In [49]:
dico = []
for image_name in image_names:
    img = cv2.imread(samples_path + '/' + image_name)
    
    sift = cv2.SIFT_create()
    kp, des = sift.detectAndCompute(img, None)
    
    for d in des:
        dico.append(d)

In [50]:
len(dico)

4978

In [51]:
k = 100

batch_size = np.size(image_names) * 3
kmeans = MiniBatchKMeans(n_clusters=k, batch_size=batch_size, verbose=1).fit(dico)

Init 1/3 with method: k-means++
Inertia for init 1/3: 30053998.451577
Init 2/3 with method: k-means++
Inertia for init 2/3: 30366549.553077
Init 3/3 with method: k-means++
Inertia for init 3/3: 30398714.813939
Minibatch iteration 1/2700: mean batch inertia: 73647.517341, ewa inertia: 73647.517341 
Minibatch iteration 2/2700: mean batch inertia: 75321.777802, ewa inertia: 73772.607699 
Minibatch iteration 3/2700: mean batch inertia: 74020.972362, ewa inertia: 73791.163966 
Minibatch iteration 4/2700: mean batch inertia: 68158.914109, ewa inertia: 73370.357188 
Minibatch iteration 5/2700: mean batch inertia: 69151.255191, ewa inertia: 73055.132054 
Minibatch iteration 6/2700: mean batch inertia: 73322.911089, ewa inertia: 73075.138843 
Minibatch iteration 7/2700: mean batch inertia: 70041.623591, ewa inertia: 72848.493397 
Minibatch iteration 8/2700: mean batch inertia: 71091.856612, ewa inertia: 72717.248391 
Minibatch iteration 9/2700: mean batch inertia: 73655.044641, ewa inertia: 727

In [52]:

kmeans.verbose = False

histo_list = []

for tup in X_list:
    img = cv2.imread(tup[2],0)
    kp, des = sift.detectAndCompute(img, None)

    histo = np.zeros(k)
    nkp = np.size(kp)

    for d in des:
        idx = kmeans.predict([d])
        histo[idx] += 1/nkp # Because we need normalized histograms, I prefere to add 1/nkp directly

    histo_list.append(histo)

In [54]:

X = np.array(histo_list)
Y = [tup[0] for tup in X_list]

# It's a way to convert species name into an integer
#for s in train.species:
    #Y.append(np.min(np.nonzero(species == s)))

mlp = MLPClassifier(verbose=True, max_iter=600000)
mlp.fit(X, Y)

Iteration 1, loss = 0.70915468
Iteration 2, loss = 0.70725093
Iteration 3, loss = 0.70541800
Iteration 4, loss = 0.70364771
Iteration 5, loss = 0.70193940
Iteration 6, loss = 0.70028842
Iteration 7, loss = 0.69869870
Iteration 8, loss = 0.69717004
Iteration 9, loss = 0.69570487
Iteration 10, loss = 0.69429013
Iteration 11, loss = 0.69292559
Iteration 12, loss = 0.69161316
Iteration 13, loss = 0.69035044
Iteration 14, loss = 0.68913876
Iteration 15, loss = 0.68796944
Iteration 16, loss = 0.68683784
Iteration 17, loss = 0.68574546
Iteration 18, loss = 0.68469100
Iteration 19, loss = 0.68366887
Iteration 20, loss = 0.68266997
Iteration 21, loss = 0.68169468
Iteration 22, loss = 0.68073619
Iteration 23, loss = 0.67978996
Iteration 24, loss = 0.67886109
Iteration 25, loss = 0.67794764
Iteration 26, loss = 0.67703756
Iteration 27, loss = 0.67612823
Iteration 28, loss = 0.67521888
Iteration 29, loss = 0.67430716
Iteration 30, loss = 0.67339443
Iteration 31, loss = 0.67247349
Iteration 32, los

Iteration 280, loss = 0.33809519
Iteration 281, loss = 0.33708982
Iteration 282, loss = 0.33608738
Iteration 283, loss = 0.33508787
Iteration 284, loss = 0.33409139
Iteration 285, loss = 0.33309790
Iteration 286, loss = 0.33210731
Iteration 287, loss = 0.33111966
Iteration 288, loss = 0.33013502
Iteration 289, loss = 0.32915325
Iteration 290, loss = 0.32817431
Iteration 291, loss = 0.32719834
Iteration 292, loss = 0.32622525
Iteration 293, loss = 0.32525499
Iteration 294, loss = 0.32428767
Iteration 295, loss = 0.32332314
Iteration 296, loss = 0.32236142
Iteration 297, loss = 0.32140251
Iteration 298, loss = 0.32044640
Iteration 299, loss = 0.31949308
Iteration 300, loss = 0.31854255
Iteration 301, loss = 0.31759479
Iteration 302, loss = 0.31664978
Iteration 303, loss = 0.31570754
Iteration 304, loss = 0.31476806
Iteration 305, loss = 0.31383131
Iteration 306, loss = 0.31289728
Iteration 307, loss = 0.31196602
Iteration 308, loss = 0.31103749
Iteration 309, loss = 0.31011167
Iteration 

Iteration 539, loss = 0.15573834
Iteration 540, loss = 0.15527531
Iteration 541, loss = 0.15481369
Iteration 542, loss = 0.15435370
Iteration 543, loss = 0.15389491
Iteration 544, loss = 0.15343785
Iteration 545, loss = 0.15298202
Iteration 546, loss = 0.15252789
Iteration 547, loss = 0.15207497
Iteration 548, loss = 0.15162365
Iteration 549, loss = 0.15117379
Iteration 550, loss = 0.15072505
Iteration 551, loss = 0.15027819
Iteration 552, loss = 0.14983248
Iteration 553, loss = 0.14938817
Iteration 554, loss = 0.14894552
Iteration 555, loss = 0.14850410
Iteration 556, loss = 0.14806398
Iteration 557, loss = 0.14762544
Iteration 558, loss = 0.14718839
Iteration 559, loss = 0.14675250
Iteration 560, loss = 0.14631806
Iteration 561, loss = 0.14588517
Iteration 562, loss = 0.14545356
Iteration 563, loss = 0.14502328
Iteration 564, loss = 0.14459447
Iteration 565, loss = 0.14416713
Iteration 566, loss = 0.14374105
Iteration 567, loss = 0.14331632
Iteration 568, loss = 0.14289299
Iteration 

Iteration 824, loss = 0.06985100
Iteration 825, loss = 0.06966975
Iteration 826, loss = 0.06948882
Iteration 827, loss = 0.06930862
Iteration 828, loss = 0.06912905
Iteration 829, loss = 0.06894998
Iteration 830, loss = 0.06877151
Iteration 831, loss = 0.06859367
Iteration 832, loss = 0.06841633
Iteration 833, loss = 0.06823960
Iteration 834, loss = 0.06806343
Iteration 835, loss = 0.06788784
Iteration 836, loss = 0.06771279
Iteration 837, loss = 0.06753831
Iteration 838, loss = 0.06736443
Iteration 839, loss = 0.06719112
Iteration 840, loss = 0.06701839
Iteration 841, loss = 0.06684613
Iteration 842, loss = 0.06667441
Iteration 843, loss = 0.06650337
Iteration 844, loss = 0.06633273
Iteration 845, loss = 0.06616271
Iteration 846, loss = 0.06599325
Iteration 847, loss = 0.06582427
Iteration 848, loss = 0.06565598
Iteration 849, loss = 0.06548805
Iteration 850, loss = 0.06532081
Iteration 851, loss = 0.06515414
Iteration 852, loss = 0.06498789
Iteration 853, loss = 0.06482208
Iteration 

MLPClassifier(max_iter=600000, verbose=True)

In [69]:
X_test_list = []
for i, class_ in enumerate(classes_list):
    image_names = os. listdir('./samples_test/' + class_)

    for image_name in images_names:
        image_path = './samples_test/' + class_ + '/'+ image_name
        image_class = i
        class_name = class_
        X_test_list.append((image_class, class_name, image_path))

In [65]:
#test = pd.read_csv('../input/test.csv')

#result_file = open("sift.csv", "w")
#result_file_obj = csv.writer(result_file)
#result_file_obj.writerow(np.append("id", species))

for tup in X_test_list:
    img = cv2.imread(tup[2],0)
    print(img.shape)
    kp, des = sift.detectAndCompute(img, None)

    x = np.zeros(k)
    nkp = np.size(kp)

    for d in des:
        idx = kmeans.predict([d])
        x[idx] += 1/nkp

    res = mlp.predict_proba([x])
    row = []
    row.append(tup[1])

    for e in res[0]:
        row.append(e)

    #result_file_obj.writerow(row)

#result_file.close()

(30, 136)


AttributeError: 'NoneType' object has no attribute 'shape'