In [766]:
import cv2
import numpy as np
import os
import pandas as pd

import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score
import time

import descriptors_getters

SCALE_TO_HEIGHT = 100

def plot_gray(image):
    %matplotlib qt
    plt.figure(figsize=(20,15))
    return plt.imshow(image, cmap='Greys_r')

def image_scale(image, scale_to_height=SCALE_TO_HEIGHT, double_height=False):
    scale_factor = scale_to_height / image.shape[0]
    width  = int(image.shape[1] * scale_factor)
    height = int(image.shape[0] * scale_factor)
    if double_height:
        height = height*2
    image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
    return image

def preprocess(image, SCALE_TO_HEIGHT=100):
    image = image_scale(image)
    #image = cv2.medianBlur(image, 3)
    #image = cv2.GaussianBlur(image, (3, 3), 0)
    _, image = cv2.threshold(image,0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    #image = canny_threshold(image)
    return image


In [767]:
classes_list = ['василий', 'не_василий']
cols = ['Name', 'Path', "Class"]

In [768]:
lst = []
for i, class_name in enumerate(classes_list):
    image_names = os. listdir('./samples/' + class_name)
    for image_name in image_names:
        image_path = './samples/' + class_name + '/'+ image_name
        lst.append([class_name, image_path, i])
        
data=pd.DataFrame(lst,columns=cols)

In [769]:
y = data['Class']
X = data.drop(['Class'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) #random_state=42
start_time_all = time.time()

In [770]:
def get_descriptors(img):   
    
    return descriptors_getters.get_orb_descriptors(img)


In [771]:
dico = []
for path in X_train['Path']:
    img = cv2.imread(path, 0)
    img = preprocess(img)

    kp, des = get_descriptors(img)
    
    for d in des:
        dico.append(d)

In [772]:
k = 100
#batch_size = np.size(image_names) * 3
batch_size = 64
kmeans = MiniBatchKMeans(n_clusters=k, batch_size=batch_size, verbose=0, random_state=42).fit(dico)#random_state=42

In [773]:
kmeans.verbose = False

histo_list = []

for path in X_train['Path']:
    img = cv2.imread(path,0)
    img = preprocess(img)
    # find the keypoints with STAR
    # compute the descriptors with BRIEF
    kp, des = get_descriptors(img)
    histo = np.zeros(k)
    nkp = np.size(kp)    
    if des is not None:
        for d in des:

            idx = kmeans.predict([d])
            histo[idx] += 1/nkp 

    histo_list.append(histo)

In [774]:
X = np.array(histo_list)
Y = y_train

mlp = MLPClassifier(verbose=False, max_iter=600000, random_state=42) #random_state=42
mlp.fit(X, Y)

MLPClassifier(max_iter=600000, random_state=42)

In [775]:
#test = pd.read_csv('../input/test.csv')

#result_file = open("sift.csv", "w")
#result_file_obj = csv.writer(result_file)
#result_file_obj.writerow(np.append("id", species))
start_time_predict = time.time()
y_pred = []
for i, path in enumerate(X_test['Path']):
    img = cv2.imread(path,0)
    img = preprocess(img)
    #print(img.shape)   
    kp, des = get_descriptors(img)    
    x = np.zeros(k)
    nkp = np.size(kp)
        
    if des is not None:
        for d in des:
            idx = kmeans.predict([d])
            x[idx] += 1/nkp
    res = mlp.predict_proba([x])[0][1]
    row = []
    #for e in res[0]:
        #row.append(e)
    y_pred.append(round(res))
    #result_file_obj.writerow(row)

score = precision_score(y_test.values, y_pred)
#result_file.close()

print('Время предсказания: {}'.format(time.time() - start_time_predict))

Время предсказания: 0.8285980224609375


In [776]:
score

0.75

In [777]:
print('Время работы функции: {}'.format(time.time() - start_time_all))

Время работы функции: 3.4451651573181152
