In [2]:
import cv2
import os
import numpy as np
import sklearn
from sklearn.cluster import MiniBatchKMeans
from scipy.spatial import distance
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
import random
from xgboost import XGBClassifier
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [3]:
def load_images_from_folder(folder):
    images = list()
    labels = list()
    i = 0
    for filename in os.listdir(folder):
        if filename !='.DS_Store':    
            path = folder + "/" + filename
            for cat in os.listdir(path):
                img = cv2.imread(path + "/" + cat,0)
                #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                if img is not None:
                    images.append(img)
                    labels.append(i)
            i = i + 1
    return images, labels

In [4]:
def descriptor_features(X):
    descriptor_list = []
    akaze = cv2.AKAZE_create()
    for i in range(0, len(X)):
        kp,des = akaze.detectAndCompute(X[i], None)
        descriptor_list.extend(des)
    return descriptor_list

In [5]:
def kmeans(k, descriptor_list):
    kmeans = MiniBatchKMeans(n_clusters=k, n_init=10)
    kmeans.fit(descriptor_list)
    visual_words = kmeans.cluster_centers_
    return visual_words

In [6]:
def find_index(image, center):
    count = 0
    ind = 0
    dist = 0
    for i in range(len(center)):
        if(i == 0):
            count = distance.euclidean(image, center[i])
            dist = count
            #count = L1_dist(image, center[i])
        else:
            dist = distance.euclidean(image, center[i])
            #dist = L1_dist(image, center[i])
        if(dist < count):
            ind = i
            count = dist
    return ind

In [7]:
def image_class(X, centers):
    dict_feature = list()
    akaze = cv2.AKAZE_create()
    for i in range(0, len(X)):
        #print(i)
        kp,des = akaze.detectAndCompute(X[i], None)
        histogram = np.zeros(len(centers))
        for each_feature in des:
            
            ind = find_index(each_feature, centers)
            histogram[ind] += 1
        dict_feature.append(histogram)
    return dict_feature

In [8]:
X, y = load_images_from_folder('/Users/anna/detection/Photos')
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size = 0.2, random_state = 42)
descriptors = descriptor_features(X_train)
words = kmeans(200, descriptors)
X_train_new = image_class(X_train, words)
X_test_new = image_class(X_test, words)

Буда - 83 , Парфум - 89, нічого - 65

In [9]:
c1 = 0
c0 = 0
for i in range(0, len(y)):
    if y[i]==1:
        c1 = c1 + 1
    if y[i] == 0:
        c0 = c0 + 1
print(c1, c0)

89 83


парфум = 1, Буда = 0, нічого = 2( class labels)

### Функція для знаходження статистики по кожному методу 

In [10]:
def get_errors(y1, y2):
    errors = np.zeros((3, 2))
    S = 0
    for i in range(0, len(y1)):
        a = y1[i]
        b = y2[i]
        if a == b:
            S = S + 1
        if a == 0 and b == 1:
            errors[0][0] = errors[0][0] + 1
        if a == 0 and b == 2:
            errors[0][1]+=1
        if a == 1 and b == 0:
            errors[1][0] += 1
        if a == 1 and b == 2:
            errors[1][1] += 1
        if a == 2 and b == 0:
            errors[2][0] += 1
        if a == 2 and b == 1:
            errors[2][1] += 1
    errors = errors/len(y1)
    S = S/len(y1)
    return errors, S

# Вибір класифікатора без підбору параметрів

## Градієнтний бустінг

In [41]:
model = GradientBoostingClassifier()
model.fit(X_train_new, y_train)
y_pred = model.predict(X_test_new)
get_errors(y_test, y_pred)

(array([[0.        , 0.        ],
        [0.02083333, 0.04166667],
        [0.08333333, 0.125     ]]),
 0.7291666666666666)

## Випадковий ліс

In [12]:
model = RandomForestClassifier()
model.fit(X_train_new, y_train)
y_pred = model.predict(X_test_new)
get_errors(y_test, y_pred)

(array([[0.        , 0.04166667],
        [0.02083333, 0.02083333],
        [0.08333333, 0.125     ]]),
 0.7083333333333334)

## Дерево прийняття рішень

In [13]:
model = DecisionTreeClassifier()
model.fit(X_train_new, y_train)
y_pred = model.predict(X_test_new)
get_errors(y_test, y_pred)

(array([[0.04166667, 0.14583333],
        [0.04166667, 0.0625    ],
        [0.0625    , 0.10416667]]),
 0.5416666666666666)

## XGBClassifier

In [14]:
model = XGBClassifier()
arr = np.array(y_train)
arr1 = np.array(X_train_new)
arr2 = np.array(X_test_new)
model.fit(arr1, arr)
y_pred = model.predict(arr2)
get_errors(y_test, y_pred)

(array([[0.        , 0.0625    ],
        [0.02083333, 0.02083333],
        [0.08333333, 0.08333333]]),
 0.7291666666666666)

# Підбір параметрів для кожного з методів за допомогою GridSearch

## GradientBoostingClassifier

In [53]:
parameters = {'min_samples_split':(2,4, 6), 
              'min_samples_leaf':(1, 3,4), 
              'max_depth':(3,5,8),
             'n_estimators':(100, 150)}
m = GradientBoostingClassifier()
model1 = GridSearchCV(m, parameters, return_train_score = True)
model1.fit(X_train_new, y_train)


GridSearchCV(estimator=GradientBoostingClassifier(),
             param_grid={'max_depth': (3, 5, 8), 'min_samples_leaf': (1, 3, 4),
                         'min_samples_split': (2, 4, 6),
                         'n_estimators': (100, 150)},
             return_train_score=True)

In [54]:
%%time
y_pred = model1.predict(X_test_new)
get_errors(y_test, y_pred)

CPU times: user 2.71 ms, sys: 1.09 ms, total: 3.8 ms
Wall time: 2.96 ms


(array([[0.        , 0.04166667],
        [0.04166667, 0.02083333],
        [0.08333333, 0.08333333]]),
 0.7291666666666666)

In [55]:
model1.best_params_

{'max_depth': 5,
 'min_samples_leaf': 4,
 'min_samples_split': 2,
 'n_estimators': 150}

In [56]:
model1.best_score_

0.6716927453769559

## RandomForestClassifier

In [57]:
parameters = {'n_estimators':(100,125, 150), 
              'min_samples_split':(2, 4, 3),
             'min_samples_leaf':(1, 3, 5)}
m = RandomForestClassifier()
model2 = GridSearchCV(m, parameters, return_train_score = True)
model2.fit(X_train_new, y_train)
y_pred = model2.predict(X_test_new)
get_errors(y_test, y_pred)

(array([[0.        , 0.02083333],
        [0.02083333, 0.        ],
        [0.10416667, 0.14583333]]),
 0.7083333333333334)

In [58]:
%%time
y_pred = model2.predict(X_test_new)
get_errors(y_test, y_pred)

CPU times: user 30.4 ms, sys: 2.94 ms, total: 33.4 ms
Wall time: 38 ms


(array([[0.        , 0.02083333],
        [0.02083333, 0.        ],
        [0.10416667, 0.14583333]]),
 0.7083333333333334)

In [59]:
model2.best_params_

{'min_samples_leaf': 1, 'min_samples_split': 4, 'n_estimators': 150}

In [60]:
model2.best_score_

0.6557610241820768

## Decision Tree Classifier

In [61]:
parameters = {'splitter':('best', 'random'),
              'min_samples_split':(2, 4, 6),
              'min_samples_leaf':(1, 3, 4),
              'criterion':('gini', 'entropy')}
m = DecisionTreeClassifier()
model3 = GridSearchCV(m, parameters, return_train_score = True)
model3.fit(X_train_new, y_train)

GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ('gini', 'entropy'),
                         'min_samples_leaf': (1, 3, 4),
                         'min_samples_split': (2, 4, 6),
                         'splitter': ('best', 'random')},
             return_train_score=True)

In [62]:
%%time
y_pred = model3.predict(X_test_new)
get_errors(y_test, y_pred)

CPU times: user 635 µs, sys: 21 µs, total: 656 µs
Wall time: 711 µs


(array([[0.0625    , 0.14583333],
        [0.10416667, 0.08333333],
        [0.04166667, 0.08333333]]),
 0.4791666666666667)

In [63]:
model3.best_params_

{'criterion': 'entropy',
 'min_samples_leaf': 1,
 'min_samples_split': 4,
 'splitter': 'random'}

In [64]:
model3.best_score_

0.5554765291607398

## XGBoost Classifier

In [65]:
parameters = {
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5, 7, 10],
        'min_child_weight': [1, 3, 5],
        'subsample': [0.5, 0.7],
        'colsample_bytree': [0.5, 0.7],
        'n_estimators' : [100, 200, 500],
        'objective': ['reg:squarederror']
    }
m = XGBClassifier()
model4 = GridSearchCV(m, parameters, return_train_score = True)
arr = np.array(y_train)
arr1 = np.array(X_train_new)
arr2 = np.array(X_test_new)
model4.fit(arr1, arr)


GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None...
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=None,
                                     tree_method=None, validate_parameters=None,
                                     verbosity=None),
             par

In [66]:
%%time
y_pred = model4.predict(arr2)
get_errors(y_test, y_pred)

CPU times: user 5.31 ms, sys: 864 µs, total: 6.18 ms
Wall time: 2.1 ms


(array([[0.        , 0.02083333],
        [0.02083333, 0.02083333],
        [0.10416667, 0.08333333]]),
 0.75)

In [67]:
model4.best_params_

{'colsample_bytree': 0.7,
 'learning_rate': 0.1,
 'max_depth': 3,
 'min_child_weight': 1,
 'n_estimators': 100,
 'objective': 'reg:squarederror',
 'subsample': 0.7}

In [68]:
model4.best_score_

0.6823613086770981

# Робота з відео

За допомогою OpenCV обробимо 2 коротеньких відео, на яких є один чи інший об'єкт, застосовуючи до кожного 5го кадру кожну з моделей; бедмо писати поверх кадру в кожний момент, який саме об'єкт був знайдений і чи був знайдений взагалі. В результаті отримуємо 8 відео

In [85]:
def create_video(file_name_read, file_name_write):
    cap1 = cv2.VideoCapture(file_name_read)
    frame_width = int(cap1.get(3))
    frame_height = int(cap1.get(4))
    out = cv2.VideoWriter(file_name_write,cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))
    #set method here
    # and don't forget to use appropriate model
    akaze = cv2.AKAZE_create()
    # features for text
    font = cv2.FONT_HERSHEY_SIMPLEX 
    org = (50, 50) 
    fontScale = 1
    color = (255, 0, 0) 
    thickness = 2
    i = 0
    while cap1.isOpened():
        ret, frame = cap1.read() 
        if not ret:
            break
        if i%5 == 0:
            kp,des = akaze.detectAndCompute(frame, None)
            histogram = np.zeros(len(words))
            for each_feature in des:
                ind = find_index(each_feature, words)
                histogram[ind] += 1
            histogram = histogram.reshape(1, -1)
            # трошки криво, тут вручну вказує номер моделі з якою працюємо
            y_pred = model4.predict(histogram)
            if y_pred == 1:
                frame = cv2.putText(frame, 'Parfum', org, font,fontScale, color, thickness, cv2.LINE_AA)
            if y_pred == 0:
                frame = cv2.putText(frame, 'Budda', org, font,fontScale, color, thickness, cv2.LINE_AA)
            if y_pred == 2:
                frame = cv2.putText(frame, 'Nothing', org, font,fontScale, color, thickness, cv2.LINE_AA)
            out.write(frame)
        i = i+1
    cap1.release()
    out.release()
    cv2.destroyAllWindows()

In [81]:
create_video('/Users/anna/detection/video1.mp4', 'Videos/akaze_video1_gradient_boosting.avi')
create_video('/Users/anna/detection/video2.mp4', 'Videos/akaze_video2_gradient_boosting.avi')

In [82]:
create_video('/Users/anna/detection/video1.mp4', 'Videos/akaze_video1_random_forest.avi')
create_video('/Users/anna/detection/video2.mp4', 'Videos/akaze_video2_random_forest.avi')

In [84]:
create_video('/Users/anna/detection/video1.mp4', 'Videos/akaze_video1_decision_tree.avi')
create_video('/Users/anna/detection/video2.mp4', 'Videos/akaze_video2_decision_tree.avi')

In [86]:
create_video('/Users/anna/detection/video1.mp4', 'Videos/akaze_video1_xgboost.avi')
create_video('/Users/anna/detection/video2.mp4', 'Videos/akaze_video2_xgboost.avi')