In [1]:
import cv2
import numpy as np
from collections import defaultdict
from sklearn.cluster import KMeans
from scipy.cluster.vq import vq
from sklearn.linear_model import LogisticRegression
import glob

In [2]:
# dicionário de features por classe
images_features = defaultdict(list)
for img in glob.glob('words/*'):
    im = cv2.imread(img)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    h, w, _ = im.shape
    features = []
    im_name = img.split('/')[1].split('_')[0]
    for i in range(0, h, 5):
        for j in range(0, w, 5):
            features.append(im[i:i+5, j:j+5, :])
    images_features[im_name] += [feat.reshape(-1) for feat in features if feat.shape == (5, 5, 3)]

In [6]:
images_features

defaultdict(list,
            {'house': [array([ 56,  99, 144,  57, 100, 145,  60, 101, 147,  64, 105, 151,  71,
                     112, 158,  57, 100, 145,  57, 100, 145,  59, 100, 146,  62, 103,
                     149,  67, 108, 154,  58, 101, 146,  58, 101, 146,  57, 100, 145,
                      58, 101, 146,  61, 102, 148,  60, 103, 148,  58, 101, 146,  56,
                      99, 144,  54,  97, 142,  56,  97, 143,  59, 103, 148,  57, 101,
                     146,  54,  98, 143,  51,  95, 140,  51,  94, 139], dtype=uint8),
              array([ 79, 120, 166,  88, 126, 173,  92, 130, 177,  96, 132, 180,  96,
                     132, 180,  74, 115, 161,  82, 120, 167,  85, 123, 170,  90, 126,
                     174,  89, 125, 173,  66, 107, 153,  70, 111, 157,  73, 114, 160,
                      78, 116, 163,  78, 116, 163,  58,  99, 145,  61, 102, 148,  62,
                     103, 149,  67, 105, 152,  68, 106, 153,  52,  95, 140,  54,  97,
                     142,  

In [3]:
# todas as features para calcular os clusters
features = []
for feat in list(images_features.values()):
    features += feat
features = [feat for feat in features]
features = np.array(features)

In [4]:
features

array([[ 56,  99, 144, ...,  51,  94, 139],
       [ 79, 120, 166, ...,  59, 100, 146],
       [ 95, 131, 179, ...,  68, 106, 153],
       ...,
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255]], dtype=uint8)

In [5]:
vocab = KMeans(n_clusters=5)
vocab.fit(X=features)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [6]:
# histograma das classes
histogram_features = np.zeros((5, 5), 'float32')
for i in range(5):
    words, distance = vq(list(images_features.values())[i], vocab.cluster_centers_)
    for w in words:
        histogram_features[i][w] += 1

In [7]:
histogram_features

array([[ 5612., 17553., 13931.,  7194., 16511.],
       [ 4901., 16011., 12430.,  1628.,  5470.],
       [ 9095., 20663.,  9603.,  1436., 20175.],
       [22089., 13812.,  7859.,  7476.,  9586.],
       [37641.,  5591.,  8652.,    56.,  6948.]], dtype=float32)

In [8]:
# classificador a partir dos histogramas
clf = LogisticRegression()
clf.fit(histogram_features, np.array(list(images_features.keys())))



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [10]:
def predict(img):
    im = cv2.imread(img)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    h, w, _ = im.shape
    # descobre as features da imagem
    features = []
    for i in range(0, h, 5):
        for j in range(0, w, 5):
            features.append(im[i:i+5, j:j+5, :])
    images_features = [feat.reshape(-1) for feat in features if feat.shape == (5, 5, 3)]

    # calcula o histograma da imagem
    im_features = np.zeros((1, 5), 'float32')
    words, distance = vq(images_features, vocab.cluster_centers_)
    for w in words:
        im_features[0][w] += 1

    # classifica a imagem
    return clf.predict(im_features)[0]

In [11]:
for img in glob.glob('words/*'):
    print(img, ':' , predict(img))

words/house_2.jpeg : house
words/woman_4.jpeg : woman
words/car_3.jpeg : car
words/man_4.jpeg : man
words/house_4.jpeg : house
words/man_2.jpeg : car
words/house_1.jpeg : car
words/house_3.jpeg : woman
words/house_0.jpeg : house
words/car_1.jpeg : car
words/woman_0.jpeg : woman
words/bike_4.jpeg : bike
words/man_3.jpeg : car
words/bike_1.jpeg : bike
words/woman_3.jpeg : woman
words/man_1.jpeg : bike
words/bike_2.jpeg : bike
words/bike_3.jpeg : house
words/car_0.jpeg : car
words/man_0.jpeg : bike
words/car_2.jpeg : woman
words/car_4.jpeg : house
words/woman_1.jpeg : house
words/woman_2.jpeg : man
words/bike_0.jpeg : bike
