In [11]:
from sklearn import svm
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
from glob import glob
import cv2
import os
import numpy as np

N_CLUSTERS = 5


def load_image(path):
    """Read an image file and return its pixels in RGB channel order.

    The file is decoded with ``cv2.imread`` and then converted with
    ``cv2.COLOR_BGR2RGB`` so that downstream code works on RGB values.

    Parameters
    ----------
    path : str
        Location of the image file on disk.

    Returns
    -------
    img
        The decoded image after the BGR -> RGB conversion.

    Raises
    ------
    IOError
        If ``cv2.imread`` could not read or decode the file.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this check the error only shows up later as an opaque
        # assertion failure inside cv2.cvtColor.
        raise IOError("Could not read image: %s" % path)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def crop_image(img,x=0,y=0):
    """Return a window of ``img`` anchored at (x, y) that is half the image height tall.

    The window spans rows ``y`` to ``y + height // 2`` and columns ``x`` to
    ``x + width``; numpy slicing silently clips the parts that fall outside
    the image. With the default offsets this is the top half of the image.

    Parameters
    ----------
    img : array with at least two dimensions (rows, columns, ...)
        Image to crop.
    x : int, optional
        Column of the window's left edge. Defaults to 0.
    y : int, optional
        Row of the window's top edge. Defaults to 0.

    Returns
    -------
    cropped_img
        A slice (view) of ``img``; no pixel data is copied.
    """
    # Floor division keeps the height an integer. Plain ``/`` produces a float
    # under Python 3 (or ``from __future__ import division``), and floats are
    # not valid slice indices.
    h = img.shape[0] // 2
    w = img.shape[1]
    return img[y:y + h, x:x + w]

def fex(img, n_clusters, random_state=None):
    """Extract a dominant-colour feature vector from an image.

    The pixels are clustered with KMeans. Each cluster contributes four
    values to the output: the three components of its centre (cast to int)
    followed by the fraction of pixels assigned to it. Clusters are ordered
    from most to least frequent.

    The original implementation also painted a colour-bar chart with
    ``cv2.rectangle``, but the chart was never displayed or returned, so that
    dead code has been removed.

    Parameters
    ----------
    img : array of shape (rows, columns, 3)
        Image whose pixels are clustered. The reshape below requires exactly
        three values per pixel.
    n_clusters : int
        Number of colour clusters to fit.
    random_state : optional
        Forwarded to ``KMeans``. The default ``None`` keeps the original
        unseeded behaviour; pass an int to make the features reproducible.

    Returns
    -------
    numpy.ndarray
        Float vector of length ``4 * n_clusters`` laid out as
        ``[c0, c1, c2, frequency]`` per cluster.
    """
    # Flatten the image into a list of pixels, one row per pixel.
    pixels = img.reshape((img.shape[0] * img.shape[1], 3))
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(pixels)

    # The cluster centres are the dominant colours.
    colour_clusters = kmeans.cluster_centers_.astype(int)

    # Fraction of pixels assigned to each cluster label 0..n_clusters-1.
    bin_edges = np.arange(0, n_clusters + 1)
    (hist, _) = np.histogram(kmeans.labels_, bins=bin_edges)
    hist = hist.astype("float")
    hist /= hist.sum()

    # Sort both arrays by descending frequency; the permutation is computed
    # once so colours and frequencies are guaranteed to stay aligned.
    order = (-hist).argsort()
    colour_clusters = colour_clusters[order]
    hist = hist[order]

    # One row per cluster -> [c0, c1, c2, frequency], then flatten row by row.
    return np.column_stack((colour_clusters, hist)).ravel()

# Build the training set. Every sub-directory of main_path is treated as one
# class: its name is the label and each .jpg inside it yields one feature
# vector from fex().
# NOTE(review): test images are passed through crop_image() before fex(), but
# training images are not -- confirm the training images are already cropped.
main_path = '/Users/frodo/tf_files/deckchair/'

features = []
labels = []
extension = '.jpg'
for directory in glob(main_path+'*'):
    if not os.path.isdir(directory):
        # The '*' pattern also matches plain files, on which os.listdir raises.
        continue
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("TRAINING ON " + directory)
    # The class label is the directory's own name.
    label = os.path.basename(directory)
    for img_name in os.listdir(directory):
        if img_name.endswith(extension):
            img_path = os.path.join(directory, img_name)
            img = load_image(img_path)
            feature = fex(img,N_CLUSTERS)
            features.append(feature)
            labels.append(label)

# Convert to arrays: one row per image in features, one label per row in labels.
features = np.array(features)
labels = np.array(labels)

TRAINING ON /Users/frodo/tf_files/deckchair/gray_sky
TRAINING ON /Users/frodo/tf_files/deckchair/sunset


In [12]:
# Fit a support-vector classifier on the colour-cluster features. Only C and
# gamma are set explicitly; every other SVC option keeps its default.
clf = svm.SVC(C=1,gamma=0.001)
# Left as the cell's last expression so the notebook displays the fitted estimator.
clf.fit(features, labels)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [13]:
# Classify every .jpg in test_directory and collect the predictions in
# `results`, keyed by file name.
test_directory = '/Users/frodo/Desktop/timestamp-selected-sunset_resize'
# Placeholder with the real feature-vector length: fex() returns 4 values per
# cluster. (It was previously sized by the number of training samples.)
test_feature = np.zeros(N_CLUSTERS*4)
test_features = []
predicted_labels = []
results = {}
for img_name in os.listdir(test_directory):
    if img_name.endswith('.jpg'):
        img_path = os.path.join(test_directory, img_name)
        image = load_image(img_path)
        # Features are extracted from the cropped window only.
        cropped_img = crop_image(image)
        test_feature = fex(cropped_img,N_CLUSTERS)
        test_features.append(test_feature)
        # predict() expects a 2-D array, so wrap the single sample; the result
        # is a length-1 array, which later code indexes with [0].
        predicted_label = clf.predict(np.array([test_feature]))
        predicted_labels.append(predicted_label)
        results[img_name] = predicted_label

In [14]:
from PIL import Image, ImageDraw, ImageFont
import time 

RESULTS_PATH = '/Users/frodo/Desktop/results_classifier/'

# Saving into a missing directory fails, so create it up front.
# (isdir + makedirs instead of exist_ok keeps this valid on Python 2.)
if not os.path.isdir(RESULTS_PATH):
    os.makedirs(RESULTS_PATH)

# Write the predicted label onto each test image and save the annotated copy
# under RESULTS_PATH with the same file name.
font_type = ImageFont.truetype('Arial.ttf',18)
# .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for k, v in results.items():
    image = Image.open(os.path.join(test_directory,k))
    # v is the length-1 array returned by clf.predict for this image.
    label_to_write = str(v[0])
    draw = ImageDraw.Draw(image)
    draw.text(xy=(50,50),text=label_to_write,fill=(0,0,0),font=font_type)
    image.save(os.path.join(RESULTS_PATH,k), "JPEG")

In [None]:
##### from mlxtend.plotting import plot_decision_regions

# Encode the string class labels as integers for plotting:
# 1 for 'sunset', 0 for every other label.
label_ids = np.array([1 if label == 'sunset' else 0 for label in labels])

# Use a hard-coded dictionary to try out the
# filler_feature_values argument of
# plot_decision_regions, which is required
# when working with more than two features.
# d1 = {1:10,2:20,4:60,5:50,
#      6:50,7:90,8:80,9:60,10:10,
#      11:30,12:30,13:40,14:10,15:40,
#      16:30,17:40,18:50,19:20}

# d2 = {1:100,2:200,4:6,5:5,
#      6:50,7:90,8:80,9:60,10:10,
#      11:30,12:30,13:40,14:10,15:40,
#      16:30,17:40,18:50,19:20}


    
# plot_decision_regions(features, label_ids, clf=clf,
#                       feature_index=[0,3],
#                       filler_feature_values=d,
#                       filler_feature_ranges=d,
#                       legend=2, ax=ax)


In [None]:
# For every colour cluster, scatter its blue component against its relative
# frequency, coloured by class, and save each plot under `path`.

# Axis labels follow the feature layout produced by fex(): four entries per
# cluster (red, green, blue, frequency). They are generated from N_CLUSTERS so
# they stay in step with the feature vectors.
axis_labels = []
for cluster_no in range(1, N_CLUSTERS + 1):
    suffix = ' cluster_' + str(cluster_no)
    axis_labels.extend(['Red' + suffix, 'Green' + suffix,
                        'Blue' + suffix, '% occurrancy' + suffix])

path = '/Users/frodo/Desktop/plots/'
# savefig fails if the target directory is missing.
if not os.path.isdir(path):
    os.makedirs(path)

for i in range(N_CLUSTERS):
    x_col = 4 * i + 2  # blue component of cluster i
    y_col = 4 * i + 3  # relative frequency of cluster i
    # Start from an empty figure on every iteration. On backends where
    # plt.show() does not close the figure, the points of earlier clusters
    # would otherwise pile up in later plots.
    plt.figure(1)
    plt.clf()
    plt.scatter(features[:, x_col], features[:, y_col],
                c=label_ids, cmap=plt.cm.Paired)
    plt.xlabel(axis_labels[x_col])
    plt.ylabel(axis_labels[y_col])
    plot_name = axis_labels[x_col] + ' vs ' + axis_labels[y_col]
    plt.savefig(path + plot_name)
    plt.show()
    
