In [1]:
# Define the functions that
# load an image,
# crop it, and
# extract its features: for each colour cluster,
# the (R, G, B) centre and its fraction of occurrence.

from sklearn import svm
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
from glob import glob
import cv2
import os
import numpy as np

N_CLUSTERS = 3


def load_image(path):
    """Read the image stored at ``path`` and return it in RGB channel order.

    The image is decoded with ``cv2.imread`` and then converted with
    ``cv2.COLOR_BGR2RGB``.

    Raises:
        IOError: if ``cv2.imread`` returns ``None``, which is how it reports
            a file that is missing or cannot be decoded.
    """
    img = cv2.imread(path)
    if img is None:
        # Fail here, naming the offending file, instead of letting cvtColor
        # raise an opaque OpenCV error on a None input.
        raise IOError("Could not read image: {0}".format(path))
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def crop_image(img,x=0,y=0):
    """Crop ``img`` to a window half its height and its full width.

    The window starts at column ``x`` and row ``y``; with the defaults this
    is the upper half of the image. Parts of the window that fall outside
    the image are dropped by the slice, so the result can be smaller than
    the nominal window size.

    Args:
        img: array indexed as ``img[row, column, ...]``.
        x: first column of the window.
        y: first row of the window.

    Returns:
        The slice ``img[y:y+h, x:x+w]`` with ``h = height // 2`` and
        ``w = width``.
    """
    # Floor division keeps the height an integer under both Python 2 and
    # true division; a float would not be accepted as a slice bound.
    half_height = img.shape[0] // 2
    width = img.shape[1]
    return img[y:y + half_height, x:x + width]

def fex(img, n_clusters, random_state=None):
    """Extract dominant-colour features from an image with k-means.

    The pixels are clustered into ``n_clusters`` colours. The clusters are
    then ordered from most to least frequent and flattened into one vector.

    Args:
        img: array that can be reshaped to ``(height * width, 3)``, i.e. a
            three-channel image.
        n_clusters: number of colour clusters to fit.
        random_state: forwarded to ``KMeans``. The default ``None`` keeps
            the unseeded behaviour; pass an integer for repeatable features.

    Returns:
        A 1-D float array of length ``4 * n_clusters`` laid out as
        ``[c0_0, c0_1, c0_2, f0, c1_0, c1_1, c1_2, f1, ...]``, where the
        ``c`` values are the cluster centre truncated to integers and ``f``
        is the fraction of pixels assigned to that cluster.
    """
    pixels = img.reshape((img.shape[0] * img.shape[1], 3))
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(pixels)

    # The cluster centres are the dominant colours (truncated to integers).
    colour_clusters = kmeans.cluster_centers_.astype(int)

    # Fraction of pixels per cluster: one unit-wide bin per label 0..n-1.
    bin_edges = np.arange(0, n_clusters + 1)
    hist, _ = np.histogram(kmeans.labels_, bins=bin_edges)
    hist = hist.astype("float")
    hist /= hist.sum()

    # Sort both arrays by descending frequency with a single ordering.
    order = (-hist).argsort()
    colour_clusters = colour_clusters[order]
    hist = hist[order]

    # One row per cluster: three colour components followed by the fraction.
    per_cluster = np.zeros((len(colour_clusters), 4))
    per_cluster[:, :3] = colour_clusters
    per_cluster[:, 3] = hist
    return per_cluster.ravel()

main_path = '/Users/frodo/tf_files/deckchair/'
extension = '.jpg'

# Build the training set: every sub-directory of main_path is one class,
# and its name is used as the label of every image it contains.
features = []
labels = []
# Sorted so the sample order does not depend on the file system.
for directory in sorted(glob(main_path+'*')):
    if not os.path.isdir(directory):
        # glob also matches plain files, on which os.listdir would fail.
        continue
    print("TRAINING ON " + directory)
    label = os.path.basename(directory)
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(extension):
            continue
        # Separate names for the file name and the pixel array.
        img = load_image(os.path.join(directory, filename))
        features.append(fex(img, N_CLUSTERS))
        labels.append(label)
features = np.array(features)
labels = np.array(labels)

TRAINING ON /Users/frodo/tf_files/deckchair/gray sky
TRAINING ON /Users/frodo/tf_files/deckchair/sunset


In [58]:
# search the parameter grid
# to find the best C and gamma values

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

def svc_param_selection(X, y):
    """Grid-search ``C`` and ``gamma`` for an ``svm.SVC`` fitted on ``(X, y)``.

    Returns the ``best_params_`` dictionary of the fitted ``GridSearchCV``,
    which has the keys ``'C'`` and ``'gamma'``.
    """
    search = GridSearchCV(
        svm.SVC(),
        {
            'C': [0.001, 0.01, 0.1, 1, 10],
            'gamma': [0.001, 0.01, 0.1, 1],
        },
    )
    search.fit(X, y)
    return search.best_params_

def knn_param_selection(X,y):
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    print(scores.mean())
    k_range = list(range(1, 31))
    k_scores = []
    for k in k_range:
        knn = KNeighborsClassifier(n_neighbors=k)
        scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
        k_scores.append(scores.mean())
    print(k_scores)
    
    import  matplotlib.pyplot as plt
    %matplotlib inline

    # plot the value of K for KNN (x-axis) versus the cross-validated accuracy (y-axis)
    plt.plot(k_range, k_scores)
    plt.xlabel('Value of K for KNN')
    plt.ylabel('Cross-Validated Accuracy')

    par = svc_param_selection(features, labels)
    # print par
    # setup the classifier
    clf = svm.SVC(C=par['C'],\
                  gamma=par['gamma'],\
                  probability=True)
    clf.fit(features, labels)
    # knn_param_selection(features,labels)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [64]:
# annotate each test image with the class probabilities and the predicted label, then save it

from PIL import Image, ImageDraw, ImageFont
from sklearn import metrics

# RESULTS_PATH = '/Users/frodo/Desktop/results_classifier/'
RESULTS_PATH = '/Users/frodo/Desktop/results_classifier_3clus/'

test_directory = '/Users/frodo/Desktop/timestamp-selected-sunset_resize'
test_features = []
results = {}
font_type = ImageFont.truetype('Arial.ttf',16)

# Create the output directory up front so saving does not fail on a
# missing folder.
if not os.path.isdir(RESULTS_PATH):
    os.makedirs(RESULTS_PATH)

# Classify every test image from the features of its cropped version, write
# the per-class probabilities and the predicted label onto a copy of the
# full image, and save that copy under RESULTS_PATH.
for filename in os.listdir(test_directory):
    if not filename.endswith('.jpg'):
        continue
    img_path = os.path.join(test_directory, filename)
    cropped_img = crop_image(load_image(img_path))
    test_feature = fex(cropped_img, N_CLUSTERS)
    # test_feature = fex(load_image(img_path), N_CLUSTERS)  # uncropped variant
    test_features.append(test_feature)
    probability = clf.predict_proba([test_feature])
    predicted_label = clf.predict([test_feature])

    image = Image.open(img_path)
    draw = ImageDraw.Draw(image)
    # One text line per class, 20 px apart, followed by the predicted label.
    line_y = 5
    for class_name, class_probability in zip(clf.classes_, probability[0]):
        draw.text(xy=(5, line_y),
                  text=class_name + ' ' + str(round(class_probability, 2)),
                  fill=(0,0,0), font=font_type)
        line_y += 20
    draw.text(xy=(5, line_y),
              text='predicted: ' + predicted_label[0],
              fill=(0,0,0), font=font_type)
    image.save(os.path.join(RESULTS_PATH, filename), "JPEG")
predicted_labels = clf.predict(test_features)

In [None]:
# trial to plot the SVM 

from mlxtend.plotting import plot_decision_regions

# Encode the class names as integers: 'sunset' -> 1, every other label -> 0.
label_ids = np.array([1 if label == 'sunset' else 0 for label in labels])

# try to use a hard-coded dictionary
# to try the filler_feature_values argument
# in plot_decision_regions, as this is required
# when working with more than two features
# d1 = {1:10,2:20,4:60,5:50,
#      6:50,7:90,8:80,9:60,10:10,
#      11:30,12:30,13:40,14:10,15:40,
#      16:30,17:40,18:50,19:20}
    
# d2 = {1:100,2:200,4:6,5:5,
#      6:50,7:90,8:80,9:60,10:10,
#      11:30,12:30,13:40,14:10,15:40,
#      16:30,17:40,18:50,19:20}


    
# plot_decision_regions(features, label_ids, clf=clf,
#                       feature_index=[0,3],
#                       filler_feature_values=d,
#                       filler_feature_ranges=d,
#                       legend=2, ax=ax)

In [None]:
# Plot individual features.
# For each cluster, scatter the blue component of its centre
# (column 4*i+2) against the cluster's occurrence fraction
# (column 4*i+3), coloured by class label.

plt.figure(1)
plt.clf()

# Every cluster contributes four feature columns (red, green, blue,
# occurrence), so the number of clusters follows from the feature matrix.
# A hard-coded count larger than that indexes past the last column.
n_plot_clusters = features.shape[1] // 4
axis_labels = []
for cluster_no in range(1, n_plot_clusters + 1):
    for quantity in ('Red', 'Green', 'Blue', '% occurrancy'):
        axis_labels.append('{0} cluster_{1}'.format(quantity, cluster_no))

path = '/Users/frodo/Desktop/plots/'
# Create the output directory up front so savefig does not fail on a
# missing folder.
if not os.path.isdir(path):
    os.makedirs(path)

# One scatter plot per cluster: blue component vs. occurrence fraction,
# coloured by class id, saved under a name built from the axis labels.
for i in range(n_plot_clusters):
    blue_col = 4*i + 2
    freq_col = 4*i + 3
    plt.scatter(features[:, blue_col], features[:, freq_col],
                c=label_ids, cmap=plt.cm.Paired)
    plt.xlabel(axis_labels[blue_col])
    plt.ylabel(axis_labels[freq_col])
    plot_name = axis_labels[blue_col] + ' vs ' + axis_labels[freq_col]
    plt.savefig(path + plot_name)
    plt.show()

In [5]:
# Sanity check: one row per training image and 4 * N_CLUSTERS columns
# (three colour components plus the occurrence fraction of each cluster).
features.shape

(736, 12)

In [57]:
from sklearn.model_selection import GridSearchCV
from sklearn.externals import joblib


def save_json(path, dictionary):
    """
    Save ``dictionary`` as a JSON file at ``path``.

    The file is written with a 4-space indent and ``(',', ': ')``
    separators. An existing file at ``path`` is overwritten.

    """
    # Imported locally: this function needs json, but no import of it is
    # visible among the notebook's import lines.
    import json

    # The context manager closes the file even if serialisation fails.
    with open(path, 'w') as f:
        json.dump(dictionary, f, indent=4, separators=(',', ': '))


def svc_param_selection(features, labels):
    """
    Pick the support vector machine hyperparameters with GridSearchCV.

    Every combination of C in [0.001, 0.01, 0.1, 1, 10] and
    gamma in [0.001, 0.01, 0.1, 1] is evaluated for svm.SVC.

    Inputs (numpy arrays):
      - features: 2D array of samples
      - labels: 1D array with one label per sample

    Output: the best combination as a dictionary keyed by
    'C' and 'gamma', for instance

    {'C': 1, 'gamma': 0.001}

    """
    param_grid = dict(
        C=[0.001, 0.01, 0.1, 1, 10],
        gamma=[0.001, 0.01, 0.1, 1],
    )
    search = GridSearchCV(svm.SVC(), param_grid)
    search.fit(features, labels)
    return search.best_params_

from collections import defaultdict

# Train an SVC with the grid-searched hyperparameters, then record the
# class probabilities of every test image in `results`, keyed first by
# file name and then by class name.
par = svc_param_selection(features, labels)
print(par)
results = defaultdict(dict)
print(results)
EXTENSION = '.jpg'
TEST_DIR = '/Users/frodo/Desktop/timestamp-selected-sunset_resize/'
clf = svm.SVC(C=par['C'],
              gamma=par['gamma'],
              probability=True)
print("Fitting the data")
clf.fit(features, labels)
print(clf.classes_)
for img in os.listdir(TEST_DIR):
    if not img.endswith(EXTENSION):
        continue
    img_path = os.path.join(TEST_DIR, img)
    print(img_path)
    image = load_image(img_path)
    # uncomment the line below if sky is always approximately in the upper half of the images
    # image = crop_image(image)
    test_feature = fex(image, N_CLUSTERS)
    probability = clf.predict_proba([test_feature])
    # predict_proba returns one row per sample, with columns in the order
    # of clf.classes_.
    for class_name, class_probability in zip(clf.classes_, probability[0]):
        rounded = round(class_probability, 3)
        print("%s %s" % (class_name, rounded))
        results[img][class_name] = rounded
print(results)

{'C': 10, 'gamma': 0.001}
defaultdict(<type 'dict'>, {})
Fitting the data
['gray sky' 'sunset']
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529032200_0_2018-06-15-04-10-00.jpg
gray sky 0.256
sunset 0.744
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529118900_0_2018-06-16-04-15-00.jpg
gray sky 0.255
sunset 0.745
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529550900_0_2018-06-21-04-15-00.jpg
gray sky 0.298
sunset 0.702
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529354400_0_2018-06-18-21-40-00.jpg
gray sky 0.473
sunset 0.527
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529008800_0_2018-06-14-21-40-00.jpg
gray sky 0.284
sunset 0.716
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529465100_0_2018-06-20-04-25-00.jpg
gray sky 0.483
sunset 0.517
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529205600_0_2018-06-17-04-20-00.jpg
gray sky 0.255
sunset 0.745
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529206500_0_2018

gray sky 0.425
sunset 0.575
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529032800_0_2018-06-15-04-20-00.jpg
gray sky 0.279
sunset 0.721
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529354100_0_2018-06-18-21-35-00.jpg
gray sky 0.482
sunset 0.518
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529463900_0_2018-06-20-04-05-00.jpg
gray sky 0.456
sunset 0.544
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529355900_0_2018-06-18-22-05-00.jpg
gray sky 0.543
sunset 0.457
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529292900_0_2018-06-18-04-35-00.jpg
gray sky 0.96
sunset 0.04
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529353500_0_2018-06-18-21-25-00.jpg
gray sky 0.538
sunset 0.462
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529096400_0_2018-06-15-22-00-00.jpg
gray sky 0.255
sunset 0.745
/Users/frodo/Desktop/timestamp-selected-sunset_resize/1529528100_0_2018-06-20-21-55-00.jpg
gray sky 0.274
sunset 0.726
/Users/frodo/Desktop/t