In [None]:
# Extract car images and apply k-means to determine the dominant colour

##########################
######## SETTINGS ########

# jupyter matplotlib configs:
# render figures inline in the notebook, using the 'retina' (high-DPI) figure format
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# file with detected objects; it is parsed as JSON whose keys are image paths
# and whose values are lists of objects carrying 'object', 'x0', 'y0', 'x1', 'y1'
# fields (a leading '~' in this path is expanded to the user's home directory)
detected_objs_file = 'detected_objects.txt'

# debug flag: set to True to show objects and print size and colour info
debug = False
# intended cap on how many car objects are processed while debug is True
# (the running count is per car object, not per image file)
debug_max_counter = 20

##########################
##########################


import os
import re
import json
import random
import numpy as np
import cv2 
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans, MeanShift
from itertools import groupby


# Dominant-colour extraction.
# For every object labelled 'car' in the detections file: crop its bounding box,
# take one horizontal row of pixels from the crop and cluster that row with
# k-means. The (rounded) centre of the most populated cluster is recorded as the
# car's colour.
# Produces: `detected_colours` (list of RGB centres) and `rgb_raw` (float32
# array with one row per accepted car).

path = ''
counter = 0            # car objects visited so far (includes ones skipped below)
prev_file = ''
detected_colours = []

# tuning constants for the colour scan
scan_fraction = 0.3    # scan row position as a fraction of crop height, from the top
n_colour_clusters = 5  # upper bound on the number of k-means clusters per scan line

with open(os.path.expanduser(detected_objs_file), 'r') as file:
    detected_objects = json.load(file)

keys = list(detected_objects.keys())
random.shuffle(keys)   # visit images in random order (unseeded, so the order differs between runs)

# Set once the debug cap is hit so that BOTH loops stop; a bare `break` in the
# inner loop would only move on to the next image and the cap would never apply.
debug_limit_reached = False

for path in keys:
    # cv2.imread signals a missing/unreadable file by returning None instead of
    # raising, so check before converting to avoid aborting the whole run.
    img_bgr = cv2.imread(os.path.expanduser(path))
    if img_bgr is None:
        print('\nSkipped unreadable image:', path)
        continue
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # convert BGR to RGB

    for car_obj in detected_objects[path]:
        if car_obj['object'] != 'car':
            continue

        # debug mode: stop after debug_max_counter car objects
        if debug and counter >= debug_max_counter:
            debug_limit_reached = True
            break

        counter = counter + 1

        if debug:  print (counter, ': ', path)
        else:      print ('\r', counter, ': ', path, end='')

        # Clamp coordinates at zero: a negative value would otherwise be read as
        # a "from the end" slice index and silently crop the wrong region.
        x0 = max(0, int(car_obj['x0']))
        y0 = max(0, int(car_obj['y0']))
        x1 = max(0, int(car_obj['x1']))
        y1 = max(0, int(car_obj['y1']))
        car_img = img[y0:y1, x0:x1]

        height, width, _ = car_img.shape
        if height == 0 or width == 0:
            # degenerate box: nothing to scan, and the ratios below would divide by zero
            if debug: print("Skipped because of empty bounding box")
            continue

        aspect = height / width
        if debug:
            print ('Dimensions: {}x{}; h/w = {:.2f}; w/h = {:.2f}'.format(width, height,
                                                                          aspect, width/height))

        # skip oddly shaped objects
        if aspect > 0.9 or aspect < 0.3:
            if debug: print("Skipped because of aspect ratio")
            continue

        # subset a horizontal line of pixels (scan_fraction from the top) and flatten:
        #  (1, width, col_chnls) -> (width, col_chnls)
        scan_row = int(height * scan_fraction)
        line_scan = car_img[scan_row:scan_row + 1, 0:width].reshape(-1, 3)

        # Apply K-means to line scan; KMeans rejects n_clusters > n_samples,
        # so narrow crops get fewer clusters.
        k = min(n_colour_clusters, len(line_scan))
        kmeans = KMeans(n_clusters = k).fit(line_scan)

        # Count members per label id. bincount keeps index == label even when a
        # label has no members, so the argmax indexes cluster_centers_ correctly.
        cluster_freq = np.bincount(kmeans.labels_, minlength=k)
        most_freq_label = int(np.argmax(cluster_freq))
        car_colour = np.rint(kmeans.cluster_centers_[most_freq_label])

        if debug:
            print ('Detected colour RGB-8bit: ', car_colour)
            #print ('label frequency list', cluster_freq, 'most frequent label ', most_freq_label)

            # display image and colour scan line; draw on a copy because car_img
            # is a view into img and later crops of the same image may overlap it
            preview = car_img.copy()
            line_colour = tuple(float(channel) for channel in car_colour)
            cv2.line(preview, (0, scan_row), (width, scan_row), line_colour, thickness=4)
            plt.imshow(preview, interpolation = 'bicubic')
            plt.xticks([]), plt.yticks([])  # to hide tick values on X and Y axis
            plt.show()

        detected_colours.append(car_colour)

    if debug_limit_reached:
        break

# convert to numpy array; reshape keeps the (n, 3) layout even when nothing was detected
rgb_raw = np.array(detected_colours, np.float32).reshape(-1, 3)

print("\nReady")

In [None]:
# Persist the array of detected colours to disk

##########################
######## SETTINGS ########

# output path for the NumPy file holding the detected colours
detected_cols_file = 'detected_colours.npy'
##########################

# write rgb_raw in binary .npy format through an explicitly opened file handle
with open(detected_cols_file, 'wb') as out_file:
    np.save(out_file, rgb_raw)

print("\nDectected colours saved to", detected_cols_file)