In [28]:
import numpy as np
import math
import colorsys
from scipy import ndimage
from scipy import misc
from sklearn.cluster import KMeans
from sklearn.utils import shuffle
import json

In [29]:
#splits a 1d sequence into consecutive rows that are w items long
def arr_reshape(arr,w):
    """Chunk a flat sequence into rows of ``w`` items.

    Args:
        arr: any sliceable sequence with a length (list, string, 1d array, ...).
        w: number of items per row.

    Returns:
        A list of slices of ``arr``. When ``len(arr)`` is not a multiple of ``w``
        the last slice is shorter than ``w``.
    """
    n_rows = math.ceil(len(arr)/w)
    return [arr[start*w:(start+1)*w] for start in range(n_rows)]

#remove every nth column, but always keep the first and the last column
def remove_col(arr,n_col):
    """Drop every ``n_col``-th column from a 2d sequence of rows.

    Counting columns from 1, a column is dropped when its position is a multiple
    of ``n_col``. The first and the last column are always kept, so that add_col
    has a neighbour on both sides of every gap when it interpolates.

    Args:
        arr: sequence of rows; the column count is taken from the first row.
        n_col: decimation step.

    Returns:
        A new list of lists with the surviving entries. An empty ``arr`` gives
        ``[]`` and a single-column input is returned with that one column
        (the column is not emitted twice).
    """
    if len(arr) == 0:
        return []
    c_max = len(arr[0])
    last = c_max-1
    # The first/last test comes first so the modulo is only evaluated for interior columns.
    return [
        [row[j] for j in range(c_max) if j in (0,last) or (j+1)%n_col != 0]
        for row in arr
    ]
#remove every nth row, but always keep the first and the last row
def remove_row(arr,n_row):
    """Drop every ``n_row``-th row from a sequence of rows.

    Counting rows from 1, a row is dropped when its position is a multiple of
    ``n_row``. The first and the last row are always kept, so that add_row has
    a neighbour on both sides of every gap when it interpolates.

    Args:
        arr: sequence of rows.
        n_row: decimation step.

    Returns:
        A new list holding the surviving row objects (rows are not copied).
        An empty ``arr`` gives ``[]`` and a one-row input is returned with that
        single row (the row is not emitted twice).
    """
    r_max = len(arr)
    last = r_max-1
    # The first/last test comes first so the modulo is only evaluated for interior rows.
    return [arr[i] for i in range(r_max) if i in (0,last) or (i+1)%n_row != 0]
#re-add the columns removed by remove_col, filling each gap with the mean of its neighbours
def add_col(arr,w,n_col):
    """Rebuild rows of width ``w`` from rows that were decimated by remove_col.

    Output positions that remove_col dropped (1-based position is a multiple of
    ``n_col``, first and last excluded) are filled with the average of the kept
    entries on either side of the gap. All other positions take the next kept
    entry unchanged.

    Args:
        arr: sequence of decimated rows; the index of the last kept entry is
            taken from the first row.
        w: width of each row before decimation.
        n_col: decimation step that was passed to remove_col.

    Returns:
        A list of lists, each ``w`` entries long. Interpolated entries are numpy
        values; the remaining entries are the original objects. An empty ``arr``
        gives ``[]``, and for ``w < 2`` each row is cut to its first ``w``
        entries (there is nothing to interpolate and the single column must not
        be emitted twice).
    """
    if len(arr) == 0:
        return []
    l = len(arr[0])
    res = []
    for src in arr:
        if w < 2:
            res.append(list(src[:max(w,0)]))
            continue
        row = [src[0]]
        arr_i = 1  # index of the next kept entry that has not been copied yet
        for j in range(1,w-1):
            if (j+1)%n_col == 0:
                # dropped column: average the kept entries on both sides of the gap
                row.append((np.array(src[arr_i-1])+np.array(src[arr_i]))/2)
            else:
                row.append(src[arr_i])
                arr_i += 1
        row.append(src[l-1])
        res.append(row)
    return res
def add_row(arr,h,n_row):
    """Rebuild ``h`` rows from rows that were decimated by remove_row.

    Output positions that remove_row dropped (1-based position is a multiple of
    ``n_row``, first and last excluded) are filled with the element-wise average
    of the kept rows on either side of the gap. All other positions take the
    next kept row unchanged.

    Args:
        arr: sequence of decimated rows.
        h: number of rows before decimation.
        n_row: decimation step that was passed to remove_row.

    Returns:
        A list of ``h`` rows. Interpolated rows are numpy arrays; the remaining
        rows are the original objects. An empty ``arr`` gives ``[]``, and for
        ``h < 2`` only the first ``h`` rows are returned (there is nothing to
        interpolate and the single row must not be emitted twice).
    """
    if len(arr) == 0:
        return []
    if h < 2:
        return list(arr[:max(h,0)])
    res = [arr[0]]
    arr_i = 1  # index of the next kept row that has not been copied yet
    for i in range(1,h-1):
        if (i+1)%n_row == 0:
            # dropped row: average the kept rows on both sides of the gap
            res.append((np.array(arr[arr_i-1])+np.array(arr[arr_i]))/2)
        else:
            res.append(arr[arr_i])
            arr_i += 1
    res.append(arr[len(arr)-1])
    return res
#return a,b,c components we use for reconstruction
def svd_recon_components(arr,num_eigenvecs,a_decimals,b_decimals,c_decimals):
    """Return the rounded leading factors of the SVD of ``arr``.

    With ``k = min(num_eigenvecs, min(arr.shape))`` the result is ``[a, b, c]``:
    ``a`` holds the first k left singular vectors as columns, ``b`` the first k
    singular values and ``c`` the first k right singular vectors as rows, so
    ``a @ diag(b) @ c`` is the rank-k approximation of ``arr`` (up to rounding).

    Args:
        arr: 2d array-like to decompose.
        num_eigenvecs: number of singular triplets to keep.
        a_decimals, b_decimals, c_decimals: decimal places kept in a, b and c.

    Returns:
        A list ``[a, b, c]`` of numpy arrays.
    """
    # The reduced SVD is sufficient because only leading triplets are kept. It
    # avoids building the full square U and V, and it keeps a, b and c
    # conformable even when num_eigenvecs exceeds the smaller matrix dimension.
    u, s, v = np.linalg.svd(arr, full_matrices=False)
    return [
        np.round(u[:, :num_eigenvecs],decimals=a_decimals),
        np.round(s[:num_eigenvecs],decimals=b_decimals),
        np.round(v[:num_eigenvecs, :],decimals=c_decimals),
    ]

#runs kmeans on a matrix and creates a kstring
def kmeans_serialize(arr,n_clusts,num_decimals):
    """Quantise a matrix with k-means and encode the cluster labels as a string.

    A shuffled sample of at most 1500 entries of ``arr`` is used to fit
    ``n_clusts`` clusters. Every entry of ``arr`` is then replaced by the index
    of its predicted cluster, and each index is written as ``chr(index)``, one
    row of ``arr`` after the other.

    Args:
        arr: 2d numpy array to quantise.
        n_clusts: number of k-means clusters.
        num_decimals: decimal places kept in the returned cluster centres.

    Returns:
        ``(centres, k_string)``: the rounded cluster centres as a plain list and
        the string with one character per entry of ``arr``.
    """
    #KMeans wants 2d samples, so every value is paired with a constant 0 instead of using some sort of binning algorithm
    picked = shuffle(arr.flatten(), random_state=0)[:1500]
    sample = [(value, 0) for value in picked]
    kmeans = KMeans(n_clusters=n_clusts, random_state=0).fit(sample)
    #only the first coordinate of each centre carries information, the second is the padding 0
    clusters = np.round(kmeans.cluster_centers_,decimals=num_decimals)[:,0]
    pieces = []
    for row in arr:
        labels = kmeans.predict([[value, 0] for value in row])
        pieces.append(''.join(map(chr, labels)))
    return clusters.tolist(), ''.join(pieces)

#changes a kstring back into a component matrix. THIS WOULD ESSENTIALLY RUN IN THE BROWSER
def kmeans_unserializer(kstring,clusters,w):
    """Decode a string produced by kmeans_serialize into rows of cluster centres.

    Args:
        kstring: string whose characters encode cluster indices via ``ord``.
        clusters: sequence of cluster centres, indexed by cluster index.
        w: number of entries per row of the decoded matrix.

    Returns:
        A list of rows (lists of centre values), as built by arr_reshape.
    """
    decoded = [clusters[ord(symbol)] for symbol in kstring]
    return arr_reshape(decoded,w)

In [30]:
#Declaring global vars
#number of eigenvectors to use for hue,saturation,value (respectively)
h_eigenvecs = 256
s_eigenvecs = 256
v_eigenvecs = 512
#number of decimal places to use for a,b,c reconstruction components
decimals_a = 4
decimals_b = 2
decimals_c = 5
kstring_clusters = 128 #number of clusters used when a and c components are turned into kstrings
img_name = 'image11.TIFF' #file name, read from images/ and written to images_out/
num_color_clusters = 256 #number of clusters for the rgb colour kmeans
nth_col = 4 #remove every nth col; original author's note: make sure image width is not divisible by this number (reason not recorded)
nth_row = 4 #remove every nth row

In [None]:
%%time
#Read in rgb image. Tested and worked on TIFF and jpg.
img = misc.imread('images/'+img_name).astype(float)
h, w, d = original_shape = tuple(img.shape)
print(original_shape)
assert d == 3 #Basically we want to make sure we have an rgb image not a rgba one

#Doing a double map with colorsys.rgb_to_hsv is slow ;__; so we'll kmeans and then predict
img_arr_sample = shuffle(np.reshape(img, (w * h, d)), random_state=0)[:1000]
color_kmeans = KMeans(n_clusters=num_color_clusters, random_state=0).fit(img_arr_sample)
color_clusters = np.array(list(map(lambda x: colorsys.rgb_to_hsv(x[0]/255,x[1]/255,x[2]/255),color_kmeans.cluster_centers_)))

#use predict to get hsv vals and then reshape in order to column extract hsv
hsv_arr = np.reshape(np.array(list(map(lambda x: color_clusters[x],list(map(color_kmeans.predict,img))))), (w * h, d))

#column extract hue,sat,val arrays and reshape; then remove columns and rows
h_arr = remove_row(remove_col(arr_reshape(hsv_arr[:,0],w),nth_col),nth_row)
s_arr = remove_row(remove_col(arr_reshape(hsv_arr[:,1],w),nth_col),nth_row)
v_arr = remove_row(remove_col(arr_reshape(hsv_arr[:,2],w),nth_col),nth_row)

#svd components needed for reconstruction
h_a,h_b,h_c = svd_recon_components(h_arr,h_eigenvecs,decimals_a,decimals_b,decimals_c)
s_a,s_b,s_c = svd_recon_components(s_arr,s_eigenvecs,decimals_a,decimals_b,decimals_c)
v_a,v_b,v_c = svd_recon_components(v_arr,v_eigenvecs,decimals_a,decimals_b,decimals_c)

#dict for json
js_dict={}
js_dict['shape'] = [w,h]
js_dict['remove'] = [nth_col,nth_row]
#kmeans compression, compress a and c comps.
h_a_cluster,h_a_kstring = kmeans_serialize(h_a,kstring_clusters,decimals_a)
h_c_cluster,h_c_kstring = kmeans_serialize(h_c,kstring_clusters,decimals_c)
js_dict['h_a_kstring'] = h_a_kstring
js_dict['h_a_cluster'] = h_a_cluster
js_dict['h_b'] = h_b.tolist()
js_dict['h_c_kstring'] = h_c_kstring
js_dict['h_c_cluster'] = h_c_cluster

s_a_cluster,s_a_kstring = kmeans_serialize(s_a,kstring_clusters,decimals_a)
s_c_cluster,s_c_kstring = kmeans_serialize(s_c,kstring_clusters,decimals_c)
js_dict['s_a_kstring'] = s_a_kstring
js_dict['s_a_cluster'] = s_a_cluster
js_dict['s_b'] = s_b.tolist()
js_dict['s_c_kstring'] = s_c_kstring
js_dict['s_c_cluster'] = s_c_cluster

v_a_cluster,v_a_kstring = kmeans_serialize(v_a,kstring_clusters,decimals_a)
v_c_cluster,v_c_kstring = kmeans_serialize(v_c,kstring_clusters,decimals_c)   
js_dict['v_a_kstring'] = v_a_kstring
js_dict['v_a_cluster'] = v_a_cluster
js_dict['v_b'] = v_b.tolist()
js_dict['v_c_kstring'] = v_c_kstring
js_dict['v_c_cluster'] = v_c_cluster
with open('new_data.txt', 'w') as outfile:
    json.dump(js_dict, outfile)

(2456, 3680, 3)
CPU times: user 2min 9s, sys: 8.24 s, total: 2min 18s
Wall time: 1min 44s


In [None]:
%%time
h_a_recon =kmeans_unserializer(h_a_kstring,h_a_cluster,h_eigenvecs)
h_c_recon =kmeans_unserializer(h_c_kstring,h_c_cluster,len(h_c[0]))
s_a_recon =kmeans_unserializer(s_a_kstring,s_a_cluster,s_eigenvecs)
s_c_recon =kmeans_unserializer(s_c_kstring,s_c_cluster,len(s_c[0]))
v_a_recon = kmeans_unserializer(v_a_kstring,v_a_cluster,v_eigenvecs)
v_c_recon =kmeans_unserializer(v_c_kstring,v_c_cluster,len(v_c[0]))

h_recon = np.matrix(h_a_recon) * np.diag(h_b) * np.matrix(h_c_recon)
s_recon = np.matrix(s_a_recon) * np.diag(s_b) * np.matrix(s_c_recon)
v_recon = np.matrix(v_a_recon) * np.diag(v_b) * np.matrix(v_c_recon)

img_arr_recon = list(zip(np.asarray(h_recon),np.asarray(s_recon),np.asarray(v_recon)))
img_arr_recon = list(map(lambda a:list(zip(a[0],a[1],a[2])),img_arr_recon))
img_arr_recon = list(map(lambda x:list(map(lambda y: colorsys.hsv_to_rgb(y[0],y[1],y[2]),x)),img_arr_recon))
img_arr_recon = add_row(add_col(img_arr_recon,w,nth_col),h,nth_row)
misc.imsave('images_out/'+img_name,np.asarray(img_arr_recon))

538880