In [12]:
import pandas as pd
from skimage import color
from skimage.io import imread
from skimage.feature import greycomatrix, greycoprops, hog
import numpy as np

In [13]:
df = pd.read_csv("./local_path.csv")

In [14]:
props = ['contrast','dissimilarity','homogeneity','ASM','energy','correlation']

In [15]:
vector_list = []

for file in df.local_path:    
    img = imread(file)
    img_gray = color.rgb2gray(img)
    
    # average brightness ( I should really norm this ) 
    val_mean = np.mean(img_gray)
    
    # gray-level co-occurence matrix
    glcm = greycomatrix(img_gray, [1], [0], levels=256, symmetric=True, normed=True)
    
    prop_vector = []
    
    # glcm texture features
    for prop in props:
        tmp = greycoprops(glcm, prop)
        prop_vector.append(float(tmp))
        
    # coarse hog features
    fd = hog(img_gray, orientations=4, pixels_per_cell=(32, 32),cells_per_block=(1, 1))
    hog_vector = fd
    
        
    vector_list.append(np.asarray(prop_vector + [val_mean] + hog_vector.tolist()))
    
X = np.vstack(vector_list)

In [16]:
from PIL import Image
from shapely.geometry import Point
import os
from sklearn.cluster import KMeans

In [17]:
for k in [2,4,8,16,32,64]:

    os.mkdir("/Users/damoncrockett/Desktop/Leo/"+str(k))
    
    kmeans = KMeans(n_clusters = k)
    kmeans.fit(X)

    df = df[:len(vector_list)]

    df['clusters'] = kmeans.labels_
    centroids = pd.DataFrame(kmeans.cluster_centers_)
    
    euclidean_distance = []

    feature_points = pd.DataFrame(X)

    for i in range(len(df)):
        tmp = feature_points.loc[i].as_matrix()
        cluster_integer = int(df.clusters.loc[i])

        euclidean_distance_i = np.linalg.norm(tmp - centroids.loc[cluster_integer].as_matrix())
        euclidean_distance.append(euclidean_distance_i)

    df['euclidean_distance'] = euclidean_distance
    
    for cluster_label in range(k):
        tmp = df[df.clusters==cluster_label]
        tmp.sort('euclidean_distance',inplace=True)
        tmp.reset_index(drop=True,inplace=True)
        
        n = len(tmp)
        
        grid_side = int(round( np.sqrt(n) / 0.75 ))
        
        x,y = range(grid_side) * grid_side, np.repeat(range(grid_side),grid_side)
        grid_list = pd.DataFrame(x,columns=['x'])
        grid_list['y'] = y

        point = []
        for i in range(len(grid_list)):
            point.append(Point(grid_list.x.loc[i],grid_list.y.loc[i]))

        grid_list['point'] = point

        open_grid = list(grid_list.point)

        midpoint = int(round( grid_side / 2 ))
        centroid_loc = Point(midpoint,midpoint)
        
        if n > 1:
            open_grid.remove(centroid_loc)
        
        thumb_side = 64

        px_w = thumb_side * grid_side
        px_h = thumb_side * grid_side

        canvas = Image.new('RGB',(px_w,px_h),(50,50,50))
        
        for i in range(n):
            im = Image.open(tmp.local_path.loc[i])
            im.thumbnail((thumb_side,thumb_side),Image.ANTIALIAS)
            closest_open = min(open_grid,key=lambda x: centroid_loc.distance(x))
            x = int(closest_open.x) * thumb_side
            y = int(closest_open.y) * thumb_side
            canvas.paste(im,(x,y))
            open_grid.remove(closest_open)
            
        canvas.save("./"+str(k)+"/"+str(cluster_label)+".png")

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  inplace=inplace, kind=kind, na_position=na_position)
