## HSV t-SNE maps with TIME vault data

Damon Crockett, Software Studies, UCSD, damoncrockett@gmail.com

In [64]:
import pandas as pd
import os
import numpy as np

In [11]:
from skimage.io import imread
from skimage.exposure import histogram
from skimage import color

In [None]:
# Accumulator for image file paths; the directory-walk cell further down
# creates this list again before filling it.
local_path = []

In [41]:
# Root directory that is walked recursively (os.walk) to collect the sample image files.
DIR = "/Users/damoncrockett/Desktop/TIME/sample/"

In [42]:
# Recursively collect the full path of every file found under DIR.
# os.walk yields each directory path without a trailing separator, so the
# directory and file name are combined with os.path.join; plain string
# concatenation would glue a sub-directory name directly onto the file name.
local_path = []
for root, dirs, files in os.walk(DIR):
    for file in files:
        local_path.append(os.path.join(root, file))

In [43]:
# One-column table holding every collected file path; later cells add
# per-image columns ('sizedim', 'histline') to it.
df = pd.DataFrame(local_path,columns=['local_path'])

In [46]:
# Collects, per image, either the shape of its HSV array or the string "unreadable".
sizedim = []

In [47]:
# Probe every image once: record the shape of its HSV array, or the marker
# string "unreadable" when it cannot be loaded or converted, so that such
# files can be filtered out of df afterwards.
sizedim = []  # start from scratch so re-running this cell cannot append duplicates
for path in df.local_path:
    try:
        im = color.rgb2hsv(imread(path))
        sizedim.append(im.shape)
    except Exception:
        # Best-effort on purpose: any load/convert failure just flags the
        # file. Unlike a bare `except:`, this still lets KeyboardInterrupt
        # and SystemExit propagate, so the loop can be interrupted.
        sizedim.append("unreadable")

In [48]:
# Store the probe result (array shape or "unreadable") alongside each path.
df['sizedim'] = sizedim

In [58]:
# Keep only the rows whose image could be read. The explicit .copy() makes df
# an independent DataFrame rather than a slice of the previous one, so adding
# columns to it later does not trigger pandas' SettingWithCopyWarning.
df = df[df.sizedim != "unreadable"].copy()

In [68]:
# Renumber the surviving rows 0..n-1 (discarding the old index) so that the
# index-aligned assignment of df.local_path into the t-SNE frame further down
# matches rows one-to-one.
df.reset_index(drop=True,inplace=True)

In [92]:
# Persist the table of readable images (without the index column) in the
# current working directory.
df.to_csv("./TIME_sample.csv",index=False)

In [66]:
def _channel_distribution(channel, nbins=8):
    """Return the `nbins`-bin histogram of one HSV channel as probabilities.

    The bin edges are fixed to the interval [0, 1], the range rgb2hsv
    produces for every channel according to the scikit-image documentation,
    so the same bin means the same thing for every image. Letting the edges
    follow each image's own min/max (the default of
    skimage.exposure.histogram for float data) would make the vectors of
    different images incomparable.

    channel : 2-D array holding one channel of an HSV image.
    nbins   : number of equal-width bins.
    Returns a 1-D float array of length `nbins` that sums to 1.
    """
    counts, _ = np.histogram(channel, bins=nbins, range=(0.0, 1.0))
    return counts / float(counts.sum())  # norming as probabilities


# Describe every image by the concatenation of its hue, saturation and value
# distributions (3 channels x 8 bins = 24 numbers per image).
hist_vector = []
for path in df.local_path:
    im = color.rgb2hsv(imread(path))
    vector = np.concatenate(
        [_channel_distribution(im[:, :, channel_idx]) for channel_idx in range(3)]
    )
    hist_vector.append(vector)

# One row per image, in the same order as df.local_path.
X = np.vstack(hist_vector)
d = pd.DataFrame(X)

#### Now send the feature matrix to R to make the histlines

In [70]:
# Write the histogram feature matrix to CSV (no index column) for the R step.
d.to_csv("./X.csv",index=False)

In [None]:
# See histlines.R for the code that renders the histlines; the paths of the resulting image files are then added to df

In [79]:
# Attach to each row the path of its histline image. The files are addressed
# purely by row number: 0.jpg, 1.jpg, ... up to len(d) - 1.
# NOTE(review): assumes histlines.R writes its images with this 0-based
# naming and in the same row order as X.csv -- confirm against the R script.
histline_dir = "/Users/damoncrockett/Desktop/TIME/histlines/"
df['histline'] = [histline_dir + str(row_number) + ".jpg" for row_number in range(len(d))]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


#### Now the plotting loop

In [85]:
from sklearn.manifold import TSNE as tsne
from PIL import Image, ImageDraw
from shapely.geometry import Point

In [86]:
# Largest distance (in grid cells) an image may be moved from its own cell to
# an open one; sqrt(2) is the distance to a diagonal neighbour on a unit grid.
crit = np.sqrt(2) # adjacency

In [87]:
# Side length, in pixels, of one grid cell on the canvas; also the bounding
# box passed to Image.thumbnail for every pasted image.
thumb_side = 128

In [88]:
# Prefix prepended to the generated PNG file name when the canvas is saved.
PRE = "/Users/damoncrockett/Desktop/TIME/"

In [91]:
# Grid resolution per axis: the fill factor (3) times the rounded value of
# sqrt(len(df) / pi) -- described by the author as a circle that fits the square.
fill_factor = 3
num_bin = fill_factor * int(round(np.sqrt(len(df) / np.pi)))

# Project the histogram vectors down to two dimensions.
tsne_obj = tsne(n_components=2)
embedding = tsne_obj.fit_transform(X)

# Snap each embedded point to an integer grid cell by cutting both axes into
# num_bin equal-width bins, and carry the image path along with it.
subspace_tsne = pd.DataFrame(embedding, columns=["x", "y"])
subspace_tsne['grid_x'] = pd.cut(subspace_tsne.x, bins=num_bin, labels=False)
subspace_tsne['grid_y'] = pd.cut(subspace_tsne.y, bins=num_bin, labels=False)
subspace_tsne['local_path'] = df.local_path

# Canvas geometry: a square of grid_side x grid_side cells, each thumb_side
# pixels wide and high.
grid_side = num_bin
px_w = thumb_side * grid_side
px_h = thumb_side * grid_side

# Enumerate every grid cell with x varying fastest (0..grid_side-1 repeated
# for each y). np.tile is used because multiplying a range by an int only
# works on Python 2, where range() returns a list; on Python 3 it raises
# TypeError.
x = np.tile(np.arange(grid_side), grid_side)
y = np.repeat(np.arange(grid_side), grid_side)
grid_list = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'])

# One shapely Point per cell, so distances between cells can be measured.
point = [Point(cell_x, cell_y) for cell_x, cell_y in zip(grid_list.x, grid_list.y)]
grid_list['point'] = point

# open_grid: cells that are still empty; plotted_dict: cell -> path of the
# image pasted there. Both are filled in / consumed by the loops that follow.
open_grid = list(grid_list.point)
canvas = Image.new('RGB', (px_w, px_h), (50, 50, 50))  # dark grey background
plotted_dict = {}

# Paste the image thumbnails onto the canvas, visiting the images in random
# order. Each image goes into its own grid cell when that cell is still open;
# otherwise into the nearest open cell, provided that cell is no further away
# than `crit`. Images with no open cell within reach are skipped.
plotted = 0
for i in np.random.permutation(len(subspace_tsne)):
    if not open_grid:
        break  # every cell is taken, nothing more can be placed

    grid_point = Point((subspace_tsne.grid_x.loc[i], subspace_tsne.grid_y.loc[i]))

    if grid_point in open_grid:
        target = grid_point
    else:
        closest_open = min(open_grid, key=lambda cell: grid_point.distance(cell))
        if grid_point.distance(closest_open) > crit:  # adjacency criterion
            continue
        target = closest_open

    # Only images that are actually placed are opened and resized.
    im = Image.open(subspace_tsne.local_path.loc[i])
    # Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias,
    # which no longer exists in Pillow 10 and later.
    im.thumbnail((thumb_side, thumb_side), Image.LANCZOS)

    # Plain Python ints for the pixel offset of the target cell.
    x = int(target.x) * thumb_side
    y = int(target.y) * thumb_side
    canvas.paste(im, (x, y))

    open_grid.remove(target)
    plotted_dict[target] = subspace_tsne.local_path.loc[i]
    plotted += 1

# Tag plotted images: for every plotted cell whose nearest open cell is a
# direct horizontal or vertical neighbour (distance exactly 1), paste that
# image's histline into the open neighbour and draw a small blue marker on
# the histline's edge that faces the image it belongs to.
tagged = 0
marker_colour = (30, 144, 255)
for grid_point in plotted_dict:
    if not open_grid:
        break  # no empty cell left; min() would raise ValueError on an empty list

    closest_open = min(open_grid, key=lambda cell: grid_point.distance(cell))
    if grid_point.distance(closest_open) != 1:  # strict adjacency
        continue

    # Look up the histline belonging to the plotted image. A dedicated name is
    # used so the notebook-level `local_path` list is not overwritten.
    plotted_path = plotted_dict[grid_point]
    histline_path = df.loc[df.local_path == plotted_path, 'histline'].iloc[0]

    template = Image.open(histline_path)
    # Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias,
    # which no longer exists in Pillow 10 and later.
    template.thumbnail((thumb_side, thumb_side), Image.LANCZOS)
    draw = ImageDraw.Draw(template)

    # Marker coordinates are fixed pixel values laid out for a 128 px tile.
    if grid_point.x > closest_open.x:  # image lies to the right -> right edge
        draw.rectangle([(120,55),(128,61)],outline=marker_colour,fill=marker_colour)
    if grid_point.x < closest_open.x:  # image lies to the left -> left edge
        draw.rectangle([(0,55),(6,61)],outline=marker_colour,fill=marker_colour)
    if grid_point.y < closest_open.y:  # image lies above -> top edge
        draw.rectangle([(55,0),(61,6)],outline=marker_colour,fill=marker_colour)
    if grid_point.y > closest_open.y:  # image lies below -> bottom edge
        draw.rectangle([(55,120),(61,128)],outline=marker_colour,fill=marker_colour)

    x = int(closest_open.x) * thumb_side
    y = int(closest_open.y) * thumb_side
    canvas.paste(template, (x, y))
    open_grid.remove(closest_open)  # the neighbour cell is now occupied

    tagged += 1

# Build the output file name from the run statistics and save the canvas.
# Share of plotted images that received a histline tag; defined as 0.0 when
# nothing was plotted so the division cannot fail.
tag_ratio = float(tagged) / plotted if plotted else 0.0
# NOTE(review): lstrip(".") leaves strings such as "0.25" unchanged because
# they start with "0"; kept as-is so existing file names do not change.
tag_prop = str(np.round(tag_ratio, 2)).lstrip(".")
# Percentage of all images that were plotted. The float literal forces true
# division so round() sees the fractional part on Python 2 as well.
prop = str(int(round(plotted * 100.0 / len(subspace_tsne))))
canvas.save(PRE+"_"+str(num_bin)+"_"+prop+"_"+"_"+str(plotted)+"_"+str(tagged)+"_"+tag_prop+".png")