# Flat Histograms for Leonardo Visualization

####Damon Crockett, IPAM

In [73]:
import pandas as pd
import numpy as np
from skimage import color
from skimage.io import imread
from skimage.feature import greycomatrix, greycoprops

## Data and features

In [74]:
df = pd.read_csv("./local_path_savepoint_2_val_channels.csv") # already have average brightness

In [75]:
len(df)

7072

In [76]:
df.n_channels.value_counts()

2    5912
3    1160
dtype: int64

In [80]:
df['random'] = np.random.choice(range(len(df)),size=len(df),replace=False) # for random histogram, before doing time

In [81]:
# now to get mean hue for the color images

In [83]:
hues = []

for i in range(len(df)):
    img = imread(df.local_path.loc[i])
    n_channels = df.n_channels.loc[i]
    
    if n_channels == 3:
        img_color = color.rgb2hsv(img)
        hue = np.mean(img_color[:,:,0])
        hues.append(hue)
    else:
        hues.append("bw")
        
df['hue'] = hues

In [84]:
df.to_csv("./local_path_savepoint_2_val_channels_hue.csv",index=False) # just in case

In [69]:
# now GLCM texture properties

In [87]:
contrast = []
dissimilarity = []
homogeneity = []
ASM = []
energy = []
correlation = []

for i in range(len(df)):
    img = imread(df.local_path.loc[i])
    img_gray = color.rgb2gray(img)
    
    # gray-level co-occurence matrix
    glcm = greycomatrix(img_gray, [1], [0], levels=256, symmetric=True, normed=True)
    
    # glcm texture features
    tmp = greycoprops(glcm, "contrast")
    contrast.append(float(tmp))
    
    tmp = greycoprops(glcm, "dissimilarity")
    dissimilarity.append(float(tmp))
    
    tmp = greycoprops(glcm, "homogeneity")
    homogeneity.append(float(tmp))
    
    tmp = greycoprops(glcm, "ASM")
    ASM.append(float(tmp))
    
    tmp = greycoprops(glcm, "energy")
    energy.append(float(tmp))
    
    tmp = greycoprops(glcm, "correlation")
    correlation.append(float(tmp))
    
df['contrast'] = contrast
df['dissimilarity'] = dissimilarity
df['homogeneity'] = homogeneity
df['ASM'] = ASM
df['energy'] = energy
df['correlation'] = correlation

In [89]:
df.to_csv("/Users/damoncrockett/Desktop/Leo/local_path_savepoint_2_val_channels_hue_glcm.csv",index=False)

In [90]:
# Okay great! Now we can plot. We may add clusters later, but for now, we're good.

## Histogram

In [220]:
df = pd.read_csv("/Users/damoncrockett/Desktop/Leo/local_path_savepoint_2_val_channels_hue_glcm.csv")

In [221]:
num_bins = 52 # because it's a factor of len(df)

In [222]:
hist_var = "local_path"

In [223]:
df.sort(hist_var,inplace=True)

In [224]:
m = len(df)
per_bin = m / num_bins
l = np.repeat(range(num_bins),per_bin)
l = l[:m]

In [225]:
df['bin'] = l # this generates bins for whatever you just sorted on, just sequential integers

In [226]:
thumb_side = 128

In [227]:
px_w = (thumb_side) * num_bins
px_h = (thumb_side) * per_bin

In [228]:
from PIL import Image

In [229]:
thumb_px = (thumb_side,thumb_side)

In [230]:
bins = list(set(list(df['bin'])))

In [231]:
y_group = "n_channels"
y_group_2 = "val"
y_sort = "dissimilarity"
cut = [0,.3,.7,1]

In [232]:
canvas = Image.new('RGB',(px_w,px_h),(0,0,0))  # black canvas

In [233]:
for item in bins:
    # select rows of df in bin
    tmp = df[df['bin']==item]
    
    tmp['y_group_2'] = pd.cut(tmp[y_group_2],cut,labels=False)
    
    tmp = tmp.sort([y_group, 'y_group_2', y_sort], ascending=[True, False, True])
    #tmp = tmp.sort([y_group, 'y_group_2'], ascending=[True, False])
    #tmp = tmp.sort(y_group,ascending=True)
    tmp.reset_index(drop=True,inplace=True)
    
    y_coord = px_h - thumb_side
    x_coord = thumb_side * item
    
    for i in range(len(tmp)):
        thumb = Image.open(tmp.local_path.loc[i])
        thumb.thumbnail(thumb_px,Image.ANTIALIAS)
        canvas.paste(thumb,(x_coord,y_coord))
        y_coord = y_coord - thumb_side
        
canvas.save("./fh_tall52_time_channels_val3_3-7_dissimilarity_TFT.png")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
