In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2 as cv
import cupy as cp
from cupyx.scipy import ndimage
import matplotlib
from sklearn.preprocessing import StandardScaler

In [31]:
def avg_morphology(im, centroids, org_rad, sigma):
    feats = []
    
    d = org_rad + sigma
    for (c1, c2) in centroids:
        x = int(c2)
        y = int(c1)
        avg = np.mean(im[x-d: x+d, y-d: y+d])
        assert(~np.isnan(avg))
        feats.append(avg.get())

    feats = np.array(feats).reshape(-1,1)
    return feats

In [32]:
from skimage.measure import block_reduce
def avg_pooling(im, centroids, org_rad, sigma, name=None):
    feats = []
    d = org_rad + sigma
    
    feats = []
    for (c1, c2) in centroids:
        x = int(c2)
        y = int(c1)
        window = im[x-d: x+d, y-d: y+d]
        window = window.get()
        
        # incoming 550 by 550 image
        window_reduc = block_reduce(window, (2,2), np.mean)
        window_reduc = block_reduce(window_reduc, (11,11), np.mean)
        window_reduc = block_reduce(window_reduc, (5,5), np.mean)
        # output 5 by 5 image
        
        feats.append(window_reduc.flatten())
        
                
    return np.array(feats)

In [33]:
def extract_features(sigma, centroids, pad=500, org_rad=75):
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    
    max_coord = int(np.max(centroids))
    min_coord = int(np.min(centroids))
    
    image = np.zeros((max_coord + min_coord + pad*2, max_coord + min_coord + pad*2))
    dCentroids_scaled = centroids+pad
    
    for (c1, c2) in dCentroids_scaled:
        x = int(c2)
        y = int(c1)
        image[x-org_rad:x+org_rad, y-org_rad:y+org_rad] = 255
    
    im = cp.array(image)
    im_blur = ndimage.gaussian_filter(im, sigma=sigma, mode = 'constant')
    sobel = ndimage.gaussian_gradient_magnitude(im, sigma=sigma, mode = 'constant')
    laplace = ndimage.gaussian_laplace(im, sigma=sigma, mode = 'constant')

    dfeats = avg_morphology(im_blur, dCentroids_scaled, org_rad, sigma)
    gfeats = avg_morphology(sobel, dCentroids_scaled, org_rad, sigma)
    lfeats = avg_morphology(laplace, dCentroids_scaled, org_rad, sigma)
    
    feats = np.hstack([dfeats, gfeats, lfeats])
    return feats

# Round 1 

In [34]:
density_cols = ["d"+str(i) for i in range(1,26)]
grad_cols = ["g"+str(i) for i in range(1,26)]

In [35]:
all_cols = density_cols + grad_cols

In [36]:
df1 = pd.read_csv("datasets/round_1/combined/big_df1.csv")
df2 = pd.read_csv("datasets/round_1/combined/big_df2.csv")
df3 = pd.read_csv("datasets/round_1/combined/big_df3.csv")
df4 = pd.read_csv("datasets/round_1/combined/big_df4.csv")
df5 = pd.read_csv("datasets/round_1/combined/big_df5.csv")
df6 = pd.read_csv("datasets/round_1/combined/big_df6.csv")

In [42]:
dfs = [df1, df2, df3, df4, df5, df6]
all_feats = []
all_targets = []
for df in dfs:
    dcoords = np.array(list(zip(df.dx.values, df.dy.values)))
    sigma_feats = []
    for sigma in [100, 200, 300, 400, 500]:
        feats = extract_features(sigma, dcoords)
        sigma_feats.append(feats)
    targs = df.iloc[:, 14:16].values
    sigma_feats = np.hstack(sigma_feats)
    all_feats.append(sigma_feats)
    all_targets.append(targs)

all_feats = np.vstack(all_feats)
all_targets = np.vstack(all_targets)

In [43]:
all_feats.shape

(1826, 15)

In [44]:
all_targets.shape

(1826, 2)

In [54]:
scaler = StandardScaler()

In [55]:
names = []
for sig in [100, 200, 300, 400, 500]:
    dname = 'd'+str(sig)
    gname = 'g'+str(sig)
    lname = 'l'+str(sig)
    names.append(dname)
    names.append(gname)
    names.append(lname)

In [56]:
len(names)

15

In [57]:
round1_avg_dens_grad_targs = pd.DataFrame(np.hstack([all_feats, all_targets]), columns=names+["Dipole"]+["Elongation"])

In [58]:
round1_avg_dens_grad_targs_stdScl = pd.DataFrame(np.hstack([scaler.fit_transform(all_feats), all_targets]), columns=names+["Dipole"]+["Elongation"])

In [29]:
#avg_dens_grad_targs = pd.DataFrame(np.hstack([all_feats, all_targets]), columns=all_cols+["Dipole"]+["Elongation"])
avg_dens_grad_dipole = pd.DataFrame(np.hstack([all_feats, all_targets[:, 0].reshape(-1,1)]), columns=all_cols+["Dipole"])
#avg_dens_grad_elong = pd.DataFrame(np.hstack([all_feats, all_targets[:, 1].reshape(-1,1)]), columns=all_cols+["Elongation"])
#avg_dens_dipole = pd.DataFrame(np.hstack([all_feats[:, :25], all_targets[:, 0].reshape(-1,1)]), columns=all_cols[:25]+["Dipole"])
#avg_dens_elong = pd.DataFrame(np.hstack([all_feats[:,:25], all_targets[:, 1].reshape(-1,1)]), columns=all_cols[:25]+["Elongation"])
#avg_grad_dipole = pd.DataFrame(np.hstack([all_feats[:,25:], all_targets[:, 0].reshape(-1,1)]), columns=all_cols[25:]+["Dipole"])
#avg_grad_elong = pd.DataFrame(np.hstack([all_feats[:,25:], all_targets[:, 1].reshape(-1,1)]), columns=all_cols[25:]+["Elongation"])


In [59]:
round1_avg_dens_grad_targs.to_csv("round1_avg_dens_grad_targs.csv", index=False)

In [60]:
round1_avg_dens_grad_targs_stdScl.to_csv("round1_avg_dens_grad_targs_stdScl.csv", index=False)

In [61]:
round1_avg_dens_grad_targs_stdScl.shape

(1826, 17)

# Repeat for Round 0

In [67]:
df0 = pd.read_csv("datasets/round_0/combined/all_sigmas_df_comb.csv")

In [68]:
useeds = np.unique(df0.seeds)

In [69]:
all_feats = []
all_targets = []
for seed in useeds:
    df = df0[df0.seeds == seed]
    dcoords = np.array(list(zip(df.cx.values, df.cy.values)))
    
    sigma_feats = []
    for sigma in [100, 200, 300, 400, 500]:
        feats = extract_features(200, dcoords)
        sigma_feats.append(feats)
    
    targs = df.iloc[:, -1].values
    all_feats.append(np.hstack(sigma_feats))
    all_targets.append(targs)

In [70]:
all_feats = np.vstack(all_feats)

In [71]:
all_feats.shape

(568, 15)

In [75]:
all_targs = np.concatenate(all_targets, axis = 0)

In [76]:
all_targs.shape

(568,)

In [82]:
round_0 = pd.DataFrame(np.hstack([all_feats, all_targs.reshape(-1,1)]), columns=names+["Dipole"])

In [83]:
round_0.to_csv("datasets/round_0/combined/round0_avg_dens_grad_lap_dipole.csv", index=False)

In [84]:
round_0scl = pd.DataFrame(np.hstack([scaler.fit_transform(all_feats), all_targs.reshape(-1,1)]), columns=names+["Dipole"])

In [85]:
round_0scl.to_csv("datasets/round_0/combined/round0_avg_dens_grad_lap_dipole_stdScl.csv", index=False)

# Pooling: Combine Round0 and Round1 

In [139]:
df1 = pd.read_csv("datasets/round_1/combined/round1_pool_dens_grad_dipole.csv")

In [140]:
df1.shape

(1826, 51)

In [141]:
df_c = pd.concat([round_0, df1], axis = 0)

In [109]:
df_c.to_csv("datasets/round_1/combined/combined_pool_dens_grad_dipole.csv", index = False)

In [110]:
df_c_d = pd.concat([df_c.iloc[:, :25], df_c.iloc[:, -1]], axis = 1)

In [111]:
df_c_g = df_c.iloc[:, 25:]

In [112]:
df_c_d.to_csv("datasets/round_1/combined/combined_pool_dens_dipole.csv", index = False)

In [113]:
df_c_g.to_csv("datasets/round_1/combined/combined_pool_grad_dipole.csv", index = False)

# Averaging: Combine Round 0 and 1

In [88]:
df1 = pd.read_csv("datasets/round_1/combined/round1_avg_dens_grad_lap_targs.csv")

In [89]:
df0 = pd.read_csv("datasets/round_0/combined/round0_avg_dens_grad_lap_dipole.csv")

In [90]:
df1_scl = pd.read_csv("datasets/round_1/combined/round1_avg_dens_grad_lap_targs_stdScl.csv")

In [91]:
df0_scl = pd.read_csv("datasets/round_0/combined/round0_avg_dens_grad_lap_dipole_stdScl.csv")

In [92]:
df_c = pd.concat([df1.iloc[:, :-1], df0], axis = 0)

In [93]:
df_c.to_csv("datasets/round_1/combined/comb_avg_dens_grad_lap_dipole.csv", index = False)

In [94]:
df_c_scl = pd.concat([df1_scl.iloc[:, :-1], df0_scl], axis = 0)

In [96]:
df_c_scl.to_csv("datasets/round_1/combined/comb_avg_dens_grad_lap_dipole_stdScl.csv", index = False)

In [97]:
df_c_scl.shape

(2394, 16)

In [98]:
df_c_scl.corr()

Unnamed: 0,d100,g100,l100,d200,g200,l200,d300,g300,l300,d400,g400,l400,d500,g500,l500,Dipole
d100,1.0,0.490458,-0.24993,0.908464,0.768262,-0.748517,0.777883,0.614512,-0.701005,0.685522,0.456177,-0.617269,0.629384,0.35124,-0.53841,-0.013954
g100,0.490458,1.0,-0.738292,0.272261,0.553306,-0.463244,0.203798,0.32634,-0.294512,0.187226,0.255517,-0.233864,0.183647,0.242947,-0.213101,0.169461
l100,-0.24993,-0.738292,1.0,0.088786,-0.324943,0.473639,0.175419,-0.121104,0.151869,0.170033,-0.106533,0.055577,0.150605,-0.133817,0.035352,-0.205866
d200,0.908464,0.272261,0.088786,1.0,0.647773,-0.521919,0.95245,0.610716,-0.652609,0.878192,0.475894,-0.657631,0.820163,0.366686,-0.620404,-0.101592
g200,0.768262,0.553306,-0.324943,0.647773,1.0,-0.752803,0.499187,0.824079,-0.681564,0.408074,0.609195,-0.566266,0.361013,0.471765,-0.461482,0.140213
l200,-0.748517,-0.463244,0.473639,-0.521919,-0.752803,1.0,-0.276582,-0.55954,0.833979,-0.14841,-0.417021,0.609733,-0.094118,-0.340109,0.437195,-0.1373
d300,0.777883,0.203798,0.175419,0.95245,0.499187,-0.276582,1.0,0.509141,-0.442727,0.977941,0.408556,-0.529472,0.941461,0.318008,-0.566584,-0.160749
g300,0.614512,0.32634,-0.121104,0.610716,0.824079,-0.55954,0.509141,1.0,-0.664071,0.41338,0.899868,-0.639198,0.352788,0.730187,-0.54721,0.14362
l300,-0.701005,-0.294512,0.151869,-0.652609,-0.681564,0.833979,-0.442727,-0.664071,1.0,-0.268835,-0.555001,0.904358,-0.168619,-0.457392,0.722855,-0.102768
d400,0.685522,0.187226,0.170033,0.878192,0.408074,-0.14841,0.977941,0.41338,-0.268835,1.0,0.326568,-0.368495,0.98985,0.252646,-0.446851,-0.18858


# Rotations and scaling

In [114]:
from sklearn.preprocessing import StandardScaler

In [115]:
df_c = pd.read_csv("datasets/round_1/combined/combined_pool_dens_grad_dipole.csv")

In [116]:
scaler = StandardScaler()

In [117]:
df_c_scaled = pd.DataFrame(np.hstack([scaler.fit_transform(df_c.values[:, :-1]), df_c.values[:, -1].reshape(-1,1)]), columns = df_c.columns)

In [118]:
df_c_scaled.to_csv("datasets/round_1/combined/combined_pool_dens_grad_dipole_stdScl.csv")

In [119]:
df_c_scaled.values[0][:25]

array([-1.25979306, -1.27440644, -1.282043  , -1.27063146, -1.23901579,
       -1.24809412, -1.26304917, -1.27225439, -1.24155821, -1.17778719,
       -1.17861172, -1.15642666, -1.14491955, -1.10784537, -1.03331318,
       -1.01820497, -0.89036518, -0.81848277, -0.81919263, -0.80767035,
       -0.80163351, -0.55220731, -0.42272924, -0.47814541, -0.57749089])

In [120]:
def rotate(vec):
    rotations = []
    rotations.append(vec)
    
    m = vec.reshape(5,5)
    for i in range(1,4):
        rotations.append(np.rot90(m, i).flatten())
        
    return rotations

def get_rotations(arr):
    new_targs = []
    feats = []
    
    for row in arr:
        new_targs += [row[-1]]*16
        
        dens = row[:25]
        grad = row[25:-1]
        
        drots = rotate(dens)
        grots = rotate(grad)
        
        for drot in drots:
            for grot in grots:
                rot_feats = np.hstack([drot, grot])
                assert(len(rot_feats) == 50)
                feats.append(rot_feats)
                
    return feats, new_targs
                

In [121]:
feats, targs = get_rotations(df_c_scaled.values)

In [122]:
rot_df_c_scaled = np.hstack([np.vstack(feats), np.array(targs).reshape(-1,1)])

In [123]:
rot_df_c_scaled = pd.DataFrame(rot_df_c_scaled, columns=df_c_scaled.columns)

In [124]:
rot_df_c_scaled.to_csv("datasets/round_1/combined/combined_pool_dens_grad_dipole_stdScl_rot.csv", index = False)

In [125]:
rot_df_c_scaled.shape

(38304, 51)

# other

In [68]:
def minmax_scale(im, newmin, newmax):
    print(im.shape)
    im_min = np.min(im)
    im_max = np.max(im)
    
    newim = newmin + ((im - im_min)*(newmax-newmin))/(im_max-im_min)
    
    
    return newim
    

In [99]:
def print_features(sigma, centroids, pad=500, org_rad=75):
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    
    max_coord = int(np.max(centroids))
    min_coord = int(np.min(centroids))
    
    image = np.zeros((max_coord + min_coord + pad*2, max_coord + min_coord + pad*2))
    dCentroids_scaled = centroids+pad
    
    for (c1, c2) in dCentroids_scaled:
        x = int(c2)
        y = int(c1)
        image[x-org_rad:x+org_rad, y-org_rad:y+org_rad] = 255
    
    im = cp.array(image)

    im_blur = ndimage.gaussian_filter(im, sigma=sigma, mode = 'constant')
    
    sobel = ndimage.gaussian_gradient_magnitude(im, sigma=sigma, mode = 'constant')
    
    laplace = ndimage.gaussian_laplace(im, sigma=sigma, mode = 'constant')

    avg_pooling(im_blur, dCentroids_scaled, org_rad, sigma, "Gaussian Blur")
    
    avg_pooling(sobel, dCentroids_scaled, org_rad, sigma, "Sobel")
    
    avg_pooling(laplace, dCentroids_scaled, org_rad, sigma, "Laplace")
    
    """
    im2 = avg_pooling(sobel, centroids, org_rad, sigma)
    
    plt.imshow(im1.get())
    plt.show()
    
    plt.imshow(im2.get())
    plt.show()
    
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()"""

In [None]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()