In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist
from scipy import ndimage, misc
from skimage.morphology import skeletonize
import sklearn
from sklearn import linear_model
from sklearn import model_selection 
from tqdm import tqdm
import cv2
import tifffile
import cupy as cp
import matplotlib
from cupyx.scipy import ndimage
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import linear_model
import random
import time
from PIL import Image
import pickle
import itertools

In [11]:
def extract_features(image, sigma, centroids, half_window=75):
    """Compute local density and gradient features around each centroid on the GPU.

    The image is Gaussian-blurred (zero-padded borders), scaled by
    ``sigma * sqrt(pi)``, and a Sobel gradient magnitude is derived from the
    scaled blur. For every centroid the mean of both maps is taken over a
    square window of side ``2 * half_window`` centred on it.

    Parameters
    ----------
    image : 2-D array-like
        Input image; copied to the GPU with ``cp.array``.
    sigma : float
        Standard deviation of the Gaussian blur.
    centroids : iterable of (row, col)
        Window centres, given as indices into ``image`` (axis 0 first).
    half_window : int, optional
        Half the side length of the averaging window. Defaults to 75, the
        value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray of shape (len(centroids), 2)
        Column 0 is the mean scaled blur ("density"), column 1 the mean
        gradient magnitude, one row per centroid in input order.
    """
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    im_blur = ndimage.gaussian_filter(cp.array(image), sigma=sigma, mode='constant', cval=0)
    im_blur_norm = im_blur * sigma * cp.sqrt(np.pi)

    im_sx = ndimage.sobel(im_blur_norm, axis=1, mode='reflect')
    im_sy = ndimage.sobel(im_blur_norm, axis=0, mode='reflect')
    # Explicit cupy call instead of relying on numpy dispatching to the GPU array.
    im_sobel = cp.hypot(im_sx, im_sy)

    feats = []
    for centroid in centroids:
        row, col = int(centroid[0]), int(centroid[1])
        # Clamp the lower bounds: a negative slice start would wrap around to
        # the far edge of the image instead of stopping at the border.
        row_lo = max(row - half_window, 0)
        col_lo = max(col - half_window, 0)
        window = (slice(row_lo, row + half_window), slice(col_lo, col + half_window))
        density = cp.nanmean(im_blur_norm[window])
        grad = cp.nanmean(im_sobel[window])
        feats.append([density.get(), grad.get()])

    # Drop every reference to the GPU intermediates first; free_all_blocks()
    # only returns blocks that are no longer in use.
    del im_blur, im_blur_norm, im_sx, im_sy, im_sobel
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # reshape keeps the (n, 2) layout even when there are no centroids.
    return np.array(feats).reshape(-1, 2)

def generate_window(window_size, p, pad, seed, pitch=200, square_size=150):
    """Render a random pattern of filled squares and return it with the square centres.

    A ``window_size x window_size`` grid of uniform random numbers is drawn
    and every cell whose value is below ``p`` becomes a filled square
    (value 255) in the output image.

    Parameters
    ----------
    window_size : int
        Number of grid cells along each side.
    p : float
        Threshold on the uniform random draw; a cell is filled when its draw
        is below ``p``.
    pad : int
        Offset added to every square position; the image is ``2 * pad``
        larger than the grid in each dimension.
    seed : int
        Seed of the random generator used for the pattern.
    pitch : int, optional
        Spacing between grid cells in the rendered image (default 200).
    square_size : int, optional
        Side length of each filled square (default 150).

    Returns
    -------
    im : numpy.ndarray
        ``(dim, dim)`` float array, 255 inside squares and 0 elsewhere.
    centroids : list of (int, int)
        ``(row, col)`` centre of every square, in row-major grid order.
    pattern_dim_scaled : int
        Side length ``dim`` of the image: ``window_size * pitch + 2 * pad``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.rand(...), but leaves the global RNG untouched.
    rng = np.random.RandomState(seed)
    random_pattern = rng.rand(window_size, window_size)

    # Top-left corner of every selected cell, in image coordinates.
    corners = np.argwhere(random_pattern < p) * pitch + pad
    pattern_dim_scaled = window_size * pitch + 2 * pad

    half = square_size // 2
    centroids = []
    im = np.zeros((pattern_dim_scaled, pattern_dim_scaled))

    for row, col in corners:
        row, col = int(row), int(col)
        im[row:row + square_size, col:col + square_size] = 255
        centroids.append((row + half, col + half))

    return im, centroids, pattern_dim_scaled

In [12]:
# Seeds passed to generate_window; one random pattern is generated per seed.
seeds = [3,4,7,8,11,12]
# Earlier sigma sweep, disabled; the visible cells iterate over `sigmas_add` instead.
#sigmas = list(range(200, 3000, 400))

In [13]:
# Gaussian-blur sigmas for which extract_features is evaluated on every pattern.
sigmas_add = [400, 800]

In [15]:
# Pattern parameters handed to generate_window: grid side, fill threshold, border offset.
window_size, p, pad = 40, 1/16, 1000

# all_feats: one feature array per (seed, sigma) pair, seed-major order.
# cents: one centroid list per seed.
all_feats, cents = [], []

for seed in tqdm(seeds):
    # The third return value (image side length) is not needed here.
    testim, cs, _ = generate_window(window_size, p, pad, seed)
    cents.append(cs)
    for sigma in sigmas_add:
        all_feats.append(
            extract_features(testim, sigma, cs)
        )
        

100%|███████████████████████████████████████████████████████| 6/6 [01:53<00:00, 18.85s/it]


In [20]:
# Accumulators for the per-seed stacking step: `df` receives one feature block
# per seed and `lengths` the number of rows in each block.
df = []
lengths = []
# NOTE(review): `centers` is not referenced by any other visible cell — confirm it is still needed.
centers = []

In [21]:
# Flatten the per-seed centroid lists into one list, keeping seed order.
centroids_vec = [centroid for seed_centroids in cents for centroid in seed_centroids]

In [22]:
# Split the centroid pairs into their first and second coordinates.
# NOTE(review): generate_window stores centroids as (axis-0, axis-1) image indices,
# so `x` holds the row coordinate and `y` the column coordinate — confirm the naming is intended.
x, y = (np.array(centroids_vec)[:, axis] for axis in (0, 1))

In [23]:
# Group the flat feature list by seed. `all_feats` holds one array per
# (seed, sigma) pair in seed-major order, so each seed owns a run of
# len(sigmas_add) consecutive entries; stacking a run side by side yields one
# row per centroid with the features of every sigma as columns.
# Both lists are rebuilt from scratch so re-running the cell does not append duplicates.
n_sigmas = len(sigmas_add)
df = [np.hstack(all_feats[i * n_sigmas:(i + 1) * n_sigmas]) for i in range(len(seeds))]
lengths = [len(seed_feats) for seed_feats in df]

In [24]:
# Number of rows (centroids) in each per-seed feature block.
lengths

[86, 95, 105, 101, 92, 89]

In [25]:
# One inner list per seed, repeating the seed once for every row of that seed's block.
seeds_vec = [
    [seed_value for _ in range(lengths[position])]
    for position, seed_value in enumerate(seeds)
]

In [26]:
# Flat per-row seed labels, built directly from `seeds` and `lengths`.
# Flattening `seeds_vec` onto itself only works once: on a second run the list
# is already flat and chaining over its integers raises TypeError.
seeds_vec = [seed for ix, seed in enumerate(seeds) for _ in range(lengths[ix])]

In [33]:
# Stack the per-seed feature blocks row-wise into a single table (one row per centroid).
sigmas_df = pd.DataFrame(data=np.concatenate(df, axis=0))

In [34]:
# Sanity check: rows = total number of centroids, columns = 2 features per sigma.
sigmas_df.shape

(568, 4)

In [35]:
# Attach per-row metadata: the generating seed and the two centroid coordinates.
# NOTE(review): `x` is the first centroid coordinate, which generate_window uses
# as the axis-0 (row) index — confirm that "cx"/"cy" are labelled as intended.
for column_name, column_values in (("seeds", seeds_vec), ("cx", x), ("cy", y)):
    sigmas_df[column_name] = column_values

In [36]:
# Preview the table; feature columns still carry their default integer labels.
sigmas_df.head()

Unnamed: 0,0,1,2,3,seeds,cx,cy
0,7597.341244,70.007968,7265.171755,42.114888,3,1075,2675
1,7611.521379,67.579849,7536.277455,38.268426,3,1075,3075
2,4215.056702,13.497684,6278.532303,46.010267,3,1075,4275
3,6942.516611,95.25535,8365.924466,67.548575,3,1275,6275
4,5505.440377,46.367691,5316.481195,40.382193,3,1475,8875


In [37]:
# Feature column names in the same sigma-major order as the stacked feature columns.
titles = [
    f"{sigma}_{feat}"
    for sigma in sigmas_add
    for feat in ("density", "grad")
]

In [38]:
# Append the metadata column names after the feature names. Any copies already
# present are dropped first, so re-running the cell does not duplicate them.
meta_titles = ["seeds", "cx", "cy"]
titles = [title for title in titles if title not in meta_titles] + meta_titles

In [39]:
# Final column names: per-sigma features followed by the metadata columns.
titles

['400_density', '400_grad', '800_density', '800_grad', 'seeds', 'cx', 'cy']

In [40]:
# Replace all column labels positionally; `titles` must have one entry per column.
sigmas_df.columns = titles

In [41]:
# Preview the table with its descriptive column names.
sigmas_df.head()

Unnamed: 0,400_density,400_grad,800_density,800_grad,seeds,cx,cy
0,7597.341244,70.007968,7265.171755,42.114888,3,1075,2675
1,7611.521379,67.579849,7536.277455,38.268426,3,1075,3075
2,4215.056702,13.497684,6278.532303,46.010267,3,1075,4275
3,6942.516611,95.25535,8365.924466,67.548575,3,1275,6275
4,5505.440377,46.367691,5316.481195,40.382193,3,1475,8875


In [42]:
# Persist the feature table to the working directory; index=False omits the row index.
sigmas_df.to_csv("sigmas_400_800_df.csv", index=False)

In [43]:
# Reload the saved table from disk.
# NOTE: this rebinds `df`, which earlier held the list of per-seed feature arrays.
df = pd.read_csv("sigmas_400_800_df.csv")

In [44]:
# Confirm the reloaded table matches what was written.
df.head()

Unnamed: 0,400_density,400_grad,800_density,800_grad,seeds,cx,cy
0,7597.341244,70.007968,7265.171755,42.114888,3,1075,2675
1,7611.521379,67.579849,7536.277455,38.268426,3,1075,3075
2,4215.056702,13.497684,6278.532303,46.010267,3,1075,4275
3,6942.516611,95.25535,8365.924466,67.548575,3,1275,6275
4,5505.440377,46.367691,5316.481195,40.382193,3,1475,8875


In [45]:
# Summary statistics for every column of the reloaded table.
df.describe()

Unnamed: 0,400_density,400_grad,800_density,800_grad,seeds,cx,cy
count,568.0,568.0,568.0,568.0,568.0,568.0,568.0
mean,9270.414653,59.281641,11907.524348,35.422275,7.501761,4878.169014,5043.661972
std,3298.936463,35.139654,3266.92416,18.138519,3.221948,2326.457858,2322.211659
min,3975.720227,8.630093,3202.038211,2.673049,3.0,1075.0,1075.0
25%,6974.096415,35.240284,9754.310177,20.725176,4.0,2825.0,3025.0
50%,8766.101162,52.777908,11748.281796,34.296799,7.0,4675.0,5075.0
75%,11069.085007,74.072266,14059.979495,48.209166,11.0,7075.0,7075.0
max,26235.030301,235.986109,22290.200567,104.498116,12.0,8875.0,8875.0
