# Objective
Assess the fraction of expert annotations needed to get good precision and recall.

# Takeaways
Keep in mind that since each bootstrap sample is randomly sampled from all the expert annotations, we're getting some little spots, some big spots, with total randomness. In reality, if an expert were told e.g. "Oh, just click 50% of the spots," the expert would probably only click the brightest ones.

In [17]:
from math import sqrt
from math import floor
from scipy import optimize
from skimage import data
from skimage.feature import blob_log
from skimage.color import rgb2gray
from skimage.io import imread
from sklearn.neighbors import KDTree
from numpy import pi, r_
import matplotlib.pyplot as plt
import numpy as np
import math, random, sys
sys.path.insert(0, '../../FISH-annotation/Annotation')
import util

Cell below from https://scipy-cookbook.readthedocs.io/items/FittingData.html.

In [3]:
def gaussian(height, center_x, center_y, width_x, width_y):
    """Returns a gaussian function with the given parameters"""
    width_x = float(width_x)
    width_y = float(width_y)
    return lambda x,y: height*np.exp(
                -(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)

def moments(data):
    """Returns (height, x, y, width_x, width_y)
    the gaussian parameters of a 2D distribution by calculating its
    moments """
    total = data.sum()
    X, Y = np.indices(data.shape)
    x = (X*data).sum()/total
    y = (Y*data).sum()/total
    col = data[:, int(y)]
    width_x = np.sqrt(np.abs((np.arange(col.size)-y)**2*col).sum()/col.sum())
    row = data[int(x), :]
    width_y = np.sqrt(np.abs((np.arange(row.size)-x)**2*row).sum()/row.sum())
    height = data.max()
    return height, x, y, width_x, width_y

def fitgaussian(data):
    """Returns (height, x, y, width_x, width_y)
    the gaussian parameters of a 2D distribution found by a fit"""
    params = moments(data)
    errorfunction = lambda p: np.ravel(gaussian(*p)(*np.indices(data.shape)) -
                                 data)
    p, success = optimize.leastsq(errorfunction, params)
    return p

### Load expert annotations
All fractional samples be sampled from `coords`.

In [6]:
coords = np.genfromtxt('smfish_coords.csv', delimiter=',')

### Set bootstrapping experiment params

In [None]:
fraction_list = np.arange(10, 110, 10)
precision_list_list = []	# one precision list for each fraction
recall_list_list = []		# one recall list for each fraction
num_trials = 5				# number of trials per fraction
margin = 4					# margin around each spot, in pixels

### Run bootstrapping experiment

In [None]:
for fraction in fraction_list:
    precision_list = []
    recall_list = []
    for _ in range(num_trials):
 
        # sample a fraction of the expert annotations
        num_to_sample = floor(fraction * len(coords))
        indices = random.sample(range(len(coords)), num_to_sample)
        coords_sampled = [coords[index] for index in indices]
        
        # run parameter extraction to get intensity and sigma_max value for each spot
        #     imagining that only that coords_sampled exists
        
        max_intensity_list = []
        sigma_max_list = []

        for x, y in coords_sampled:

            x_min = int(x)-margin if int(x)-margin >= 0 else 0
            x_max = int(x)+margin if int(x)+margin < im.shape[1] else im.shape[1]-1
            y_min = int(y)-margin if int(y)-margin >= 0 else 0
            y_max = int(y)+margin if int(y)+margin < im.shape[0] else im.shape[0]-1

            crop = im[y_min:y_max, x_min:x_max]
            params = fitgaussian(crop)
            fit = gaussian(*params)
            (height, x_param, y_param, width_x, width_y) = params
            sigma_min = math.sqrt(min(width_x, width_y)/2)
            sigma_max = math.sqrt(max(width_x, width_y)/2)
            max_intensity = np.amax(crop)

            max_intensity_list.append(max_intensity)
            sigma_max_list.append(sigma_max)
        
        # run blob log, imagining that only that coords_sampled exists, and save precision and recall values
        thresholds = np.arange(0.02, 0.18, 0.005)
        sigma_maxs
        
        
        

    precision_list_list.append(precision_list)
    recall_list_list.append(recall_list)

# Plotting

### precision vs. fraction of expert annotations sampled

In [1]:
plt.figure()
for ind, fraction in enum(fraction_list):
    plt.scatter([fraction]*num_trials, precision_list_list[ind], c='cyan')
    plt.scatter([fraction], [np.mean(precision_list_list[ind])], c='orange')
plt.xlabel('fraction of expert annotations sampled')
plt.ylabel('precision')
plt.show()

SyntaxError: invalid character in identifier (<ipython-input-1-4f937a514f6f>, line 3)

### precision vs. fraction of expert annotations sampled

In [None]:
plt.figure()
for ind, fraction in enum(fraction_list):
    plt.scatter([fraction]*num_trials, recall_list_list[ind], c='cyan')
    plt.scatter([fraction], [np.mean(recall_list_list[ind])], c='orange'')
plt.xlabel('fraction of expert annotations sampled')
plt.ylabel('recall')
plt.show()