### Imports

In [1]:
from caveclient import CAVEclient
from intern import array
import pickle
import numpy as np
from agents import data_loader
from cloudvolume import CloudVolume
from membrane_detection import membranes
from agents.scripts import precompute_membrane_vectors, create_post_matrix, merge_paths, get_soma
import agents.sensor
from agents.run import run_agents
import aws.sqs as sqs
import sys
import time
import ast
import pandas as pd
import agents.scripts as scripts
from drive import drive
from finding_orphans import *
from math import sqrt
from tip_finding import tip_finder_decimation

### Loading in data

In [2]:
# Table of ground-truth endpoint annotations (previewed in the next cell).
load = pd.read_csv("agents/Data/endpoints_gt5.csv")
# Filled in by endpoint_generator: seg_id -> endpoints proposed by tip_finder_decimation.
endpoints_dict = {}

In [3]:
# Preview the first rows of the ground-truth table.
load.head()

Unnamed: 0,neuron,ng_link,seg_id,pink_pts,num_endpoints,endpoints,comments,detailed_comments
0,8.646911e+17,https://neuroglancer.neuvue.io/?json_url=https...,864691135909994000,"(402188, 228684, 24029)",2,"((402584, 228856, 23991), (402985, 229235, 235...",good,
1,,,864691135247440303,"(401258, 224832, 24029)",3,"((401612, 224623, 23991), (405257, 226318, 236...",good,
2,,,864691134794123793,"(401314, 228366, 24424)",2,"((401242, 228382, 24444), (400982, 228457, 245...",m,merged to two axon pieces
3,,,864691135772363453,"(400199, 220721, 24029)",7,"((401289, 218721, 23991), (399895, 216533, 235...",m,
4,,,864691135314714227,"(397870, 232292, 24004)",2,"((397867, 232230, 23999), (397854, 232252, 239...",good,


In [None]:
# Series handles for the segment-id and ground-truth-endpoint columns.
# NOTE(review): neither name is referenced again in the visible notebook.
segIDs = load['seg_id']
Endpoints = load['endpoints']

In [None]:
# Adds a 'proposed_endpoints' column to `load` and parses its 'endpoints' column.
# NOTE(review): endpoint_generator is defined further down in this notebook (under
# "Helper functions we wrote"). Unless `from finding_orphans import *` also provides
# one, this cell fails on a fresh top-to-bottom run -- TODO move it below the definition.
load = endpoint_generator(load, invalidation_d= 5000, humfrey_iters= 50, decimation_factor= 0.5)

In [8]:
# Run the tip finder on a single segment; t1 holds the proposed tips (displayed below).
# Fix: removed the stray `]` after the closing parenthesis, which was a syntax error.
t1, skel, mesh_obj = tip_finder_decimation(
    864691135909994000, inval_d=5600, cube_side_len=300, decimation_factor=0.5)






100%|██████████| 1670/1670 [00:00<00:00, 261059.51it/s]


In [None]:
# The table should now have two endpoint columns: ground truth ('endpoints')
# and proposed ('proposed_endpoints').
load.head(10)

In [9]:
# Tips returned by the single-segment tip_finder_decimation call above.
t1

[(402937, 229087, 23546), (402637, 228637, 23996)]

### Helper functions we wrote

In [None]:
def find_euclidian_distance(proposed_endpoint, gt_endponts_array):
    """
    Distance from one proposed endpoint to its nearest ground-truth endpoint.

    Parameters
    ----------
    proposed_endpoint : array_like
        Coordinates of a single proposed endpoint.
    gt_endponts_array : numpy.ndarray
        Ground-truth endpoints, one point per row.

    Returns
    -------
    min_dist : float
        Smallest Euclidean distance between the proposed endpoint and any row
        of the ground-truth array.
    """
    offsets = gt_endponts_array - proposed_endpoint
    # Row-wise Euclidean norm of the offsets.
    distances = np.sqrt(np.square(offsets).sum(axis=1))
    nearest = np.argmin(distances)
    return distances[nearest]

    # smallest_distance = 2**30
    # for endpoint in gt_endponts_array:
    #     difference = proposed_endpoint - endpoint
    #     distance = np.sqrt(np.sum(np.square(difference)))
    #     if(distance < smallest_distance):
    #         smallest_distance = distance 
    # return smallest_distance 
            
    #could add endpoint to dict

In [None]:
def endpoint_generator(load, invalidation_d, humfrey_iters, decimation_factor,
                       max_segments=20):
    """
    Run the tip finder on each segment and attach the proposed endpoints to `load`.

    Parameters
    ----------
    load : pandas.DataFrame
        Ground-truth table with 'seg_id' and 'endpoints' columns. It is modified
        in place (and also returned).
    invalidation_d : int
        Forwarded to tip_finder_decimation as `inval_d`.
    humfrey_iters : int
        Currently NOT forwarded.
        NOTE(review): no matching keyword of tip_finder_decimation is visible in
        this notebook -- wire it through once the keyword name is confirmed.
    decimation_factor : float
        Forwarded to tip_finder_decimation as `decimation_factor`.
    max_segments : int or None
        Only the first `max_segments` segments are processed (default 20, the
        previously hard-coded limit). None processes every row.

    Returns
    -------
    load : pandas.DataFrame
        The same frame with 'endpoints' parsed into lists and a new
        'proposed_endpoints' column (missing where the tip finder failed or the
        row was beyond `max_segments`).
    """
    # Collect this call's results separately so that proposals from an earlier
    # parameter combination cannot leak into this one through the global dict.
    proposed = {}
    for position, seg_id in enumerate(load['seg_id']):
        if max_segments is not None and position >= max_segments:
            break
        try:
            tips, _skel, _mesh_obj = tip_finder_decimation(
                str(seg_id),
                inval_d=invalidation_d,
                decimation_factor=decimation_factor,
            )
        except Exception as err:  # best effort: report the failure and move on
            print(f"\n\nSeg {seg_id} returned error on get. Skipping.\n{err!r}\n")
            continue
        proposed[seg_id] = tips

    # Keep the module-level dict up to date for code that still reads it.
    endpoints_dict.update(proposed)

    # The CSV stores endpoints as a string; parse it only once so the function
    # can be called repeatedly on the same frame.
    load['endpoints'] = load['endpoints'].apply(
        lambda x: list(ast.literal_eval(x)) if isinstance(x, str) else x)
    load['proposed_endpoints'] = load.seg_id.map(proposed)
    return load

In [None]:
def get_recall(num_correct, array_endpoints):
    """Recall in percent: correct proposals over the number of ground-truth endpoints."""
    total_ground_truth = len(array_endpoints)
    return num_correct / total_ground_truth * 100

In [None]:
def get_precision(num_correct, array_proposed):
    """Precision in percent: correct proposals over the number of proposed endpoints."""
    total_proposed = len(array_proposed)
    return num_correct / total_proposed * 100

In [None]:
def testing_metrics(load, threshold, run, output):
    for index, row in load.iterrows():
        if index == 20:
            break
        
        endpoint_array = np.array(row["endpoints"])
        proposed_endpoints_array = np.array(row["proposed_endpoints"])
        segID = row["seg_id"]

        #skip anything seg_id that isn't 'good'
        if row["comments"] != 'good':
            print("skipping \n")
            continue
        
        #if both say no endpoints, then it's correct
        elif (len(proposed_endpoints_array.shape) == 0 and len(endpoint_array.shape) == 0):
            out_df.at[index, 'precision'] =1.0
            out_df.at[index, 'recall'] = 1.0
            out_df.at[index, 'f1'] = 1.0

            continue

        #if we propose no endpoints but there are endpoints, it's wrong
        elif (len(proposed_endpoints_array.shape) == 0 and len(endpoint_array.shape) > 0):
            out_df.at[index, 'precision'] = 0.0
            out_df.at[index, 'recall'] = 0.0
            out_df.at[index, 'f1'] = 0.0
            continue 

        #if we propose endpoints but there are none, it's wrong
        elif proposed_endpoints_array.size > 0 and endpoint_array.size == 0:
            out_df.at[index, 'precision'] = 0.0
            out_df.at[index, 'recall'] = 0.0
            out_df.at[index, 'f1'] = 0.0
            continue

        #should skip for now if the endpoint array is size zero to avoid divide by zero error
        elif endpoint_array.size == 0:
            out_df.at[index, 'precision'] = null
            out_df.at[index, 'recall'] = null
            out_df.at[index, 'f1'] = null
            continue

        else:                                                                                   
            endpoint_ids = np.arange(0, len(endpoint_array))
            test_ids = np.arange(0, len(proposed_endpoints_array)) + len(proposed_endpoints_array)

            #get precision and recall at end of each iteration 
            analysis = run_synapse_analysis(
                endpoint_array,
                np.array(endpoint_ids),
                proposed_endpoints_array,
                np.array(test_ids),
                threshold,
                iso_correction=10,
                )        

            output.loc[len(output.index)] = [run, analysis.precision, analysis.recall, analysis.f1, segID, len(endpoint_array), endpoint_array, row["comments"], proposed_endpoints_array]
    return output

### Analysis functions

In [None]:
"""
Various utility classes and functions for Confirms.
"""
from copy import deepcopy

import numpy as np
import pandas as pd
from scipy import spatial
from scipy.optimize import linear_sum_assignment


def calculate_precision_recall(tp, fp, fn):
    """
    Derive precision and recall from confusion counts.
    Parameters
    ----------
    tp : int
        Count of true positives.
    fp : int
        Count of false positives.
    fn : int
        Count of false negatives.
    Returns
    -------
    precision : float
        tp / (tp + fp).
    recall : float
        tp / (tp + fn).
    """
    predicted_positives = tp + fp
    actual_positives = tp + fn
    # Precision is evaluated first, then recall (tuple elements evaluate left to right).
    return tp / predicted_positives, tp / actual_positives


def calculate_f1(precision, recall):
    """
    Combine precision and recall into an F1 score (their harmonic mean).
    Parameters
    ----------
    precision : float
        Precision score.
    recall : float
        Recall score.
    Returns
    -------
    f1 : float
        2 * precision * recall / (precision + recall).
    """
    product = precision * recall
    total = precision + recall
    return 2 * (product / total)


def get_summary_metrics(array):
    """
    Calculate a number of summary metrics on an array of numbers.
    Parameters
    ----------
    array : array_like
        Array containing the numbers whose summary metrics are desired.
    Returns
    -------
    metrics : dict
        Dict containing the mean, median, max, min, range, standard deviation,
        and variance of the input array.
    """
    array = np.array(array)
    largest = np.amax(array)
    smallest = np.amin(array)

    return {
        "mean": np.mean(array),
        "median": np.median(array),
        "max": largest,
        "min": smallest,
        # Fix: the range is max - min; it was previously filled with the mean.
        "range": largest - smallest,
        "stddev": np.std(array),
        "variance": np.var(array),
    }


def munkres_assignment(workers, jobs):
    """
    Optimal one-to-one pairing of two point sets (Hungarian / Munkres method).
    Parameters
    ----------
    workers : array_like
        First set of points, one point per row.
    jobs : array_like
        Second set of points, one point per row.
    Returns
    -------
    cost_matrix : numpy.ndarray
        Pairwise Euclidean distances; entry [i, j] is the distance between
        workers[i] and jobs[j].
    row_ind : numpy.ndarray
        Row indices of cost_matrix in the optimal assignment.
    col_ind : numpy.ndarray
        Column indices of cost_matrix in the optimal assignment.
    """
    pairwise = spatial.distance.cdist(workers, jobs, metric="euclidean")
    assignment = linear_sum_assignment(pairwise)
    return pairwise, assignment[0], assignment[1]


def make_isotropic(xyz, correction, dimen=2):
    """
    Correct anisotropy in a collection of x, y, z coordinates.
    The input is never modified; a scaled copy is returned.
    Parameters
    ----------
    xyz : numpy.ndarray or pandas.DataFrame
        The coordinate values. Any other array_like is converted to an ndarray.
    correction : float
        Factor by which the chosen coordinate is multiplied.
    dimen : int
        Index (along the last axis) of the coordinate to scale. Default is 2.
    Returns
    -------
    iso_xyz : numpy.ndarray or pandas.DataFrame
        A copy of xyz with coordinate `dimen` multiplied by `correction`.
    Raises
    ------
    ValueError
        If `dimen` is negative or not smaller than the size of the last axis.
    """
    if not isinstance(xyz, (np.ndarray, pd.DataFrame)):
        xyz = np.asarray(xyz)

    size = np.shape(xyz)[-1]
    if dimen >= size or dimen < 0:
        raise ValueError("improper dimen value (valid: 0 through {})".format(size - 1))

    if isinstance(xyz, np.ndarray):
        # Fix: copy into a dtype that can hold the product, so a fractional
        # correction applied to integer coordinates is no longer truncated.
        isotropic_xyz = xyz.astype(np.result_type(xyz, correction))
        isotropic_xyz[..., dimen] = isotropic_xyz[..., dimen] * correction
    else:
        isotropic_xyz = xyz.copy()
        isotropic_xyz.iloc[:, dimen] = isotropic_xyz.iloc[:, dimen] * correction
    return isotropic_xyz


# def get_range(center, pixel_size, radius):
#     """
#     <Description here>
#     Parameters
#     ----------
#     center : <type>
#         <Description>
#     pixel_size : <type>
#         <Description>
#     radius : <type>
#         <Description>
    
#     Returns
#     -------
#     range : numpy.ndarray
#         <Description>
#     """
#     return np.asarray(
#         [int(center - (radius / pixel_size)), int(center + (radius / pixel_size))],
#         dtype="int",

In [None]:
"""
Confirms synapse processing and analysis functions.
"""
from collections import namedtuple

import numpy as np
import pandas as pd
from scipy import spatial
from scipy.optimize import linear_sum_assignment

# from . import utils
# import utils

# Result record built by run_synapse_analysis: the three scores, plus the ids of the
# true positives (ground-truth side and test side), false positives and false negatives.
SynapseMetrics = namedtuple(
    "SynapseMetrics",
    ["precision", "recall", "f1", "tp_gt_ids", "tp_test_ids", "fp_ids", "fn_ids"],
)


def filter_synapse_id_core(volume, xyz, ids, box_radius_nm=2500):
    """
    Filter synapses to only return those within a central core.
    Parameters
    ----------
    volume : dict
        The volume to filter on. The keys "center", "base_resolution" and
        "resolution" are read.
    xyz : array_like
        Synapse xyz coordinates.
    ids : array_like
        Synapse hash (ids), aligned with `xyz`.
    box_radius_nm : int
        Half side length of the box around the volume center. It is divided by
        the base resolution on every axis and, for x and y only, additionally
        by 2 ** volume["resolution"].
    Returns
    -------
        xyz : numpy.ndarray
            Coordinates of the synapses strictly inside the box.
        ids : numpy.ndarray
            Ids of those synapses (object dtype).
    """
    center = np.asarray(volume["center"], "float")
    base_resolution = np.asarray(volume["base_resolution"], "float")
    annotation_resolution = np.asarray(volume["resolution"], "float")

    # Half-widths of the box along each axis.
    downsample = 2 ** annotation_resolution
    pad_vx = box_radius_nm / base_resolution[0] / downsample
    pad_vy = box_radius_nm / base_resolution[1] / downsample
    pad_vz = box_radius_nm / base_resolution[2]
    xr = (center[0] - pad_vx, center[0] + pad_vx)
    yr = (center[1] - pad_vy, center[1] + pad_vy)
    zr = (center[2] - pad_vz, center[2] + pad_vz)

    xyz_out = []
    id_out = []
    for i in range(len(xyz)):
        x, y, z = xyz[i][0], xyz[i][1], xyz[i][2]
        # Strict inequalities: points exactly on the box faces are excluded.
        if xr[0] < x < xr[1] and yr[0] < y < yr[1] and zr[0] < z < zr[1]:
            xyz_out.append(xyz[i])
            id_out.append(ids[i])

    # Fix: `np.object` was removed from NumPy; the builtin `object` is the
    # equivalent dtype.
    return np.array(xyz_out), np.array(id_out, dtype=object)


def synapse_match(
    xyz_truth, xyz_detect, id_truth, id_detect, thresh
):  # pylint: disable=R0914
    """
    Pair ground-truth points with detected points and label every id.

    Points are paired one-to-one by munkres_assignment; a pair counts as a
    match only if its distance is below `thresh`.
    Parameters
    ----------
    xyz_truth : array_like
        Ground-truth coordinates, one point per row.
    xyz_detect : array_like
        Detected coordinates, one point per row.
    id_truth : array_like
        Ids aligned with `xyz_truth`.
    id_detect : array_like
        Ids aligned with `xyz_detect`.
    thresh : float
        Maximum distance (exclusive) for an assigned pair to count as a match.
    Returns
    -------
    id_lookup : pandas.DataFrame
        Columns "ground_truth" and "detect". Matched pairs come first, then
        unmatched ground-truth ids (detect is None), then unmatched detected
        ids (ground_truth is None).
    """

    # pylint: disable=C0103

    # Ensure we have numpy arrays
    xyz_truth = np.asarray(xyz_truth)
    xyz_detect = np.asarray(xyz_detect)
    id_truth = np.asarray(id_truth)
    id_detect = np.asarray(id_detect)

    cost, row_ind, col_ind = munkres_assignment(xyz_truth, xyz_detect)

    # Assigned pairs that are close enough are true positives. A boolean mask
    # also covers the "no match" case (it yields empty index arrays), so the
    # former always-true `len(np.where(...)) > 0` check is not needed.
    is_match = cost[row_ind, col_ind] < thresh
    gt_tp_idx, det_tp_idx = row_ind[is_match], col_ind[is_match]
    id_lookup = np.column_stack((id_truth[gt_tp_idx], id_detect[det_tp_idx]))

    # Ground-truth points without a match are false negatives.
    fn_idx = np.setdiff1d(np.arange(len(xyz_truth)), gt_tp_idx)
    if len(fn_idx) > 0:
        fn_ids = id_truth[fn_idx]
        id_lookup_fn = np.column_stack((fn_ids, np.repeat(None, len(fn_ids))))
        id_lookup = np.concatenate((id_lookup, id_lookup_fn))

    # Detected points without a match are false positives. setdiff1d (as used
    # for the false negatives) keeps the order deterministic.
    fp_idx = np.setdiff1d(np.arange(len(xyz_detect)), det_tp_idx)
    if len(fp_idx) > 0:
        fp_ids = id_detect[fp_idx]
        id_lookup_fp = np.column_stack((np.repeat(None, len(fp_ids)), fp_ids))
        id_lookup = np.concatenate((id_lookup, id_lookup_fp))

    return pd.DataFrame(id_lookup, columns=["ground_truth", "detect"])


def run_synapse_analysis(
    gt_xyzs,
    gt_ids,
    test_xyzs,
    test_ids,
    threshold,
    iso_corrected=False,
    iso_correction=1,
):
    """
    Match test points to ground truth and summarise the result as scores.
    Parameters
    ----------
    gt_xyzs : numpy.ndarray
        Ground-truth xyz coordinates.
    gt_ids : numpy.ndarray
        Ids associated with the ground-truth coordinates.
    test_xyzs : numpy.ndarray
        Test xyz coordinates.
    test_ids : numpy.ndarray
        Ids associated with the test coordinates.
    threshold : float
        Matching threshold handed to synapse_match.
    iso_corrected : boolean
        Whether the coordinates are already isotropic. If not, both point sets
        are passed through make_isotropic with `iso_correction` first.
    iso_correction : float
        Anisotropy correction factor.
    Returns
    -------
    sm : SynapseMetrics
        Precision, recall and F1, together with the true-positive ids (ground
        truth and test side), false-positive ids and false-negative ids.
        Precision and recall are both NaN if computing them divides by zero;
        F1 is NaN if computing it divides by zero.
    """
    # pylint: disable=R0913,R0914
    if not iso_corrected:
        gt_xyzs = make_isotropic(gt_xyzs, iso_correction)
        test_xyzs = make_isotropic(test_xyzs, iso_correction)

    matches = synapse_match(gt_xyzs, test_xyzs, gt_ids, test_ids, threshold)

    # Rows with both ids are matches; a missing id on one side marks a miss.
    true_pos = matches.dropna()
    false_neg = matches[matches.detect.isnull()]
    false_pos = matches[matches.ground_truth.isnull()]

    assert len(true_pos.ground_truth) == len(
        true_pos.detect
    ), "true positive ground truth and test size mismatch"

    try:
        precision, recall = calculate_precision_recall(
            len(true_pos), len(false_pos), len(false_neg)
        )
    except ZeroDivisionError:
        precision = recall = np.nan

    try:
        f1 = calculate_f1(precision, recall)
    except ZeroDivisionError:
        f1 = np.nan

    return SynapseMetrics(
        precision=precision,
        recall=recall,
        f1=f1,
        tp_gt_ids=np.asarray(true_pos.ground_truth),
        tp_test_ids=np.asarray(true_pos.detect),
        fp_ids=np.asarray(false_pos.detect),
        fn_ids=np.asarray(false_neg.ground_truth),
    )

In [None]:
# Smoke test: run the analysis on two ground-truth points and two test points.
# (The leftover debug print has been removed.)
gt_xyzs = np.array([[100475, 143982,  21654],
                    [100583, 144194,  21702]])

gt_ids = np.array([1,2])

test_xyzs = np.array([[100979, 144058,  21663],
                      [100780, 144830,  21653]])

test_ids = np.array([3,4])

threshold = 500
analysis = run_synapse_analysis(
    gt_xyzs,
    gt_ids,
    test_xyzs,
    test_ids,
    threshold,
    iso_correction=10,
)



### Using analysis functions and helper functions to calculate recall, precision, f1 for multiple seg ids

In [None]:
# out_df = load[['seg_id', 'num_endpoints', 'endpoints', 'comments', 'proposed_endpoints']]
# empty_l = [None for _ in range(len(load))]
# out_df.insert(0, 'run', range(len(load)))
# out_df.insert(1, 'precision', empty_l)
# out_df.insert(2, 'recall', empty_l)
# out_df.insert(3, 'f1', empty_l)

In [None]:
# out_df.head(2)
# #TEST 1: invalidation_d at default: 12000, WITH Humfrey_Smoothing: 50 iterations, Decimation_Factor = 0.50

# #should track total amount of correct gt recovered 

# def testing_metrics(load, threshold, run, output):
#     # # Create out_df
#     # out_df = load[['seg_id', 'num_endpoints',
#     #                'endpoints', 'comments', 'proposed_endpoints']]
#     # empty_l = [None for _ in range(len(load))]
#     # out_df.insert(0, 'run', range(len(load)))
#     # out_df.insert(1, 'precision', empty_l)
#     # out_df.insert(2, 'recall', empty_l)
#     # out_df.insert(3, 'f1', empty_l)

#     # FIGURE OUT WHETHER WE SHOULD PASS IN OUT_DF INTO THIS FUNCTION AND APPEND IN PLACE OR IF WE SHOULD RETURN OUT_DF AND CONCAT

#     for index, row in load.iterrows():
#         endpoint_array = np.array(row["endpoints"])
#         proposed_endpoints_array = np.array(row["proposed_endpoints"])
#         segID = row["seg_id"]

#         #skip anything seg_id that isn't 'good'
#         if row["comments"] != 'good':
#             print("skipping \n")
#             continue
        
#         #if both say no endpoints, then it's correct
#         elif (len(proposed_endpoints_array.shape) == 0 and len(endpoint_array.shape) == 0):
#             out_df.at[index, 'precision'] =1.0
#             out_df.at[index, 'recall'] = 1.0
#             out_df.at[index, 'f1'] = 1.0

#             continue

#         #if we propose no endpoints but there are endpoints, it's wrong
#         elif (len(proposed_endpoints_array.shape) == 0 and len(endpoint_array.shape) > 0):
#             out_df.at[index, 'precision'] = 0.0
#             out_df.at[index, 'recall'] = 0.0
#             out_df.at[index, 'f1'] = 0.0
#             continue 

#         #if we propose endpoints but there are none, it's wrong
#         elif proposed_endpoints_array.size > 0 and endpoint_array.size == 0:
#             out_df.at[index, 'precision'] = 0.0
#             out_df.at[index, 'recall'] = 0.0
#             out_df.at[index, 'f1'] = 0.0
#             continue

#         #should skip for now if the endpoint array is size zero to avoid divide by zero error
#         elif endpoint_array.size == 0:
#             out_df.at[index, 'precision'] = null
#             out_df.at[index, 'recall'] = null
#             out_df.at[index, 'f1'] = null
#             continue

#         else:                                                                                   
#             endpoint_ids = np.arange(0, len(endpoint_array))
#             test_ids = np.arange(0, len(proposed_endpoints_array)) + len(proposed_endpoints_array)

#             #get precision and recall at end of each iteration 
#             analysis = run_synapse_analysis(
#                 endpoint_array,
#                 np.array(endpoint_ids),
#                 proposed_endpoints_array,
#                 np.array(test_ids),
#                 threshold,
#                 iso_correction=10,
#                 )        

#             output.loc[len(output.index)] = [run, analysis.precision, analysis.recall, analysis.f1, segID, len(endpoint_array), endpoint_array, row["comments"], proposed_endpoints_array]

#             # out_df.at[index,'precision'] = analysis.precision
#             # out_df.at[index,'recall'] = analysis.recall
#             # out_df.at[index,'f1'] = analysis.f1
            
#     return output

In [None]:
# #TEST 1: invalidation_d at default: 12000, WITH Humfrey_Smoothing: 50 iterations, Decimation_Factor = 0.50

# #should track total amount of correct gt recovered 

# def testing_metrics(load, threshold):
#     for index, row in load.iterrows():
#         #reset num correct
#         # num_correct = 0
#         endpoint_array = np.array(row["endpoints"])
#         proposed_endpoints_array = np.array(row["proposed_endpoints"])
#         # print(f"Seg id: {row['seg_id']}")
#         segID = row["seg_id"]
#         # print("Endpoint Array:")
#         # print(endpoint_array)
#         #print(len(endpoint_array))
#         # print()
#         # print("Proposed Endpoint Array:")
#         # print(proposed_endpoints_array)
#         # print()
#         #print(load["Endpoints"])
        
#         #skip anything seg_id that isn't 'good'
#         if row["comments"] != 'good':
#             print("skipping \n")
#             continue

#         #learn better pandas filtered rather than if else statements     
#         #maybe skip v-shaped segments
#         #add in a label for ground truth 
        
#         #if both say no endpoints, then it's correct
#         elif (len(proposed_endpoints_array.shape) == 0 and len(endpoint_array.shape) == 0):
#             # print("correct, no endpoints")
#             # print("\nNEXT POINTS")
#             out_df.at[index, 'precision'] =1.0
#             out_df.at[index, 'recall'] = 1.0
#             out_df.at[index, 'f1'] = 1.0

#             continue

#         #if we propose no endpoints but there are endpoints, it's wrong
#         elif (len(proposed_endpoints_array.shape) == 0 and len(endpoint_array.shape) > 0):
#             # print("no proposed endpoints found, but ground truth endpoints exist")
#             # print("\nNEXT POINTS")
#             out_df.at[index, 'precision'] = 0.0
#             out_df.at[index, 'recall'] = 0.0
#             out_df.at[index, 'f1'] = 0.0
#             continue 

#         #if we propose endpoints but there are none, it's wrong
#         elif proposed_endpoints_array.size > 0 and endpoint_array.size == 0:
#             # print("not correct--too many proposed endpoints but none ground truth endpoints exist")
#             # print("\nNEXT POINTS")
#             out_df.at[index, 'precision'] = 0.0
#             out_df.at[index, 'recall'] = 0.0
#             out_df.at[index, 'f1'] = 0.0
#             continue

#         #should skip for now if the endpoint array is size zero to avoid divide by zero error
#         elif endpoint_array.size == 0:
#             # print("array size is 0")
#             # print("\nNEXT POINTS")
#             out_df.at[index, 'precision'] = null
#             out_df.at[index, 'recall'] = null
#             out_df.at[index, 'f1'] = null
#             continue

#         else:                                                                                   
#             # for proposed_endpoint in proposed_endpoints_array:
#                 # smallest_distance= find_euclidian_distance(proposed_endpoint, endpoint_array)
#                 # print(smallest_distance)
                
#                 #compare to threshold and return yes or no 
#                 # if smallest_distance < threshold:
#                 #     print("correct")
#                 #     num_correct = num_correct + 1
#                 #     print()
#                 # else:
#                 #     print("wrong")
#                 #     print()
#                 # pass

#         # endpoint_ids = []
#         # test_ids = []
#         # for i in range(0, len(endpoint_array)):
#         #     endpoint_ids.append(segID)
            
#         # for i in range(0, len(proposed_endpoints_array)):
#         #     test_ids.append(segID)

#         endpoint_ids = np.arange(0, len(endpoint_array))
#         test_ids = np.arange(0, len(proposed_endpoints_array)) + len(proposed_endpoints_array)

#         #get precision and recall at end of each iteration 
#         analysis = run_synapse_analysis(
#             endpoint_array,
#             np.array(endpoint_ids),
#             proposed_endpoints_array,
#             np.array(test_ids),
#             threshold,
#             iso_correction=10,
#             )        

#         out_df.at[index,'precision'] = analysis.precision
#         out_df.at[index,'recall'] = analysis.recall
#         out_df.at[index,'f1'] = analysis.f1

In [None]:
# Write the results table to results.csv.
# NOTE(review): in the visible notebook, out_df is only created in a later cell (under
# "Testing different combinations of params"), so on a fresh top-to-bottom run this
# cell presumably fails -- TODO move the export after the parameter sweep.
out_df.to_csv('results.csv')

In [None]:
# Scratch experiment: appending rows to an empty DataFrame through .loc.
pd_df = pd.DataFrame(columns=["run", "precision", "recall"])
pd_df_2 = pd.DataFrame(columns=["run", "precision"])
for i in range(0,7):
    # NOTE(review): the row label is taken from pd_df (never modified here, so always 0)
    # rather than pd_df_2, and an empty list is assigned to a two-column frame. This
    # presumably raises on the column mismatch -- confirm the intent before reusing.
    pd_df_2.loc[len(pd_df.index)] = []


Make a threshold for each endpoint.
Experiment with a box around each endpoint (boxes cannot overlap).
For each endpoint, see if a prediction is found in its box -- if yes, it is a true found endpoint; if not, it is not an endpoint.
Find the distance from each predicted endpoint to the ground truth.
Could still do MSE.

Threshold: 
- find the size of the segment (maybe via connected components)
- array from the skeleton
- a flat segment is an edge case
- maybe a percentage
-- a hard number





### Testing different combinations of params

In [None]:
# Reload the ground-truth table so the sweep starts from the raw CSV contents.
load = pd.read_csv("agents/Data/endpoints_gt5.csv")
# Reset the seg_id -> proposed endpoints mapping filled in by endpoint_generator.
endpoints_dict = {}

# Series handles for the desired columns.
segIDs = load['seg_id']
Endpoints = load['endpoints']


In [None]:
# Empty results table. testing_metrics appends one row per scored segment,
# in exactly this column order.

out_df = pd.DataFrame(columns=["run", "precision", "recall", "f1", "seg_id", "num_endpoints", "endpoints", "comments", "proposed_endpoints"])

In [None]:
# Sweep over tip-finder parameter combinations and score each one.
run = 0
# run number -> parameters used, so rows in out_df can be traced back to a setting.
sweep_params = {}
# for invalidation_d in range(4000, 6001, 200):
for invalidation_d in range(4000, 4201, 200):
    # for num_humfrey_iters in range(0,251,25):
    for num_humfrey_iters in range(50, 76, 25):
        # for decimation_factor in np.arange(0.3, 0.71, 0.04):
        for decimation_factor in np.arange(0.3, 0.35, 0.04):
            run += 1
            sweep_params[run] = {
                "invalidation_d": invalidation_d,
                "humfrey_iters": num_humfrey_iters,
                "decimation_factor": float(decimation_factor),
            }
            # endpoint_generator modifies the frame it is given (it parses the
            # 'endpoints' strings), so hand it a fresh copy each time; otherwise
            # the second iteration would re-parse an already-parsed column.
            load_with_proposed_endpoints = endpoint_generator(
                load.copy(),
                invalidation_d=invalidation_d,
                humfrey_iters=num_humfrey_iters,
                decimation_factor=decimation_factor,
            )
            out_df = testing_metrics(
                load_with_proposed_endpoints, threshold=500, run=run, output=out_df)


### Pseudocode:

generate load(loading in w/o proposed endpoints)
generate the out_df here

for (variations of invalidation_d):
    for (variations of num_humfrey_iter):
        for (variations of decimation_factor):
            load_w_proposed = endpoint_generator(load, put in the invalidation_d, humfrey_iters, and decimation factor)
            pass load with proposed into testing block, have testing block return a df
            append the df returned to out_df


export out_df to a csv(results.csv)

### Comments

Need to compile results!

add to dataframe all of the precision and recalls 
plot the results and check for 
params don't have to be the best 


make a separate dataframe

look: pd.concat

mention the params 

or have a column that has the run number

PRECISION: # proposed endpoints that match ground truth / # proposed endpoints

RECALL: # ground-truth endpoints recovered / # ground-truth endpoints
(would be 100% if you found all the correct endpoints...NO EXTRAS)