In [3]:
from scipy.optimize import linear_sum_assignment
import torch
import argparse
from torch.utils.data import DataLoader
import numpy as np
from model import NIT_Registration, neuron_data_pytorch
import pickle
import os
from cpd_rigid_sep import register_rigid
from cpd_nonrigid_sep import register_nonrigid
import matplotlib.pyplot as plt
import time
import scipy.io as sio

import pandas as pd

In [4]:
def find_match(label_ref, label_mov):
    """Pair up entries that carry the same non-empty label in two label arrays.

    Parameters
    ----------
    label_ref : array-like of str
        Labels of the reference point set; '' marks an unlabelled entry.
    label_mov : array-like of str
        Labels of the moving point set; '' marks an unlabelled entry.

    Returns
    -------
    match : np.ndarray of int, shape (K, 2)
        One row ``[ref_index, mov_index]`` per reference entry whose label
        also occurs (non-empty) in ``label_mov``. When a label occurs more
        than once in ``label_mov`` its last occurrence is used.
    unlabel : np.ndarray of int, shape (U,)
        Indices into ``label_ref`` that found no partner in ``label_mov``.

    Both arrays are always returned, even when there is nothing to match, so
    callers can safely unpack two values and slice ``match[:, 0]``.
    """
    # asarray makes the element-wise `!= ''` below work for plain lists too
    label_ref = np.asarray(label_ref)
    label_mov = np.asarray(label_mov)

    if len(label_ref) == 0 or len(label_mov) == 0:
        # nothing can be matched: every reference entry is left without a partner
        return np.empty((0, 2), dtype=int), np.arange(len(label_ref))

    # label -> index in label_mov (later duplicates overwrite earlier ones)
    mov_index_of = {label_mov[i]: i for i in np.flatnonzero(label_mov != '')}

    match = []
    unlabel = []
    for ref_idx, label in enumerate(label_ref):
        if label in mov_index_of:
            match.append([ref_idx, mov_index_of[label]])
        else:
            unlabel.append(ref_idx)

    # reshape keeps the (K, 2) layout even when K == 0
    return np.array(match, dtype=int).reshape(-1, 2), np.array(unlabel, dtype=int)

In [12]:
# Sanity check: register '1.csv' against itself and report how often the
# CPD + Hungarian assignment recovers the ground-truth pairing.
aligned_path = '/Users/danielysprague/foco_lab/data/aligned_2024_03_11/aligned_full'

temp = pd.read_csv(os.path.join(aligned_path, '1.csv'))
temp = temp.fillna('')
temp_gt = np.asarray(temp['ID'])
temp_pos = np.asarray(temp[['aligned_x', 'aligned_y', 'aligned_z']])
temp_pos = (temp_pos - np.median(temp_pos, axis=0))/84
temp_pos_aff = np.asarray(temp[['real_X', 'real_Y', 'real_Z']])
temp_pos_aff = (temp_pos_aff - np.max(temp_pos_aff, axis=0)/2)/84
temp_col = np.asarray(temp[['aligned_R', 'aligned_G', 'aligned_B']])
# min-max scale every colour channel to [-0.1, 0.1]
temp_col = ((temp_col - np.min(temp_col, axis=0))/(np.max(temp_col, axis=0)-np.min(temp_col, axis=0))-0.5)/5

test = pd.read_csv(os.path.join(aligned_path, '1.csv'))
test = test.fillna('')
test_gt = np.asarray(test['ID'])
test_pos = np.asarray(test[['aligned_x', 'aligned_y', 'aligned_z']])
test_pos = (test_pos - np.median(test_pos, axis=0))/84
test_pos_aff = np.asarray(test[['real_X', 'real_Y', 'real_Z']])
test_pos_aff = (test_pos_aff - np.max(test_pos_aff, axis=0)/2)/84
test_col = np.asarray(test[['aligned_R', 'aligned_G', 'aligned_B']])
test_col = ((test_col - np.min(test_col, axis=0))/(np.max(test_col, axis=0)-np.min(test_col, axis=0))-0.5)/5

match, unlabel = find_match(temp_gt, test_gt)
# rows are [template index, test index]; reshape keeps it 2-D when nothing matched
match = np.asarray(match, dtype=int).reshape(-1, 2)

# cpd_match returns ([], None) whenever best_Np > Np, so the previous np.inf
# always triggered the early exit; -np.inf never does.
assigns, score = cpd_match(test_pos, temp_pos, test_col, temp_col, match, -np.inf, method='color',plot=False)
corr = np.zeros((len(test_gt),5))

print(score)

# template index expected for every test row (-1 = no ground-truth partner)
expected_ref = np.full(len(test_gt), -1)
expected_ref[match[:, 1]] = match[:, 0]

# fill every rank so the "top-5" figure below is not just a copy of top-1
for k in range(corr.shape[1]):
    row, col = assigns[k]
    corr[row, k] = (col == expected_ref[row])

corr1 = corr[:,0]
corr2 = np.logical_or(corr[:,0], corr[:,1])
corr3 = np.logical_or(corr2, corr[:,2])
corr4 = np.logical_or(corr3, corr[:,3])
corr5 = np.logical_or(corr4, corr[:,4])

print(np.sum(corr1)/len(match))
print(np.sum(corr5)/len(match))



0.999999771261356
1.0
1.0


  if col_label == match_ref:


In [178]:
import time

In [5]:
def find_match_dis(mov, ref, match_dict):
    """Squared Euclidean distance of every ground-truth pair.

    Parameters
    ----------
    mov : np.ndarray, shape (N, D)
        Moving point set.
    ref : np.ndarray, shape (M, D)
        Reference point set.
    match_dict : array-like of int, shape (K, 2)
        Rows ``[ref_index, mov_index]`` (despite the name this is an array,
        not a dict).

    Returns
    -------
    np.ndarray, shape (K,)
        ``sum((mov[mov_index] - ref[ref_index]) ** 2)`` for each row; empty
        when ``match_dict`` is empty.
    """
    # reshape tolerates the shape-(0,) array produced when nothing matched
    pairs = np.asarray(match_dict, dtype=int).reshape(-1, 2)
    # only the K requested pairs are evaluated instead of the full N x M matrix
    diff = mov[pairs[:, 1]] - ref[pairs[:, 0]]
    return np.sum(diff ** 2, axis=1)


def cpd_match(mov, ref, mov_col, ref_col, match_dict, best_Np, method='max', plot=False,
              w=0.1, lamb=4e3, beta=0.25, n_assign=5):
    """Register `mov` onto `ref` with CPD and derive ranked one-to-one assignments.

    Parameters
    ----------
    mov, ref : np.ndarray, shape (N, D) / (M, D)
        Moving and reference point coordinates.
    mov_col, ref_col : np.ndarray
        Per-point colour features; only used when ``method == 'color'``, where
        they are stacked column-wise next to the coordinates.
    match_dict : np.ndarray, shape (K, 2)
        Ground-truth rows ``[ref_index, mov_index]``; used only to decide which
        of the two rigid orientations fits better.
    best_Np : float
        Best rigid score seen so far by the caller. If it is strictly greater
        than this pair's score the function returns early.
    method : {'rigid', 'color', other}
        'rigid' -> distances from the rigid result; 'color' -> non-rigid result
        plus colour; anything else -> non-rigid result only.
    plot : bool
        Scatter-plot the first two coordinates of every stage.
    w, lamb, beta : float
        Forwarded to ``register_rigid`` (``w``) and ``register_nonrigid``
        (``w``, ``lamb``, ``beta``).
    n_assign : int
        Number of successive assignments to compute.

    Returns
    -------
    assigns : dict[int, list] or []
        ``assigns[k] = [row_inds, col_inds]`` from ``linear_sum_assignment``.
        After each round the chosen entries are set to ``inf`` so the next
        round has to pick different pairs. ``[]`` on early exit.
    Np : float or None
        Fifth value returned by ``register_rigid`` for the chosen orientation
        (presumably CPD's effective number of matched points -- confirm in
        cpd_rigid_sep). ``None`` on early exit.
    """
    # rigid CPD on the points as given ...
    mov_rigid, _, _, _, Np = register_rigid(ref, mov, w=w, fix_scale=True)
    ori_dis = find_match_dis(mov_rigid, ref, match_dict)

    # ... and again after negating the first two coordinates
    mov_rigid_inv = np.copy(mov_rigid)
    mov_rigid_inv[:, :2] *= -1
    mov_rigid_inv, _, _, _, Np_inv = register_rigid(ref, mov_rigid_inv, w=w, fix_scale=True)
    inv_dis = find_match_dis(mov_rigid_inv, ref, match_dict)

    # keep whichever orientation puts the ground-truth pairs closer together
    if np.mean(ori_dis) > np.mean(inv_dis):
        mov_new = mov_rigid_inv
        Np = Np_inv
    else:
        mov_new = mov_rigid

    # Skip the non-rigid step when this template already scores below the
    # caller's current best. NOTE(review): the original comment mentioned a
    # 10% margin, but the comparison has never applied one.
    if best_Np > Np:
        return [], None

    mov_nonrigid = register_nonrigid(ref, mov_new, w=w, lamb=lamb, beta=beta)

    if plot:
        plt.scatter(mov[:, 0], mov[:, 1], c='red')
        plt.scatter(mov_new[:, 0], mov_new[:, 1], c='yellow')
        plt.scatter(mov_nonrigid[:, 0], mov_nonrigid[:, 1], c='green')
        plt.scatter(ref[:, 0], ref[:, 1], c='black')
        plt.show()

    # choose the feature vectors the assignment cost is built from
    if method == 'rigid':
        mov_feat, ref_feat = mov_new, ref
    elif method == 'color':
        mov_feat = np.hstack((mov_nonrigid, mov_col))
        ref_feat = np.hstack((ref, ref_col))
    else:
        mov_feat, ref_feat = mov_nonrigid, ref

    # pairwise squared Euclidean distances, shape (N, M)
    dis_m = np.sum((mov_feat[:, np.newaxis, :] - ref_feat[np.newaxis, :, :]) ** 2, axis=2)

    assigns = {}
    for k in range(n_assign):
        row_inds, col_inds = linear_sum_assignment(dis_m)
        assigns[k] = [row_inds, col_inds]
        # forbid the pairs just used so round k+1 yields the next-best matching
        dis_m[row_inds, col_inds] = np.inf

    return assigns, Np

In [4]:
%matplotlib qt

fig, axs = plt.subplots()
sns.heatmap(acc)
plt.show()

NameError: name 'sns' is not defined

In [7]:
import warnings

In [6]:
# File stems (name without '.csv') to exclude: too bent or otherwise problematic.
# Duplicate entries were removed; order of first appearance is kept.
skip_files = [
    '20231013-9-30-0',
    '20230412-20-15-17',
    '2023-01-23-01',
    # NOTE(review): '20239828' is not a valid date -- possibly meant
    # '20230928-11-14-0'; left unchanged until confirmed.
    '20239828-11-14-0',
    '2023-01-05-01',
    '2023-01-10-14',
    '2022-06-28-07',
    '2022-07-26-01',
    '2023-01-19-15',
    '2022-07-15-06',
    '2022-08-02-01',
    '2023-01-09-08',
    '2023-01-09-28',
    '2023-01-17-14',
    '2023-01-19-22',
]

In [27]:
# All-vs-all template search: every file is registered against every other
# file, the template with the highest cpd_match score wins, and top-1..top-5
# label accuracy against that template is written to CSV.
aligned_path = '/Users/danielysprague/foco_lab/data/aligned_2024_03_11/aligned_full'

files = [f for f in os.listdir(aligned_path) if not f.startswith('.') and not f[:-4] in skip_files]
print(len(files))

# accN[j, i] = top-N accuracy of test file j against template file i
acc = np.zeros((len(files), len(files)))
acc2 = np.zeros_like(acc)
acc3 = np.zeros_like(acc)
acc4 = np.zeros_like(acc)
acc5 = np.zeros_like(acc)

# NaN = no template produced a usable score for that test file
best_score_idxs = np.full(len(files), np.nan)
best_scores = np.zeros(len(files))

with warnings.catch_warnings(action="ignore"):
    for j, test_file in enumerate(files):
        print(j)
        print(test_file)
        curr_best_score = 0
        curr_best_idx = None
        test = pd.read_csv(os.path.join(aligned_path, test_file))
        # NOTE(review): missing IDs count as duplicates of each other here, so
        # only the first unlabelled row survives -- confirm this is intended.
        test = test.drop_duplicates(subset=['ID'], keep='first')
        test = test.fillna('')
        test_gt = np.asarray(test['ID'])
        test_pos = np.asarray(test[['aligned_x', 'aligned_y', 'aligned_z']])
        test_pos = (test_pos - np.median(test_pos, axis=0))/84
        test_pos_aff = np.asarray(test[['real_X', 'real_Y', 'real_Z']])
        test_pos_aff = (test_pos_aff - np.median(test_pos_aff, axis=0))/84
        test_col = np.asarray(test[['real_R', 'real_G', 'real_B']])
        test_col = ((test_col - np.min(test_col, axis=0))/(np.max(test_col, axis=0)-np.min(test_col, axis=0))-0.5)/5

        num_labels = len(test[test['ID']!=''])
        print(num_labels)

        for i, temp_file in enumerate(files):
            if j == i:
                continue
            temp = pd.read_csv(os.path.join(aligned_path, temp_file))
            temp = temp.drop_duplicates(subset=['ID'], keep='first' )
            temp = temp.fillna('')
            temp_gt = np.asarray(temp['ID'])
            temp_pos = np.asarray(temp[['aligned_x', 'aligned_y', 'aligned_z']])
            temp_pos = (temp_pos - np.median(temp_pos, axis=0))/84
            temp_pos_aff = np.asarray(temp[['real_X', 'real_Y', 'real_Z']])
            temp_pos_aff = (temp_pos_aff - np.median(temp_pos_aff, axis=0))/84
            temp_col = np.asarray(temp[['real_R', 'real_G', 'real_B']])
            temp_col = ((temp_col - np.min(temp_col, axis=0))/(np.max(temp_col, axis=0)-np.min(temp_col, axis=0))-0.5)/5

            match, unlabel = find_match(temp_gt, test_gt)
            # rows are [template index, test index]; keep 2-D even when empty
            match = np.asarray(match, dtype=int).reshape(-1, 2)

            assigns, score = cpd_match(test_pos, temp_pos, test_col, temp_col,match, curr_best_score, method='color',plot=False)

            # accuracy is only evaluated for a template that beats the current best
            if score is None or not score > curr_best_score:
                continue
            curr_best_score = score
            curr_best_idx = i

            # template index expected for every test row (-1 = no partner);
            # replaces the per-element `if col_label == match_ref`, which
            # depended on the truth value of an empty array
            expected_ref = np.full(len(test_gt), -1)
            expected_ref[match[:, 1]] = match[:, 0]

            corr = np.zeros((len(test_gt),5))
            for k in range(5):
                row, col = assigns[k]
                corr[row, k] = (col == expected_ref[row])

            corr1 = corr[:,0]
            corr2 = np.logical_or(corr[:,0], corr[:,1])
            corr3 = np.logical_or(corr2, corr[:,2])
            corr4 = np.logical_or(corr3, corr[:,3])
            corr5 = np.logical_or(corr4, corr[:,4])

            acc[j, i] =  np.sum(corr1)/num_labels
            acc2[j, i] =  np.sum(corr2)/num_labels
            acc3[j, i] =  np.sum(corr3)/num_labels
            acc4[j, i] =  np.sum(corr4)/num_labels
            acc5[j, i] =  np.sum(corr5)/num_labels

        best_scores[j] = curr_best_score
        if curr_best_idx is not None:
            best_score_idxs[j] = curr_best_idx

best_temp = {}
records = []

for i, file in enumerate(files):
    if np.isnan(best_score_idxs[i]):
        # no template was selected; int(nan) would raise below
        continue
    best_idx = int(best_score_idxs[i])
    best_file = files[best_idx]
    best_temp[file] = (best_file, acc[i,best_idx], acc5[i,best_idx])

    records.append([file[:-4], best_file[:-4],acc[i,best_idx], acc2[i,best_idx], acc3[i,best_idx], acc4[i,best_idx] ,acc5[i,best_idx]])

# build the frame once instead of growing it row by row
CPD_df = pd.DataFrame(records, columns=['Filename', 'Best_template', 'top1', 'top2','top3', 'top4', 'top5'])
CPD_df.to_csv('/Users/danielysprague/foco_lab/data/Acc_CPD/match_all_temp.csv')

104
0
6.csv
67


1
7.csv
65
2
2022-12-21-06.csv
54
3
20221106-21-00-09.csv
33
4
20230322-20-16-50.csv
44
5
5.csv
64
6
2023-01-06-08.csv
72
7
4.csv
63
8
20230918-17-21-0.csv
156
9
2021-12-03-w00-NP1.csv
57
10
20221106-21-47-31.csv
53
11
20230322-22-43-03.csv
42
12
20230506-12-56-00.csv
47
13
20230506-14-24-57.csv
51
14
1.csv
62
15
38_YAaDV.csv
189
16
2023-01-16-08.csv
68
17
20230510-13-25-46.csv
46
18
56_YAaDV.csv
190
19
20190928_08.csv
183
20
3.csv
64
21
2023-01-16-22.csv
78
22
2.csv
58
23
2023-01-13-07.csv
72
24
20191104_10.csv
174
25
20190929_06.csv
175
26
20230928-14-27-0.csv
154
27
64_YAaDV.csv
189
28
2023-01-23-15.csv
78
29
20190929_07.csv
176
30
2023-01-17-07.csv
79
31
20191030_03.csv
182
32
20190929_05.csv
168
33
2022-03-05-w00-NP1.csv
52
34
20190925_04.csv
176
35
11_YAaLR.csv
193
36
76_YAaDV.csv
188
37
2022-07-15-12.csv
64
38
20191030_07.csv
175
39
20190925_01.csv
180
40
2023-01-17-01.csv
83
41
20230928-11-14-0.csv
157
42
2023-01-09-15.csv
95
43
20190929_03.csv
175
44
20231013-11-11-0.csv
163
4

In [32]:
# Template search restricted to NP templates ('aligned_NP_nomatch' data):
# every test file is registered against each template, the template with the
# highest cpd_match score wins, and top-1..top-5 accuracy is written to CSV.
aligned_path = '/Users/danielysprague/foco_lab/data/aligned_2024_03_11/aligned_NP_nomatch'

# list both directories once instead of once per comprehension element
aligned_entries = os.listdir(aligned_path)
nwb_names = set(os.listdir('/Users/danielysprague/foco_lab/data/final_nwb/NP_og'))

test_files = [f for f in aligned_entries if not f.startswith('.') and not f[:-4] in skip_files]
temp_files = [f for f in aligned_entries if f[:-4]+'.nwb' in nwb_names]
# size everything from this cell's own lists, not the stale global `files`
print(len(test_files))

# accN[j, i] = top-N accuracy of test file j against template file i
acc = np.zeros((len(test_files), len(temp_files)))
acc2 = np.zeros_like(acc)
acc3 = np.zeros_like(acc)
acc4 = np.zeros_like(acc)
acc5 = np.zeros_like(acc)

# NaN = no template produced a usable score for that test file
best_score_idxs = np.full(len(test_files), np.nan)
best_scores = np.zeros(len(test_files))

with warnings.catch_warnings(action="ignore"):
    for j, test_file in enumerate(test_files):
        print(j)
        print(test_file)
        curr_best_score = 0
        curr_best_idx = None
        test = pd.read_csv(os.path.join(aligned_path, test_file))
        # NOTE(review): missing IDs count as duplicates of each other here, so
        # only the first unlabelled row survives -- confirm this is intended.
        test = test.drop_duplicates(subset=['ID'], keep='first')
        test = test.fillna('')
        test_gt = np.asarray(test['ID'])
        test_pos = np.asarray(test[['aligned_x', 'aligned_y', 'aligned_z']])
        test_pos = (test_pos - np.median(test_pos, axis=0))/84
        test_pos_aff = np.asarray(test[['real_X', 'real_Y', 'real_Z']])
        test_pos_aff = (test_pos_aff - np.median(test_pos_aff, axis=0))/84
        test_col = np.asarray(test[['real_R', 'real_G', 'real_B']])
        test_col = ((test_col - np.min(test_col, axis=0))/(np.max(test_col, axis=0)-np.min(test_col, axis=0))-0.5)/5

        num_labels = len(test[test['ID']!=''])
        print(num_labels)

        for i, temp_file in enumerate(temp_files):
            # i and j index different lists, so compare names to avoid
            # matching a file against itself
            if temp_file == test_file:
                continue
            temp = pd.read_csv(os.path.join(aligned_path, temp_file))
            temp = temp.drop_duplicates(subset=['ID'], keep='first' )
            temp = temp.fillna('')
            temp_gt = np.asarray(temp['ID'])
            temp_pos = np.asarray(temp[['aligned_x', 'aligned_y', 'aligned_z']])
            temp_pos = (temp_pos - np.median(temp_pos, axis=0))/84
            temp_pos_aff = np.asarray(temp[['real_X', 'real_Y', 'real_Z']])
            temp_pos_aff = (temp_pos_aff - np.median(temp_pos_aff, axis=0))/84
            temp_col = np.asarray(temp[['real_R', 'real_G', 'real_B']])
            temp_col = ((temp_col - np.min(temp_col, axis=0))/(np.max(temp_col, axis=0)-np.min(temp_col, axis=0))-0.5)/5

            match, unlabel = find_match(temp_gt, test_gt)
            # rows are [template index, test index]; keep 2-D even when empty
            match = np.asarray(match, dtype=int).reshape(-1, 2)

            assigns, score = cpd_match(test_pos, temp_pos, test_col, temp_col,match, curr_best_score, method='color',plot=False)

            # accuracy is only evaluated for a template that beats the current best
            if score is None or not score > curr_best_score:
                continue
            curr_best_score = score
            curr_best_idx = i

            # template index expected for every test row (-1 = no partner);
            # replaces the per-element `if col_label == match_ref`, which
            # depended on the truth value of an empty array
            expected_ref = np.full(len(test_gt), -1)
            expected_ref[match[:, 1]] = match[:, 0]

            corr = np.zeros((len(test_gt),5))
            for k in range(5):
                row, col = assigns[k]
                corr[row, k] = (col == expected_ref[row])

            corr1 = corr[:,0]
            corr2 = np.logical_or(corr[:,0], corr[:,1])
            corr3 = np.logical_or(corr2, corr[:,2])
            corr4 = np.logical_or(corr3, corr[:,3])
            corr5 = np.logical_or(corr4, corr[:,4])

            acc[j, i] =  np.sum(corr1)/num_labels
            acc2[j, i] =  np.sum(corr2)/num_labels
            acc3[j, i] =  np.sum(corr3)/num_labels
            acc4[j, i] =  np.sum(corr4)/num_labels
            acc5[j, i] =  np.sum(corr5)/num_labels

        best_scores[j] = curr_best_score
        if curr_best_idx is not None:
            best_score_idxs[j] = curr_best_idx

best_temp = {}
records = []

for i, file in enumerate(test_files):
    if np.isnan(best_score_idxs[i]):
        # no template was selected; int(nan) would raise below
        continue
    best_idx = int(best_score_idxs[i])
    best_file = temp_files[best_idx]
    best_temp[file] = (best_file, acc[i,best_idx], acc5[i,best_idx])

    records.append([file[:-4], best_file[:-4],acc[i,best_idx], acc2[i,best_idx], acc3[i,best_idx], acc4[i,best_idx] ,acc5[i,best_idx]])

# build the frame once instead of growing it row by row
CPD_df = pd.DataFrame(records, columns=['Filename', 'Best_template', 'top1', 'top2','top3', 'top4', 'top5'])
CPD_df.to_csv('/Users/danielysprague/foco_lab/data/Acc_CPD/nomatch_NP_temp.csv')

104
0
6.csv
67
1
7.csv
65
2
2022-12-21-06.csv
54
3
20221106-21-00-09.csv
33
4
20230322-20-16-50.csv
44
5
5.csv
64
6
2023-01-06-08.csv
72
7
4.csv
63
8
20230918-17-21-0.csv
156
9
2021-12-03-w00-NP1.csv
57
10
20221106-21-47-31.csv
53
11
20230322-22-43-03.csv
42
12
20230506-12-56-00.csv
47
13
20230506-14-24-57.csv
51
14
1.csv
62
15
38_YAaDV.csv
189
16
2023-01-16-08.csv
68
17
20230510-13-25-46.csv
46
18
56_YAaDV.csv
190
19
20190928_08.csv
183
20
3.csv
64
21
2023-01-16-22.csv
78
22
2.csv
58
23
2023-01-13-07.csv
72
24
20191104_10.csv
174
25
20190929_06.csv
175
26
20230928-14-27-0.csv
154
27
64_YAaDV.csv
189
28
2023-01-23-15.csv
78
29
20190929_07.csv
176
30
2023-01-17-07.csv
79
31
20191030_03.csv
182
32
20190929_05.csv
168
33
2022-03-05-w00-NP1.csv
52
34
20190925_04.csv
176
35
11_YAaLR.csv
193
36
76_YAaDV.csv
188
37
2022-07-15-12.csv
64
38
20191030_07.csv
175
39
20190925_01.csv
180
40
2023-01-17-01.csv
83
41
20230928-11-14-0.csv
157
42
2023-01-09-15.csv
95
43
20190929_03.csv
175
44
20231013-11

In [33]:
# Template search restricted to NP templates ('aligned_NP' data): every test
# file is registered against each template, the template with the highest
# cpd_match score wins, and top-1..top-5 accuracy is written to CSV.
aligned_path = '/Users/danielysprague/foco_lab/data/aligned_2024_03_11/aligned_NP'

# list both directories once instead of once per comprehension element
aligned_entries = os.listdir(aligned_path)
nwb_names = set(os.listdir('/Users/danielysprague/foco_lab/data/final_nwb/NP_og'))

test_files = [f for f in aligned_entries if not f.startswith('.') and not f[:-4] in skip_files]
temp_files = [f for f in aligned_entries if f[:-4]+'.nwb' in nwb_names]
# size everything from this cell's own lists, not the stale global `files`
print(len(test_files))

# accN[j, i] = top-N accuracy of test file j against template file i
acc = np.zeros((len(test_files), len(temp_files)))
acc2 = np.zeros_like(acc)
acc3 = np.zeros_like(acc)
acc4 = np.zeros_like(acc)
acc5 = np.zeros_like(acc)

# NaN = no template produced a usable score for that test file
best_score_idxs = np.full(len(test_files), np.nan)
best_scores = np.zeros(len(test_files))

with warnings.catch_warnings(action="ignore"):
    for j, test_file in enumerate(test_files):
        print(j)
        print(test_file)
        curr_best_score = 0
        curr_best_idx = None
        test = pd.read_csv(os.path.join(aligned_path, test_file))
        # NOTE(review): missing IDs count as duplicates of each other here, so
        # only the first unlabelled row survives -- confirm this is intended.
        test = test.drop_duplicates(subset=['ID'], keep='first')
        test = test.fillna('')
        test_gt = np.asarray(test['ID'])
        test_pos = np.asarray(test[['aligned_x', 'aligned_y', 'aligned_z']])
        test_pos = (test_pos - np.median(test_pos, axis=0))/84
        test_pos_aff = np.asarray(test[['real_X', 'real_Y', 'real_Z']])
        test_pos_aff = (test_pos_aff - np.median(test_pos_aff, axis=0))/84
        test_col = np.asarray(test[['real_R', 'real_G', 'real_B']])
        test_col = ((test_col - np.min(test_col, axis=0))/(np.max(test_col, axis=0)-np.min(test_col, axis=0))-0.5)/5

        num_labels = len(test[test['ID']!=''])
        print(num_labels)

        for i, temp_file in enumerate(temp_files):
            # i and j index different lists, so compare names to avoid
            # matching a file against itself
            if temp_file == test_file:
                continue
            temp = pd.read_csv(os.path.join(aligned_path, temp_file))
            temp = temp.drop_duplicates(subset=['ID'], keep='first' )
            temp = temp.fillna('')
            temp_gt = np.asarray(temp['ID'])
            temp_pos = np.asarray(temp[['aligned_x', 'aligned_y', 'aligned_z']])
            temp_pos = (temp_pos - np.median(temp_pos, axis=0))/84
            temp_pos_aff = np.asarray(temp[['real_X', 'real_Y', 'real_Z']])
            temp_pos_aff = (temp_pos_aff - np.median(temp_pos_aff, axis=0))/84
            temp_col = np.asarray(temp[['real_R', 'real_G', 'real_B']])
            temp_col = ((temp_col - np.min(temp_col, axis=0))/(np.max(temp_col, axis=0)-np.min(temp_col, axis=0))-0.5)/5

            match, unlabel = find_match(temp_gt, test_gt)
            # rows are [template index, test index]; keep 2-D even when empty
            match = np.asarray(match, dtype=int).reshape(-1, 2)

            assigns, score = cpd_match(test_pos, temp_pos, test_col, temp_col,match, curr_best_score, method='color',plot=False)

            # accuracy is only evaluated for a template that beats the current best
            if score is None or not score > curr_best_score:
                continue
            curr_best_score = score
            curr_best_idx = i

            # template index expected for every test row (-1 = no partner);
            # replaces the per-element `if col_label == match_ref`, which
            # depended on the truth value of an empty array
            expected_ref = np.full(len(test_gt), -1)
            expected_ref[match[:, 1]] = match[:, 0]

            corr = np.zeros((len(test_gt),5))
            for k in range(5):
                row, col = assigns[k]
                corr[row, k] = (col == expected_ref[row])

            corr1 = corr[:,0]
            corr2 = np.logical_or(corr[:,0], corr[:,1])
            corr3 = np.logical_or(corr2, corr[:,2])
            corr4 = np.logical_or(corr3, corr[:,3])
            corr5 = np.logical_or(corr4, corr[:,4])

            acc[j, i] =  np.sum(corr1)/num_labels
            acc2[j, i] =  np.sum(corr2)/num_labels
            acc3[j, i] =  np.sum(corr3)/num_labels
            acc4[j, i] =  np.sum(corr4)/num_labels
            acc5[j, i] =  np.sum(corr5)/num_labels

        best_scores[j] = curr_best_score
        if curr_best_idx is not None:
            best_score_idxs[j] = curr_best_idx

best_temp = {}
records = []

for i, file in enumerate(test_files):
    if np.isnan(best_score_idxs[i]):
        # no template was selected; int(nan) would raise below
        continue
    best_idx = int(best_score_idxs[i])
    best_file = temp_files[best_idx]
    best_temp[file] = (best_file, acc[i,best_idx], acc5[i,best_idx])

    records.append([file[:-4], best_file[:-4],acc[i,best_idx], acc2[i,best_idx], acc3[i,best_idx], acc4[i,best_idx] ,acc5[i,best_idx]])

# build the frame once instead of growing it row by row
CPD_df = pd.DataFrame(records, columns=['Filename', 'Best_template', 'top1', 'top2','top3', 'top4', 'top5'])
CPD_df.to_csv('/Users/danielysprague/foco_lab/data/Acc_CPD/match_NP_temp.csv')

104
0
6.csv
67


1
7.csv
65
2
2022-12-21-06.csv
54
3
20221106-21-00-09.csv
33
4
20230322-20-16-50.csv
44
5
5.csv
64
6
2023-01-06-08.csv
72
7
4.csv
63
8
20230918-17-21-0.csv
156
9
2021-12-03-w00-NP1.csv
57
10
20221106-21-47-31.csv
53
11
20230322-22-43-03.csv
42
12
20230506-12-56-00.csv
47
13
20230506-14-24-57.csv
51
14
1.csv
62
15
38_YAaDV.csv
189
16
2023-01-16-08.csv
68
17
20230510-13-25-46.csv
46
18
56_YAaDV.csv
190
19
20190928_08.csv
183
20
3.csv
64
21
2023-01-16-22.csv
78
22
2.csv
58
23
2023-01-13-07.csv
72
24
20191104_10.csv
174
25
20190929_06.csv
175
26
20230928-14-27-0.csv
154
27
64_YAaDV.csv
189
28
2023-01-23-15.csv
78
29
20190929_07.csv
176
30
2023-01-17-07.csv
79
31
20191030_03.csv
182
32
20190929_05.csv
168
33
2022-03-05-w00-NP1.csv
52
34
20190925_04.csv
176
35
11_YAaLR.csv
193
36
76_YAaDV.csv
188
37
2022-07-15-12.csv
64
38
20191030_07.csv
175
39
20190925_01.csv
180
40
2023-01-17-01.csv
83
41
20230928-11-14-0.csv
157
42
2023-01-09-15.csv
95
43
20190929_03.csv
175
44
20231013-11-11-0.csv
163
4

In [None]:
# NOTE(review): this cell repeats the 'aligned_NP' evaluation and writes the
# same CSV ('match_NP_temp.csv'); consider deleting one of the two copies.
# Every test file is registered against each NP template, the template with
# the highest cpd_match score wins, and top-1..top-5 accuracy is saved.
aligned_path = '/Users/danielysprague/foco_lab/data/aligned_2024_03_11/aligned_NP'

# list both directories once instead of once per comprehension element
aligned_entries = os.listdir(aligned_path)
nwb_names = set(os.listdir('/Users/danielysprague/foco_lab/data/final_nwb/NP_og'))

test_files = [f for f in aligned_entries if not f.startswith('.') and not f[:-4] in skip_files]
temp_files = [f for f in aligned_entries if f[:-4]+'.nwb' in nwb_names]
# size everything from this cell's own lists, not the stale global `files`
print(len(test_files))

# accN[j, i] = top-N accuracy of test file j against template file i
acc = np.zeros((len(test_files), len(temp_files)))
acc2 = np.zeros_like(acc)
acc3 = np.zeros_like(acc)
acc4 = np.zeros_like(acc)
acc5 = np.zeros_like(acc)

# NaN = no template produced a usable score for that test file
best_score_idxs = np.full(len(test_files), np.nan)
best_scores = np.zeros(len(test_files))

with warnings.catch_warnings(action="ignore"):
    for j, test_file in enumerate(test_files):
        print(j)
        print(test_file)
        curr_best_score = 0
        curr_best_idx = None
        test = pd.read_csv(os.path.join(aligned_path, test_file))
        # NOTE(review): missing IDs count as duplicates of each other here, so
        # only the first unlabelled row survives -- confirm this is intended.
        test = test.drop_duplicates(subset=['ID'], keep='first')
        test = test.fillna('')
        test_gt = np.asarray(test['ID'])
        test_pos = np.asarray(test[['aligned_x', 'aligned_y', 'aligned_z']])
        test_pos = (test_pos - np.median(test_pos, axis=0))/84
        test_pos_aff = np.asarray(test[['real_X', 'real_Y', 'real_Z']])
        test_pos_aff = (test_pos_aff - np.median(test_pos_aff, axis=0))/84
        test_col = np.asarray(test[['real_R', 'real_G', 'real_B']])
        test_col = ((test_col - np.min(test_col, axis=0))/(np.max(test_col, axis=0)-np.min(test_col, axis=0))-0.5)/5

        num_labels = len(test[test['ID']!=''])
        print(num_labels)

        for i, temp_file in enumerate(temp_files):
            # i and j index different lists, so compare names to avoid
            # matching a file against itself
            if temp_file == test_file:
                continue
            temp = pd.read_csv(os.path.join(aligned_path, temp_file))
            temp = temp.drop_duplicates(subset=['ID'], keep='first' )
            temp = temp.fillna('')
            temp_gt = np.asarray(temp['ID'])
            temp_pos = np.asarray(temp[['aligned_x', 'aligned_y', 'aligned_z']])
            temp_pos = (temp_pos - np.median(temp_pos, axis=0))/84
            temp_pos_aff = np.asarray(temp[['real_X', 'real_Y', 'real_Z']])
            temp_pos_aff = (temp_pos_aff - np.median(temp_pos_aff, axis=0))/84
            temp_col = np.asarray(temp[['real_R', 'real_G', 'real_B']])
            temp_col = ((temp_col - np.min(temp_col, axis=0))/(np.max(temp_col, axis=0)-np.min(temp_col, axis=0))-0.5)/5

            match, unlabel = find_match(temp_gt, test_gt)
            # rows are [template index, test index]; keep 2-D even when empty
            match = np.asarray(match, dtype=int).reshape(-1, 2)

            assigns, score = cpd_match(test_pos, temp_pos, test_col, temp_col,match, curr_best_score, method='color',plot=False)

            # accuracy is only evaluated for a template that beats the current best
            if score is None or not score > curr_best_score:
                continue
            curr_best_score = score
            curr_best_idx = i

            # template index expected for every test row (-1 = no partner);
            # replaces the per-element `if col_label == match_ref`, which
            # depended on the truth value of an empty array
            expected_ref = np.full(len(test_gt), -1)
            expected_ref[match[:, 1]] = match[:, 0]

            corr = np.zeros((len(test_gt),5))
            for k in range(5):
                row, col = assigns[k]
                corr[row, k] = (col == expected_ref[row])

            corr1 = corr[:,0]
            corr2 = np.logical_or(corr[:,0], corr[:,1])
            corr3 = np.logical_or(corr2, corr[:,2])
            corr4 = np.logical_or(corr3, corr[:,3])
            corr5 = np.logical_or(corr4, corr[:,4])

            acc[j, i] =  np.sum(corr1)/num_labels
            acc2[j, i] =  np.sum(corr2)/num_labels
            acc3[j, i] =  np.sum(corr3)/num_labels
            acc4[j, i] =  np.sum(corr4)/num_labels
            acc5[j, i] =  np.sum(corr5)/num_labels

        best_scores[j] = curr_best_score
        if curr_best_idx is not None:
            best_score_idxs[j] = curr_best_idx

best_temp = {}
records = []

for i, file in enumerate(test_files):
    if np.isnan(best_score_idxs[i]):
        # no template was selected; int(nan) would raise below
        continue
    best_idx = int(best_score_idxs[i])
    best_file = temp_files[best_idx]
    best_temp[file] = (best_file, acc[i,best_idx], acc5[i,best_idx])

    records.append([file[:-4], best_file[:-4],acc[i,best_idx], acc2[i,best_idx], acc3[i,best_idx], acc4[i,best_idx] ,acc5[i,best_idx]])

# build the frame once instead of growing it row by row
CPD_df = pd.DataFrame(records, columns=['Filename', 'Best_template', 'top1', 'top2','top3', 'top4', 'top5'])
CPD_df.to_csv('/Users/danielysprague/foco_lab/data/Acc_CPD/match_NP_temp.csv')

In [23]:
# Dump the winning score and the winning template index of every test file.
for summary in (best_scores, best_score_idxs):
    print(summary)

[ 81.  15.  51.  65.  57.  90.  77.  15.  75.  36.  97.   0.  54.  54.
  90.  27.  51.  13.  85.  31.  86.   8.  39.  74.  75.  29.  29.  85.
  42.  74.  42.  44.  24.  85.  74.  99.  85.  58.  75.  76.  34.  45.
  76.  26.  58.  21.  45.   7.  31.  81.  31.  45.  93.  74.  89.  68.
  96.  45.  32.  25.  45.  42.  95.  96.  17.   8.  48.  68.  44.  18.
  76.  37.  67.   8.  45.  95.  29.  26.  85.  25.  37.  78.  42.  45.
  40.  19.  36.  31.  51.  54.  75.  58.  81.  88.  88.  76.  38.  89.
   7.  43. 103.  69.  76.   8.]


In [22]:
# Best top-1 / top-5 accuracy reached by each test file over all templates.
print(np.max(acc, axis=1))
print(np.max(acc5, axis=1))

# Accuracy against the template selected for each file, saved as CSV.
best_temp = {}
records = []

for i, file in enumerate(files):
    if np.isnan(best_score_idxs[i]):
        # no template was recorded for this file; int(nan) would raise below
        continue
    best_idx = int(best_score_idxs[i])
    best_file = files[best_idx]
    best_temp[file] = (best_file, acc[i,best_idx], acc5[i,best_idx])

    records.append([file[:-4], best_file[:-4],acc[i,best_idx], acc2[i,best_idx], acc3[i,best_idx], acc4[i,best_idx] ,acc5[i,best_idx]])

# build the frame once instead of growing it row by row
CPD_df = pd.DataFrame(records, columns=['Filename', 'Best_template', 'top1', 'top2','top3', 'top4', 'top5'])
CPD_df.to_csv('/Users/danielysprague/foco_lab/data/Acc_CPD/match_lowest_cost.csv')

[0.52238806 0.44615385 0.46296296 0.48484848 0.25       0.734375
 0.44444444 0.74603175 0.43589744 0.63157895 0.49056604 0.11904762
 0.31914894 0.33333333 0.72580645 0.64550265 0.5        0.43478261
 0.61578947 0.38797814 0.734375   0.35897436 0.46551724 0.43055556
 0.54597701 0.40571429 0.41558442 0.71428571 0.56410256 0.57954545
 0.51898734 0.35714286 0.5297619  0.51923077 0.61931818 0.65803109
 0.68617021 0.53125    0.57142857 0.33888889 0.4939759  0.40764331
 0.47368421 0.45142857 0.41717791 0.20121951 0.47727273 0.34545455
 0.45121951 0.63492063 0.5        0.46341463 0.64516129 0.56097561
 0.3125     0.45454545 0.62790698 0.24358974 0.38666667 0.30232558
 0.59210526 0.54166667 0.47457627 0.55       0.34782609 0.35294118
 0.43181818 0.43478261 0.45762712 0.68229167 0.63636364 0.45
 0.25       0.32214765 0.33536585 0.57062147 0.4972067  0.57142857
 0.65053763 0.44897959 0.45454545 0.66489362 0.62820513 0.45454545
 0.47368421 0.63541667 0.67647059 0.53409091 0.4025974  0.52
 0.461956

In [9]:
# Directory holding the aligned CSV files (hard-coded absolute local path).
aligned_path = '/Users/danielysprague/foco_lab/data/aligned_2024_03_11/aligned_full'


In [14]:
# Stems (name minus the 4-character extension) of the non-hidden files in the EY folder.
dataset_files = [entry[:-4] for entry in os.listdir('/Users/danielysprague/foco_lab/data/final_nwb/'+'EY') if not entry.startswith('.')]

# Single pass over the aligned directory: files whose stem is in the EY set
# become test files, every other non-skipped file (bar '.DS_Store') a template.
test_files = []
temp_files = []
for entry in os.listdir(aligned_path):
    stem = entry[:-4]
    if stem in skip_files:
        continue
    if stem in dataset_files:
        test_files.append(entry)
    elif entry != '.DS_Store':
        temp_files.append(entry)

print(len(test_files))
print(len(temp_files))

21
83


In [30]:
def _load_aligned_worm(csv_path):
    """Read one aligned CSV and return ``(ids, positions, colors)`` as arrays.

    Rows are de-duplicated on ``ID`` (first occurrence kept) and remaining
    NaNs are replaced by ``''``. Positions are shifted by half of their
    per-axis maximum and divided by 84; colours are min-max scaled per
    channel, centred on zero and divided by 5.
    NOTE(review): the origin of the constants 84 and 5 is not shown here.
    """
    table = pd.read_csv(csv_path)
    table = table.drop_duplicates(subset=['ID'], keep='first')
    table = table.fillna('')
    ids = np.asarray(table['ID'])
    pos = np.asarray(table[['aligned_x', 'aligned_y', 'aligned_z']])
    pos = (pos - np.max(pos, axis=0)/2)/84
    col = np.asarray(table[['aligned_R', 'aligned_G', 'aligned_B']])
    col = ((col - np.min(col, axis=0))/(np.max(col, axis=0)-np.min(col, axis=0))-0.5)/5
    return ids, pos, col


def acc_by_dataset(aligned_path, dataset, skip_files,
                   nwb_root='/Users/danielysprague/foco_lab/data/final_nwb/'):
    """Leave-one-dataset-out matching accuracy via ``cpd_match``.

    Aligned files whose stem (name minus the last four characters) also
    appears in ``<nwb_root>/<dataset>`` are the test files; every other
    non-skipped aligned file is a candidate template.

    Parameters
    ----------
    aligned_path : str
        Folder containing the aligned per-worm CSV files.
    dataset : str
        Name of the sub-folder of ``nwb_root`` that defines the held-out set.
    skip_files : container of str
        File stems to exclude from both the test and the template lists.
    nwb_root : str, optional
        Folder holding one sub-folder per dataset. Defaults to the path that
        was previously hard-coded.

    Returns
    -------
    acc, acc2, acc3, acc4, acc5 : ndarray, shape (n_test, n_template)
        Cumulative top-1 … top-5 accuracy of test file ``j`` against template
        ``i``. An entry is filled only when the template's score exceeds every
        score seen earlier for that test file; all other entries stay 0.
    test_files, temp_files : list of str
        File names corresponding to the rows and columns of the matrices.
    """
    dataset_stems = {f[:-4] for f in os.listdir(os.path.join(nwb_root, dataset))
                     if not f.startswith('.')}

    # List the directory once so both lists are derived from the same snapshot.
    aligned_files = os.listdir(aligned_path)
    test_files = [f for f in aligned_files
                  if f[:-4] not in skip_files and f[:-4] in dataset_stems]
    held_out = set(test_files)
    temp_files = [f for f in aligned_files
                  if f[:-4] not in skip_files and f not in held_out and f != '.DS_Store']

    shape = (len(test_files), len(temp_files))
    acc, acc2, acc3, acc4, acc5 = (np.zeros(shape) for _ in range(5))
    tables = (acc, acc2, acc3, acc4, acc5)

    # Equivalent to catch_warnings(action="ignore") but also valid before
    # Python 3.11.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for j, test_file in enumerate(test_files):
            print(j)
            print(test_file)
            curr_best_score = 0
            test_gt, test_pos, test_col = _load_aligned_worm(
                os.path.join(aligned_path, test_file))

            # No `j == i` skip here: test and template files are disjoint
            # lists, so equal indices do not mean "same worm". Skipping them
            # would drop one valid template per test file.
            for i, temp_file in enumerate(temp_files):
                temp_gt, temp_pos, temp_col = _load_aligned_worm(
                    os.path.join(aligned_path, temp_file))

                # Rows of `match` are [template index, test index] for IDs
                # present in both files (see find_match).
                match, _ = find_match(temp_gt, test_gt)

                assigns, score = cpd_match(test_pos, temp_pos, test_col, temp_col, match,
                                           curr_best_score, method='color', plot=False)

                # Only templates that improve on the running best are scored.
                if score is None or not score > curr_best_score:
                    continue
                curr_best_score = score

                # test index -> template index of the neuron with the same ID.
                # A dict lookup avoids taking the truth value of an empty
                # array when a test neuron has no labelled counterpart.
                truth = {int(mov_idx): int(ref_idx) for ref_idx, mov_idx in match}

                corr = np.zeros((len(test_gt), 5))
                for k in range(5):
                    row, col = assigns[k]
                    for idx, col_label in zip(row, col):
                        expected = truth.get(int(idx))
                        if expected is not None and expected == col_label:
                            corr[idx, k] = 1

                # Top-k hit = correct in any of the first k assignments.
                cumulative_hits = np.maximum.accumulate(corr, axis=1)
                top_k_acc = cumulative_hits.sum(axis=0) / corr.shape[0]
                for table, value in zip(tables, top_k_acc):
                    table[j, i] = value

    return acc, acc2, acc3, acc4, acc5, test_files, temp_files

# Evaluate each lab's dataset as the held-out set, in this fixed order, then
# bind every 7-tuple (five accuracy matrices, test files, template files) to
# the per-lab names used by later cells.
lab_results = {
    lab: acc_by_dataset(aligned_path, lab, skip_files)
    for lab in ('EY', 'SF', 'KK', 'SK1', 'SK2', 'HL')
}

accEY, acc2EY, acc3EY, acc4EY, acc5EY, EY_test_files, EY_temp_files = lab_results['EY']
accSF, acc2SF, acc3SF, acc4SF, acc5SF, SF_test_files, SF_temp_files = lab_results['SF']
accKK, acc2KK, acc3KK, acc4KK, acc5KK, KK_test_files, KK_temp_files = lab_results['KK']
accSK1, acc2SK1, acc3SK1, acc4SK1, acc5SK1, SK1_test_files, SK1_temp_files = lab_results['SK1']
accSK2, acc2SK2, acc3SK2, acc4SK2, acc5SK2, SK2_test_files, SK2_temp_files = lab_results['SK2']
accHL, acc2HL, acc3HL, acc4HL, acc5HL, HL_test_files, HL_temp_files = lab_results['HL']

0
20190928_08.csv
1
20191104_10.csv
2
20190929_06.csv
3
20190929_07.csv
4
20191030_03.csv
5
20190929_05.csv
6
20190925_04.csv
7
20191030_07.csv
8
20190925_01.csv
9
20190929_03.csv
10
20190929_02.csv
11
20191104_08.csv
12
20190928_05.csv
13
20190928_11.csv
14
20190928_07.csv
15
20190928_13.csv
16
20190924_02.csv
17
20190924_03.csv
18
20190928_03.csv
19
20190928_01.csv
20
20190924_01.csv
0
2022-12-21-06.csv
1
2023-01-06-08.csv
2
2023-01-16-08.csv
3
2023-01-16-22.csv
4
2023-01-13-07.csv
5
2023-01-23-15.csv
6
2023-01-17-07.csv
7
2022-07-15-12.csv
8
2023-01-17-01.csv
9
2023-01-09-15.csv
10
2023-01-19-08.csv
11
2023-01-23-21.csv
12
2023-01-05-18.csv
13
2023-01-23-08.csv
14
2023-01-10-07.csv
15
2022-06-14-01.csv
16
2023-01-09-22.csv
17
2022-06-14-07.csv
18
2022-06-14-13.csv
19
2022-06-28-01.csv
20
2023-03-07-01.csv
21
2023-01-06-01.csv
22
2023-01-18-01.csv
23
2022-07-20-01.csv
24
2023-01-16-01.csv
25
2023-01-16-15.csv
0
20230918-17-21-0.csv
1
20230928-14-27-0.csv
2
20230928-11-14-0.csv
3
2023

In [31]:
def add_to_df(acc1, acc2, acc3, acc4, acc5, test_files, temp_files, df, dataset):
    """Append one summary row per test file to ``df`` and return ``df``.

    For each row of ``acc1`` the template with the highest top-1 accuracy is
    selected (first one on ties), and the top-1 … top-5 values for that
    test/template pair are written together with ``dataset`` and both file
    names, each with its last four characters removed. ``df`` is modified in
    place; the same object is returned.
    """
    top_k_tables = (acc1, acc2, acc3, acc4, acc5)

    for test_row in range(acc1.shape[0]):
        winner = np.argmax(acc1[test_row, :])
        template_name = temp_files[winner]
        test_name = test_files[test_row]

        record = [dataset, test_name[:-4], template_name[:-4]]
        record.extend(table[test_row, winner] for table in top_k_tables)
        # Label-based enlargement: the new row is stored under label len(df).
        df.loc[len(df)] = record

    return df

CPD_dataset_df = pd.DataFrame(columns=['Dataset', 'Filename', 'Best_template', 'top1', 'top2', 'top3', 'top4', 'top5'])

# Per-lab inputs for add_to_df, listed in the order their rows are appended.
per_lab_inputs = [
    ('EY', (accEY, acc2EY, acc3EY, acc4EY, acc5EY, EY_test_files, EY_temp_files)),
    ('SF', (accSF, acc2SF, acc3SF, acc4SF, acc5SF, SF_test_files, SF_temp_files)),
    ('KK', (accKK, acc2KK, acc3KK, acc4KK, acc5KK, KK_test_files, KK_temp_files)),
    ('SK1', (accSK1, acc2SK1, acc3SK1, acc4SK1, acc5SK1, SK1_test_files, SK1_temp_files)),
    ('SK2', (accSK2, acc2SK2, acc3SK2, acc4SK2, acc5SK2, SK2_test_files, SK2_temp_files)),
    ('HL', (accHL, acc2HL, acc3HL, acc4HL, acc5HL, HL_test_files, HL_temp_files)),
]

# add_to_df appends to the frame it receives and returns that same frame, so
# `df` is threaded through every call.
df = CPD_dataset_df
for lab, lab_inputs in per_lab_inputs:
    df = add_to_df(*lab_inputs, df, lab)

df.to_csv('/Users/danielysprague/foco_lab/data/Acc_CPD/match_leave_lab_out.csv')

In [None]:
# Best top-1 and best top-5 accuracy per file (maximum over axis 1).
print(np.max(acc, axis=1), np.max(acc5, axis=1), sep='\n')

best_temp = {}
CPD_df = pd.DataFrame(columns=['Filename', 'Best_template', 'top1', 'top2','top3', 'top4', 'top5'])

for i, file in enumerate(files):
    # The template is chosen by highest top-1 accuracy in row i.
    best_idx = np.argmax(acc[i,:])
    best_file = files[best_idx]
    # top-1 … top-5 accuracy for this file / template pair.
    top_k = [table[i, best_idx] for table in (acc, acc2, acc3, acc4, acc5)]
    best_temp[file] = (best_file, top_k[0], top_k[-1])

    CPD_df.loc[len(CPD_df)] = [file[:-4], best_file[:-4], *top_k]

CPD_df.to_csv('/Users/danielysprague/foco_lab/data/Acc_CPD/match_all_template.csv')

# 