In [1]:
from pathlib import Path
from xml.dom import minidom
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

import SimpleITK as sitk

from tqdm import tqdm

import os
import shutil
import filecmp

from bs4 import BeautifulSoup

from landmarks_info import *

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:

# Landmark-set descriptors are provided by the star import of `landmarks_info`;
# each descriptor is indexed below by its 'file_name' key.
all_landmarks = [
    set_vert,
    set_fins,
    set_digest,
    set_heart,
    set_eyes,
    set_skull_front,
    set_skull_center,
    set_skull_end,
    set_brain,
]

# File-name stem of every landmark set, in the same order as `all_landmarks`.
landmarks_pointset_names = [landmark_set['file_name'] for landmark_set in all_landmarks]

print(landmarks_pointset_names)

['pointset1_vert', 'pointset2_fins', 'pointset3_digest', 'pointset4_heart', 'pointset5_eyes', 'pointset6_skull_front', 'pointset7_skull_center', 'pointset8_skull_end', 'pointset9_brain']


In [36]:
# Root folder of the per-pointset evaluation sheets.
# NOTE(review): absolute, machine-specific Windows path — the notebook will not
# run elsewhere without editing this line.
eval_path = Path('c:\\Users\\fe0968\\Documents\\Medaka\\evaluation\\')

# Brain pointset evaluation for the two result versions that are compared below.
df_v7 = pd.read_excel(eval_path / 'v7' / 'eval_pointset9_brain.xlsx')
df_v8 = pd.read_excel(eval_path / 'v8' / 'eval_pointset9_brain_v8.xlsx')

In [31]:
df_v7

Unnamed: 0.1,Unnamed: 0,sample,point_set,landmark,name1,name2,result
0,0,1248,pointset9_brain,hypophysis,cs,rs,5.919598
1,1,1248,pointset9_brain,olfactoryN_right,cs,rs,0.000000
2,2,1248,pointset9_brain,olfactoryN_left,cs,rs,0.000000
3,3,1248,pointset9_brain,glomerulosus_R,cs,rs,2.518606
4,4,1248,pointset9_brain,glomerulosus_L,cs,rs,0.864444
...,...,...,...,...,...,...,...
675,675,1226,pointset9_brain,OT_rightmost,cs,rs,15.707891
676,676,1226,pointset9_brain,OT_leftmost,cs,rs,9.173041
677,677,1226,pointset9_brain,cerebellum,cs,rs,12.299239
678,678,1226,pointset9_brain,OT cerebellum torus,cs,rs,5.187814


In [32]:
df_v8

Unnamed: 0.1,Unnamed: 0,sample,point_set,landmark,name1,name2,result
0,0,526,pointset9_brain,hypophysis,ca,pw,6.250938
1,1,526,pointset9_brain,olfactoryN_right,ca,pw,1.733112
2,2,526,pointset9_brain,olfactoryN_left,ca,pw,5.500942
3,3,526,pointset9_brain,glomerulosus_R,ca,pw,1.619890
4,4,526,pointset9_brain,glomerulosus_L,ca,pw,4.154976
...,...,...,...,...,...,...,...
2185,2185,511,pointset9_brain,OT_rightmost,ca,pw,4.431650
2186,2186,511,pointset9_brain,OT_leftmost,ca,pw,8.027395
2187,2187,511,pointset9_brain,cerebellum,ca,pw,1.163543
2188,2188,511,pointset9_brain,OT cerebellum torus,ca,pw,3.277326


## Compute the difference between two versions of the results (old and new)

In [33]:
# Remove the stray 'Unnamed: 0' column (presumably the index saved with the
# sheet). `errors='ignore'` keeps this cell idempotent: running it a second
# time, when the column is already gone, no longer raises KeyError.
df_v7 = df_v7.drop(columns=['Unnamed: 0'], errors='ignore')
df_v8 = df_v8.drop(columns=['Unnamed: 0'], errors='ignore')

In [35]:
# Symmetric difference of the two tables: only rows that occur in exactly one
# version survive `drop_duplicates(keep=False)`.
# Each frame is de-duplicated first; otherwise a row that is repeated inside a
# single version (and absent from the other) would be removed entirely and
# silently disappear from the diff.
df = pd.concat([df_v7.drop_duplicates(), df_v8.drop_duplicates()]).drop_duplicates(keep=False)
df

Unnamed: 0,sample,point_set,landmark,name1,name2,result
0,526,pointset9_brain,hypophysis,ca,pw,6.250938
1,526,pointset9_brain,olfactoryN_right,ca,pw,1.733112
2,526,pointset9_brain,olfactoryN_left,ca,pw,5.500942
3,526,pointset9_brain,glomerulosus_R,ca,pw,1.619890
4,526,pointset9_brain,glomerulosus_L,ca,pw,4.154976
...,...,...,...,...,...,...
2185,511,pointset9_brain,OT_rightmost,ca,pw,4.431650
2186,511,pointset9_brain,OT_leftmost,ca,pw,8.027395
2187,511,pointset9_brain,cerebellum,ca,pw,1.163543
2188,511,pointset9_brain,OT cerebellum torus,ca,pw,3.277326


## Process all

In [44]:
# For every landmark set, write the rows that differ between the v7 and v8
# evaluation sheets to `<eval_path>/diff_v7_v8/`.
diff_dir = eval_path / 'diff_v7_v8'
diff_dir.mkdir(parents=True, exist_ok=True)  # to_excel does not create missing folders

for landmark_set in all_landmarks:

    n = landmark_set['file_name']
    print('Processing:', n)

    df_old = pd.read_excel(eval_path / 'v7' / f'eval_{n}.xlsx')
    df_new = pd.read_excel(eval_path / 'v8' / f'eval_{n}_v8.xlsx')

    # Drop the stray index column, then collapse repeats inside each version so
    # that keep=False below only removes rows shared by BOTH versions.
    df_old = df_old.drop(columns=['Unnamed: 0'], errors='ignore').drop_duplicates()
    df_new = df_new.drop(columns=['Unnamed: 0'], errors='ignore').drop_duplicates()

    # Rows present in exactly one of the two versions.
    df_diff = pd.concat([df_old, df_new]).drop_duplicates(keep=False)

    df_diff.to_excel(diff_dir / f'eval_{n}_diff_v7_v8.xlsx')
    
    
    

Processing: pointset1_vert  NEW: 72
Processing: pointset2_fins  NEW: 164
Processing: pointset3_digest  NEW: 220
Processing: pointset4_heart  NEW: 440
Processing: pointset5_eyes  NEW: 0
Processing: pointset6_skull_front  NEW: 240
Processing: pointset7_skull_center  NEW: 246
Processing: pointset8_skull_end  NEW: 240
Processing: pointset9_brain  NEW: 1510


## Read the manual corrections to build the filtered landmarks

In [4]:
from openpyxl import load_workbook

In [5]:
def get_landmark(df, sample, user, landmark_name):
    """Return the (x, y, z) coordinates one annotator stored for a landmark.

    Parameters
    ----------
    df : pandas.DataFrame
        Table providing the columns 'sample', 'name', 'landmark', 'x', 'y', 'z'.
    sample, user, landmark_name
        Values matched against the 'sample', 'name' and 'landmark' columns.

    Returns
    -------
    tuple
        ``(x, y, z)`` taken from the first matching row. If several rows match,
        a warning and the matching rows are printed and the first one is used.

    Raises
    ------
    IndexError
        If no row matches. The message names the sample, user and landmark
        that were looked up.
    """
    matches = df[
        (df['sample'] == sample)
        & (df['name'] == user)
        & (df['landmark'] == landmark_name)
    ]

    if len(matches) == 0:
        # Fail with the lookup key instead of the opaque
        # "single positional indexer is out-of-bounds" from iloc[0].
        raise IndexError(
            'No such data in the results: '
            f'sample={sample!r}, user={user!r}, landmark={landmark_name!r}'
        )

    if len(matches) > 1:
        print('Error: duplicates')
        print(matches)

    first = matches.iloc[0]
    return first['x'], first['y'], first['z']
  

In [6]:
# Folder holding the manually reviewed per-landmark Excel sheets.
# NOTE(review): both paths are absolute and machine-specific.
evaluation_path = Path('c:\\Users\\fe0968\\Documents\\data\\medaka\\landmarks\\evaluation_vis\\')
# Folder holding the combined landmark results workbook.
results_path = Path('c:\\Users\\fe0968\\Documents\\Medaka\\')

# Sub-folders of `evaluation_path`: the v7 sheets and the v7->v8 diff sheets.
folder_v7 = 'by_landmark_centered'
folder_diff_v7_v8 = 'by_landmark_centered_diff_v7_v8'

In [7]:
# Raw landmark coordinates of all annotators (v8 results workbook).
res_df = pd.read_excel(results_path / 'results_landmarks_v8.xlsx')

In [35]:
# Remove the stray 'Unnamed: 0' column (presumably the index saved with the
# workbook). `errors='ignore'` makes the cell safe to re-run: `res_df` is
# reassigned in place, so a second run would otherwise raise KeyError.
res_df = res_df.drop(columns=['Unnamed: 0'], errors='ignore')
res_df.dtypes

sample         int64
point_set     object
landmark      object
name          object
x            float64
y            float64
z            float64
dtype: object

In [36]:
res_df

Unnamed: 0,sample,point_set,landmark,name,x,y,z
0,526,pointset1_vert,Transition skull to spine,kp,293.000000,282.826248,955.343394
1,526,pointset1_vert,Vert1,kp,289.983033,251.199781,986.170074
2,526,pointset1_vert,Vert2,kp,293.638389,233.759151,1013.763606
3,526,pointset1_vert,Vert3,kp,294.000000,226.785634,1050.797902
4,526,pointset1_vert,Vert4,kp,296.621422,223.064777,1095.812249
...,...,...,...,...,...,...,...
23355,511,pointset9_brain,OT_rightmost,pw,173.000003,236.560874,654.491076
23356,511,pointset9_brain,OT_leftmost,pw,364.000005,236.560874,650.138201
23357,511,pointset9_brain,cerebellum,pw,274.262913,191.952753,706.000000
23358,511,pointset9_brain,OT cerebellum torus,pw,267.927534,203.061912,657.787682


In [49]:
# Remove rows that are exact duplicates across all columns, so that
# get_landmark() does not report duplicate matches for them.
res_df = res_df.drop_duplicates()
print('After dropping duplicates:',len(res_df))

After dropping duplicates: 23341


In [69]:
# Module-level accumulators filled by process_evaluation_files(): one entry per
# kept evaluation row, all seven lists growing in lock-step.
# Re-run this cell before calling process_evaluation_files() again, otherwise
# the new rows are appended after the old ones.
sample_list = []
pointset_list = []
landmark_list = []
x_list = []
y_list = []
z_list = []
var_list = []

In [70]:
def _is_marked_red(cell):
    """Return True when the cell's font colour is 'FFFF0000' (the red that flags a dropped annotator)."""
    color = cell.font.color
    return color is not None and color.rgb == 'FFFF0000'


def _locate_columns(ws):
    """Return the (name1, name2, sample) column letters of a sheet, or None if the header row is not recognised."""
    if ws['F1'].value == 'name1' and ws['G1'].value == 'name2':
        return 'F', 'G', 'C'
    if ws['D1'].value == 'name1' and ws['E1'].value == 'name2':
        return 'D', 'E', 'A'
    return None


def process_evaluation_files(folder, file_postfix):
    """Collect filtered landmark coordinates from the reviewed evaluation sheets.

    For every landmark of every set in ``all_landmarks`` the workbook
    ``evaluation_path / folder / <set file_name> / '<NN>_<landmark><file_postfix>.xlsx'``
    is opened (missing files are skipped). Each data row names two annotators;
    an annotator whose name cell is written in red font is dropped:

    * both dropped  -> the row is skipped;
    * one dropped   -> the other annotator's coordinates are used, ``var = -1``;
    * none dropped  -> the coordinates are averaged and ``var`` is the distance
      between the two points (via ``get_distance``), rounded to 2 decimals.

    Results are appended to the module-level lists ``sample_list``,
    ``pointset_list``, ``landmark_list``, ``x_list``, ``y_list``, ``z_list`` and
    ``var_list``; nothing is returned. Coordinates are looked up in the
    module-level ``res_df`` through ``get_landmark``.

    Parameters
    ----------
    folder : str
        Sub-folder of ``evaluation_path`` that contains the per-set folders.
    file_postfix : str
        Text inserted between the landmark name and ``.xlsx`` in file names.
    """
    for pointset in all_landmarks:

        pointset_name = pointset['name']

        # enumerate() gives the position directly (no repeated list.index scan)
        # and keeps the index in its own name instead of reusing the row counter.
        for land_index, land in enumerate(pointset['landmarks']):

            file_name = f'{str(land_index).zfill(2)}_{land}{file_postfix}.xlsx'
            path = evaluation_path / folder / pointset['file_name'] / file_name

            if not path.exists():
                continue

            # Open Excel stats file
            wb = load_workbook(path)
            ws = wb.active

            columns = _locate_columns(ws)
            if columns is None:
                # Skip the sheet: continuing would use unbound selectors or,
                # worse, the selectors left over from the previous workbook.
                print('Error with data columns', path)
                continue
            name1_selector, name2_selector, sample_selector = columns

            cells_count = len(ws[name1_selector])

            # Row 1 is the header; data rows are 2..cells_count.
            for row in range(2, cells_count + 1):

                name1_cell = ws[f'{name1_selector}{row}']
                name2_cell = ws[f'{name2_selector}{row}']

                drop_user1 = _is_marked_red(name1_cell)
                drop_user2 = _is_marked_red(name2_cell)

                # Drop both
                if drop_user1 and drop_user2:
                    continue

                name1 = name1_cell.value
                name2 = name2_cell.value
                sample = ws[f'{sample_selector}{row}'].value

                if drop_user2:
                    # Only annotator 1 is kept; -1 marks "no pair distance".
                    x, y, z = get_landmark(res_df, sample, name1, land)
                    variance = -1
                elif drop_user1:
                    # Only annotator 2 is kept.
                    x, y, z = get_landmark(res_df, sample, name2, land)
                    variance = -1
                else:
                    # Average both landmarks
                    x1, y1, z1 = get_landmark(res_df, sample, name1, land)
                    x2, y2, z2 = get_landmark(res_df, sample, name2, land)

                    x = (x1 + x2) / 2.0
                    y = (y1 + y2) / 2.0
                    z = (z1 + z2) / 2.0

                    variance = round(get_distance([x1, y1, z1], [x2, y2, z2]), 2)

                sample_list.append(sample)
                pointset_list.append(pointset_name)
                landmark_list.append(land)
                x_list.append(round(x, 2))
                y_list.append(round(y, 2))
                z_list.append(round(z, 2))
                var_list.append(variance)
                        

In [71]:
# Fill the accumulator lists from the v7 sheets and from the v7->v8 diff sheets.
# Each call appends to the same module-level lists, so running this cell again
# without re-creating the lists adds every row a second time.
process_evaluation_files(folder_v7, '')
process_evaluation_files(folder_diff_v7_v8, '_diff_v7_v8')
print('Finished')

Finished


In [72]:
# Assemble the accumulated columns into one table; keyword order fixes the
# column order of the resulting DataFrame.
data = dict(
    group=pointset_list,
    landmark=landmark_list,
    sample=sample_list,
    x=x_list,
    y=y_list,
    z=z_list,
    var=var_list,
)

res_filtered_df = pd.DataFrame(data)

In [73]:
# Remove rows that are identical in every column (group, landmark, sample,
# coordinates and var), then display the table.
res_filtered_df = res_filtered_df.drop_duplicates()
res_filtered_df

Unnamed: 0,group,landmark,sample,x,y,z,var
0,Vert,Transition skull to spine,720,271.00,265.83,1195.91,-1.00
1,Vert,Transition skull to spine,1391,257.65,231.66,428.00,-1.00
2,Vert,Transition skull to spine,471,350.00,324.92,1070.82,-1.00
3,Vert,Transition skull to spine,470,356.00,318.80,1122.67,-1.00
4,Vert,Transition skull to spine,871,313.98,326.96,546.63,13.40
...,...,...,...,...,...,...,...
11079,Brain,epiphysis,685,239.10,244.52,715.88,0.48
11080,Brain,epiphysis,1380,254.79,190.15,200.89,0.48
11081,Brain,epiphysis,1230,288.39,254.39,450.33,0.34
11082,Brain,epiphysis,834,271.89,357.99,292.88,0.32


In [74]:
res_filtered_df[res_filtered_df.duplicated()]

Unnamed: 0,group,landmark,sample,x,y,z,var


## Make statistics

In [83]:
# Spot check: all filtered rows of a single landmark.
land_res = res_filtered_df[res_filtered_df['landmark'] == 'Transition skull to spine']
land_res

Unnamed: 0,group,landmark,sample,x,y,z,var
0,Vert,Transition skull to spine,720,271.00,265.83,1195.91,-1.00
1,Vert,Transition skull to spine,1391,257.65,231.66,428.00,-1.00
2,Vert,Transition skull to spine,471,350.00,324.92,1070.82,-1.00
3,Vert,Transition skull to spine,470,356.00,318.80,1122.67,-1.00
4,Vert,Transition skull to spine,871,313.98,326.96,546.63,13.40
...,...,...,...,...,...,...,...
7987,Vert,Transition skull to spine,974,223.17,224.06,427.43,4.52
7988,Vert,Transition skull to spine,966,254.00,214.33,717.06,3.85
7989,Vert,Transition skull to spine,1197,312.50,275.31,1141.23,3.06
7990,Vert,Transition skull to spine,1412,315.74,307.93,530.01,2.67


In [84]:
# Keep only rows with a positive 'var'; rows built from a single annotator carry
# the sentinel -1 and are excluded here.
# NOTE(review): `> 0` also excludes pairs whose distance rounded to 0.00 —
# confirm whether those should count (`>= 0`).
land_res_pairs = land_res[land_res['var'] > 0]
land_res_pairs

Unnamed: 0,group,landmark,sample,x,y,z,var
4,Vert,Transition skull to spine,871,313.98,326.96,546.63,13.40
5,Vert,Transition skull to spine,531,248.41,238.69,902.83,12.94
6,Vert,Transition skull to spine,1274,341.52,333.64,715.62,12.92
7,Vert,Transition skull to spine,1423,303.26,281.95,456.40,12.71
8,Vert,Transition skull to spine,1393,338.18,317.27,549.76,12.03
...,...,...,...,...,...,...,...
7987,Vert,Transition skull to spine,974,223.17,224.06,427.43,4.52
7988,Vert,Transition skull to spine,966,254.00,214.33,717.06,3.85
7989,Vert,Transition skull to spine,1197,312.50,275.31,1141.23,3.06
7990,Vert,Transition skull to spine,1412,315.74,307.93,530.01,2.67


In [94]:
# Per-landmark statistics of the inter-annotator distance ('var').
#
# The rows are collected as records in a dedicated list instead of rebinding
# `pointset_list`: that name is the accumulator process_evaluation_files()
# appends to and the source of the 'group' column of `res_filtered_df`, so
# overwriting it here broke any later re-run of those cells.
stat_columns = ['group', 'landmark', 'count', 'mean', 'std', 'max']
stat_records = []

for pointset in all_landmarks:

    pointset_name = pointset['name']

    for land in pointset['landmarks']:

        # Match on group as well as landmark so that equal landmark names in
        # different groups are not merged into one statistic.
        land_res = res_filtered_df[
            (res_filtered_df['group'] == pointset_name)
            & (res_filtered_df['landmark'] == land)
        ]

        # Only rows with a positive 'var' enter the statistics; single-annotator
        # rows carry the sentinel -1.
        # NOTE(review): `> 0` also excludes pairs whose distance rounded to
        # 0.00 — confirm whether those should count (`>= 0`).
        pair_var = land_res.loc[land_res['var'] > 0, 'var']

        stat_records.append({
            'group': pointset_name,
            'landmark': land,
            'count': len(land_res),  # all kept rows, including single-annotator ones
            'mean': pair_var.mean(),
            'std': pair_var.std(),
            'max': pair_var.max(),
        })

# Passing `columns` keeps the frame well-formed even when there are no records.
stat_df = pd.DataFrame(stat_records, columns=stat_columns).sort_values('mean', ascending=False)
stat_df

Unnamed: 0,group,landmark,count,mean,std,max
10,Fins,Abdominal_fins back 2_left,217,28.523056,126.951059,1508.65
8,Fins,Pectoral_dorsal most breast fin to body connec...,215,23.455146,63.747886,915.81
7,Fins,Pectoral_dorsal most breast fin to body connec...,215,18.456893,13.704464,83.48
9,Fins,Abdominal_fins back 1_right,217,16.20588,8.399423,70.47
14,Heart,sinus venosus,215,9.573382,6.417308,35.66
42,Skull End,fusion of epibranchial artery 2,213,9.06225,76.261524,1081.15
12,Digest,esophagus,211,8.256219,5.371706,28.48
51,Brain,OT_leftmost,218,7.956667,5.660804,30.4
32,Skull Front,upper jaw channel,191,7.916768,6.525835,28.25
50,Brain,OT_rightmost,219,7.246714,5.457124,28.41


In [61]:
res_filtered_df[res_filtered_df.duplicated()]

Unnamed: 0,group,landmark,sample,x,y,z,var
8193,Vert,Transition skull to spine,1300,327.09,313.02,756.4,5.95
8194,Vert,Transition skull to spine,1300,327.09,313.02,756.4,5.95
8207,Vert,Vert1,1300,330.79,275.04,805.47,1.04
8208,Vert,Vert1,1300,330.79,275.04,805.47,1.04
8219,Vert,Vert2,1300,332.32,261.77,845.94,2.15
8220,Vert,Vert2,1300,332.32,261.77,845.94,2.15
8233,Vert,Vert3,1300,333.83,257.16,887.39,1.09
8234,Vert,Vert3,1300,333.83,257.16,887.39,1.09
8242,Vert,Vert4,1300,331.55,251.9,936.97,2.08
8243,Vert,Vert4,1300,331.55,251.9,936.97,2.08


In [92]:
# Scratch check on a single sheet: which columns hold the annotator names and
# the sample id? (Same header test as in process_evaluation_files.)
wb = load_workbook('03_tongue tip.xlsx')
ws = wb.active

# Two header layouts are recognised; selectors are column letters.
if ws['F1'].value == 'name1' and ws['G1'].value == 'name2':
    name1_selector, name2_selector, sample_selector = 'F', 'G', 'C'
elif ws['D1'].value == 'name1' and ws['E1'].value == 'name2':
    name1_selector, name2_selector, sample_selector = 'D', 'E', 'A'
else:
    print('Error with data columns')

print(sample_selector, name1_selector, name2_selector)

A D E


In [112]:
# Scratch version of the row loop of process_evaluation_files() for the single
# 'tongue tip' sheet. It relies on `ws` and the three selectors set by another
# cell, and the values computed per row (x, y, z, variance) are not stored
# anywhere.
cells_count = len(ws[name1_selector])


# For all cells in a column
for i in range(1, cells_count):
    
    drop_user1 = False
    drop_user2 = False
    variance = -1
    
    # A name written in red font ('FFFF0000') marks that annotator as dropped.
    if ws[f'{name1_selector}{i+1}'].font.color != None:
        if ws[f'{name1_selector}{i+1}'].font.color.rgb == 'FFFF0000':
            drop_user1 = True
            #print(ws[f'{name1}{i+1}'].value)
    if ws[f'{name2_selector}{i+1}'].font.color != None:
        if ws[f'{name2_selector}{i+1}'].font.color.rgb == 'FFFF0000':
            drop_user2 = True
            
    name1 = ws[f'{name1_selector}{i+1}'].value
    name2 = ws[f'{name2_selector}{i+1}'].value
    sample = ws[f'{sample_selector}{i+1}'].value
            
    # Only annotator 1 kept.
    if not drop_user1 and drop_user2:     
        x,y,z = get_landmark(res_df, sample, name1, 'tongue tip')
        variance = -1
        #print('Drop', name2)
        
    # Only annotator 2 kept.
    if drop_user1 and not drop_user2:     
        x,y,z = get_landmark(res_df, sample, name2, 'tongue tip')
        variance = -1
        #print('Drop', name1)
        
    # Both kept: average the two points and measure their distance.
    if not drop_user1 and not drop_user2:     
        x1,y1,z1 = get_landmark(res_df, sample, name1, 'tongue tip')
        x2,y2,z2 = get_landmark(res_df, sample, name2, 'tongue tip')
        
        x = (x1 + x2) / 2.0
        y = (y1 + y2) / 2.0
        z = (z1 + z2) / 2.0
        
        variance = round(get_distance([x1,y1,z1], [x2,y2,z2]), 2)
        #print(variance)
        
    
        
            


Unnamed: 0,sample,point_set,landmark,name,x,y,z


In [50]:
# Inspect one landmark-set descriptor (index 4 of `all_landmarks`).
pointset = all_landmarks[4]
print(pointset)

{'name': 'Eyes', 'file_name': 'pointset5_eyes', 'landmarks': ['optic nerve head 1_right', 'optic nerve head 2_left', 'optic chiasm_crossing', 'most_anterior_right', 'most_anterior_left', 'most_posterior_right', 'most_posterior_left', 'most_dorsal_right', 'most_dorsal_left', 'most_ventral_right', 'most_ventral_left']}


In [None]:
# NOTE(review): orphaned fragment — the indented lines below look like the tail
# of a function whose `def` line is not in this notebook, and `user_list`,
# `save` and `file_name` are not defined anywhere in it. As written the cell
# cannot run (unexpected indent); restore the enclosing function or delete it.
data = {'sample': sample_list,
           'point_set': pointset_list,
            'landmark': landmark_list,
            'name': user_list,
            'x': x_list,      
            'y': y_list,
            'z': z_list,
           }

    res_df = pd.DataFrame(data)
    
    if save:
        res_df.to_excel(file_name)