In [54]:
from pathlib import Path
from xml.dom import minidom
import numpy as np
import pandas as pd

import os
import filecmp

from bs4 import BeautifulSoup

In [10]:
path = Path("c:\\Users\\fe0968\\Documents\\data\\medaka\\landmarks\\workshop_landmarks_selected\\")

In [11]:
path_alex = path / 'pointsets_alexey'
path_TT = path / 'Pointsets_803_4_TT'
path_JB = path / 'Pointsets_JB'
path_JB2 = path / 'Pointsets_JB2'

In [131]:
set_vert = {
    'name': 'Vert',
    'file_name': 'PointSet1_Vert'.lower(),
    'landmarks': [
        'Vert1',
        'Vert2',
        'Vert3',
        'Vert4',
        'Vert5',
        'Vert_Last_Center'
    ]
}

set_fins = {
    'name': 'Fins',
    'file_name': 'PointSet2_Fins'.lower(),
    'landmarks': [
        'Pectoral_dorsal most breast fin to body connection 1_right',
        'Pectoral_dorsal most breast fin to body connection 2_left',
        'Abdominal_fins back 1_right',
        'Abdominal_fins back 2_left'
    ]
}

set_digest = {
    'name': 'Digest',
    'file_name': 'PointSet3_Digest'.lower(),
    'landmarks': [
        'anus_Center',
        'esophagus'
    ]
}

set_heart = {
    'name': 'Heart',
    'file_name': 'PointSet4_Heart'.lower(),
    'landmarks': [
        'tip of bulbus arteriosus vessel inside',
        'sinus venosus',
        'apex of ventricle',
        'anterior most point of ventricle'
    ]
}

set_eyes = {
    'name': 'Eyes',
    'file_name': 'PointSet5_Eyes'.lower(),
    'landmarks': [
        'optic nerve head 1_right',
        'optic nerve head 2_left',
        'optic chiasm_crossing',
        'most_anterior_right',
        'most_anterior_lef',
        'most_posterior_right',
        'most_posterior_left',
        'most_dorsal_right',
        'most_dorsal_left',
        'most_ventral_right',
        'most_ventral_left'
    ]
}

set_skull_front = {
    'name': 'Skull Front',
    'file_name': 'PointSet6_Skull_Front'.lower(),
    'landmarks': [
        'ventral side of nostril outlet right',
        'ventral side of nostril outlet left',
        'dorsal side of nostril outlet right',
        'dorsal side of nostril outlet left',
        'mandible dentary',
        'tongue tip',
        'upper jaw channel',
        'hyoid fusion'
    ]
}

set_skull_center = {
    'name': 'Skull Center',
    'file_name': 'PointSet7_Skull_Center'.lower(),
    'landmarks': [
        'subhypophysis bone',
        'hyoid between branchial arches',
        'split of afferent branchial artery 1',
        'split of afferent branchial artery 2',
        'split of afferent branchial artery 3',
        'gills bone right',
        'gills bone left'
    ]
}

set_skull_end = {
    'name': 'Skull End',
    'file_name': 'PointSet8_Skull_End'.lower(),
    'landmarks': [
        'skull landmark A right',
        'skull landmark A left',
        'transition skull to spine',
        'fusion of epibranchial artery 2',
        'center of utricle right',
        'center of utricle left'
    ]
}

set_brain = {
    'name': 'Brain',
    'file_name': 'PointSet9_Brain'.lower(),
    'landmarks': [
    'hypophysis',
    'olfactoryN_right',
    'olfactoryN_left',
    'glomerulosus_R',
    'glomerulosus_L',
    'OT_rightmost',
    'OT_leftmost',
    'cerebellum',
    'OT cerebellum torus',
    'epiphysis'
    ]
}

all_landmarks = [set_vert, set_fins, set_digest, set_heart, set_eyes, set_skull_front, set_skull_center, set_skull_end, set_brain]

landmarks_pointset_names = [x['file_name'] for x in all_landmarks]

participants = ['jf', 'tc', 'cs', 'ra', 'kk', 'vc', 'bew', 'ttt', 'kp', 'jo', 'jvm', 'tt']

participants_names = {'jf': 'Jana',
                     'jo': 'Jasmin'}

print(landmarks_pointset_names)

['pointset1_vert', 'pointset2_fins', 'pointset3_digest', 'pointset4_heart', 'pointset5_eyes', 'pointset6_skull_front', 'pointset7_skull_center', 'pointset8_skull_end', 'pointset9_brain']


In [50]:
def read_landmarks(file_name, mode='def'):
    
    if mode == 'lowercase':
        file_name = file_name.lower()
    
    with open(str(file_name), 'r') as f:
        data = f.read()
    
    xml_data = BeautifulSoup(data, "xml")
    points = xml_data.find_all('point')
    
    landmarks = []

    for p in points:
        if p.find('x').text == '0' and p.find('y').text == '0' and p.find('z').text == '0':
            continue

        x = float(p.find('x').text)
        y = float(p.find('y').text)
        z = float(p.find('z').text)

        landmarks.append(np.asarray([x,y,z]))
        
    f.close()
        
    return landmarks

def get_distance(p1, p2):
    return np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2 + (p1[2] - p2[2])**2)

def print_landmark_comparison(landmark_set, landmark1, landmark2, landmark3, landmark4):
     
    #print('------------------------------------------------------------------------')
    print(landmark_set['name'])
    #print('------------------------------------------------------------------------')
    data = {'landmark': landmark_set['landmarks'],
           'AE vs TT': [get_distance(landmark1[i], landmark2[i]) for i in range(len(landmark_set['landmarks']))],
            'AE vs JB': [get_distance(landmark1[i], landmark3[i]) for i in range(len(landmark_set['landmarks']))],
            'TT vs JB': [get_distance(landmark2[i], landmark3[i]) for i in range(len(landmark_set['landmarks']))],
            'JB vs JB2': [get_distance(landmark3[i], landmark4[i]) for i in range(len(landmark_set['landmarks']))]
           }
    
    df = pd.DataFrame(data)
    return df

    ##print(df)
    ##print()
    ##print(df.describe())
    
def read_landmarks_all_three(landmark_set):
    land_ae = read_landmarks(path_alex / landmark_set['file_name'])
    land_tt = read_landmarks(path_TT / landmark_set['file_name'])
    land_jb = read_landmarks(path_JB / landmark_set['file_name'])
    land_jb2 = read_landmarks(path_JB2 / landmark_set['file_name'])

    return land_ae, land_tt, land_jb, land_jb2

def show_results(i):
    land_ae, land_tt, land_jb, land_jb2 = read_landmarks_all_three(all_landmarks[i])
    df = print_landmark_comparison(all_landmarks[i], land_ae, land_tt, land_jb, land_jb2)
    return df.round(1).style.pipe(make_pretty)
    return df.round(1).style.pipe(make_pretty)

def make_pretty(styler):
    #styler.set_caption("Weather Conditions")
    #styler.format(rain_condition)
    #styler.format_index(lambda v: v.strftime("%A"))
    styler.background_gradient(axis=None, vmin=0, vmax=20, cmap="coolwarm")
    #styler.background_gradient(axis=None, cmap="coolwarm")
    return styler
    
        

## Landmark Consistancy

In [14]:
show_results(0)

FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\fe0968\\Documents\\data\\medaka\\landmarks\\workshop_landmarks_selected\\pointsets_alexey\\PointSet1_Vert.mps'

In [233]:
show_results(1)

Fins


Unnamed: 0,landmark,AE vs TT,AE vs JB,TT vs JB,JB vs JB2
0,Pectoral_dorsal most breast fin to body connection 1_right,8.8,139.7,136.9,2.2
1,Pectoral_dorsal most breast fin to body connection 2_left,12.1,140.9,140.4,2.1
2,Abdominal_fins back 1_right,15.6,12.1,6.5,5.4
3,Abdominal_fins back 2_left,7.5,2.0,5.8,11.1


In [234]:
show_results(2)

Digest


Unnamed: 0,landmark,AE vs TT,AE vs JB,TT vs JB,JB vs JB2
0,anus_Center,0.8,15.5,14.9,1.2
1,esophagus,4.9,1.4,5.3,3.2


In [235]:
show_results(3)

Heart


Unnamed: 0,landmark,AE vs TT,AE vs JB,TT vs JB,JB vs JB2
0,tip of bulbus arteriosus vessel inside,10.8,1.1,10.9,1.8
1,sinus venosus,54.7,11.3,45.7,40.3
2,apex of ventricle,7.0,30.2,37.0,37.8
3,anterior most point of ventricle,1.3,3.2,3.7,0.2


In [236]:
show_results(4)

Eyes


Unnamed: 0,landmark,AE vs TT,AE vs JB,TT vs JB,JB vs JB2
0,optic nerve head 1_right,4.1,1.4,5.5,7.2
1,optic nerve head 2_left,2.0,6.3,8.1,8.2
2,optic chiasm_crossing,6.2,2.0,7.1,2.2
3,most_anterior_right,7.6,5.7,1.9,8.1
4,most_anterior_lef,3.3,6.3,7.0,7.8
5,most_posterior_right,5.6,1.1,4.6,1.3
6,most_posterior_left,6.5,3.2,3.8,2.9
7,most_dorsal_right,2.9,6.4,6.8,1.3
8,most_dorsal_left,1.7,7.3,6.7,3.0
9,most_ventral_right,8.5,4.1,7.1,8.7


In [237]:
show_results(5)

Skull Front


Unnamed: 0,landmark,AE vs TT,AE vs JB,TT vs JB,JB vs JB2
0,ventral side of nostril outlet right,5.7,3.9,2.4,1.7
1,ventral side of nostril outlet left,1.0,1.5,1.6,0.2
2,dorsal side of nostril outlet right,1.2,2.3,1.5,2.1
3,dorsal side of nostril outlet left,2.2,1.8,2.3,0.9
4,mandible dentary,0.8,11.7,12.1,3.7
5,tongue tip,0.8,1.0,0.3,1.0
6,upper jaw channel,7.9,6.7,1.5,3.3
7,hyoid fusion,4.3,3.6,0.9,3.2


In [238]:
show_results(6)

Skull Center


Unnamed: 0,landmark,AE vs TT,AE vs JB,TT vs JB,JB vs JB2
0,subhypophysis bone,7.1,8.6,1.9,5.2
1,hyoid between branchial arches,2.2,1.7,0.8,11.9
2,split of afferent branchial artery 1,4.7,3.6,7.2,2.1
3,split of afferent branchial artery 2,3.7,3.3,6.8,2.7
4,split of afferent branchial artery 3,20.7,4.0,19.7,2.0
5,gills bone right,3.0,1.0,3.8,4.0
6,gills bone left,3.3,1.3,4.1,4.1


In [240]:
show_results(7)

Skull End


Unnamed: 0,landmark,AE vs TT,AE vs JB,TT vs JB,JB vs JB2
0,skull landmark A right,1.4,2.7,3.7,14.7
1,skull landmark A left,2.9,2.1,1.1,17.4
2,transition skull to spine,2.0,3.4,4.9,3.7
3,fusion of epibranchial artery 2,3.7,11.4,9.6,3.7
4,center of utricle right,9.1,9.1,1.2,1.3
5,center of utricle left,8.5,8.3,0.5,1.2


## Testing

In [115]:
data_path = Path('c:\\Users\\fe0968\\Documents\\Medaka\\inbredpanel_pointsets_v2\\inbredpanel_pointsets\\')

In [116]:
files = os.listdir(data_path)
files = [f for f in files if os.path.isfile(data_path / f)]
len(files)

1813

In [124]:
duplicates = []
same_name_diff_content = []

for f in files:
    if '(1)' in f:
        if filecmp.cmp(data_path / f, data_path / f.replace(' (1)', '')):
            duplicates.append(f)
        else:
            same_name_diff_content.append(f)
            
            
print('Duplicates:', len(duplicates))
        
        
filtered_files = [x for x in files if (x not in duplicates)]
print('New:', len(filtered_files))

print()
print('-----------------------------------------------')
print('WARNING. Same name, different content', same_name_diff_content)
print('-----------------------------------------------')

filtered_files = [x for x in filtered_files if (x not in same_name_diff_content)]
print('New:', len(filtered_files))

Duplicates: 80
New: 1733

-----------------------------------------------
-----------------------------------------------
New: 1732


In [135]:
samples = []
points = []
names = []

for f in filtered_files:
    
    s = f.split('_')
    samples.append(s[0])
    names.append(s[-1][:-4].lower())
    #points.append(s[1])
    points.append(f.replace(f'{s[0]}_', '').replace(f'_{s[-1]}', '').lower())
    
data = {'sample': samples,
        'points_set': points,
       'name': names}


print(landmarks_pointset_names)

print()
print('-----------------------------------------------')
print('WARNING: Problematic point set naming')
print('-----------------------------------------------')
print([p for p in points if p not in landmarks_pointset_names])
    
# TODO: Make a list of exact file names to check manually


df = pd.DataFrame(data)
    

['pointset1_vert', 'pointset2_fins', 'pointset3_digest', 'pointset4_heart', 'pointset5_eyes', 'pointset6_skull_front', 'pointset7_skull_center', 'pointset8_skull_end', 'pointset9_brain']

-----------------------------------------------
-----------------------------------------------
['digest', 'skull_front', 'heart', 'ointset3_digest', 'pointset2_vert', 'pointset2', 'pointset3-digest', 'pointset9', 'pointset2']


In [134]:
people = set(names)
print(people)

print('-----------------------------------------------')
print('WARNING: Problematic names')
print('-----------------------------------------------')
for x in people:
    if x not in participants:
        print('Problematic name: ', x)
        print('Count:', names.count(x))

{'jf', 'tc', 'cs', 'ra', 'kk', 'fins', 'brain', 'vc', 'bew', 'ttt', 'kp', 'jo', 'jvm', 'tt'}
-----------------------------------------------
-----------------------------------------------
Problematic name:  fins
Count: 2
Problematic name:  brain
Count: 1


In [130]:
print(names.count('brain'))
print(names.count('fins'))

1
2


In [145]:
df.groupby(['name']).count()

Unnamed: 0_level_0,sample,points_set
name,Unnamed: 1_level_1,Unnamed: 2_level_1
bew,120,120
brain,1,1
cs,177,177
fins,2,2
jf,36,36
jo,299,299
jvm,180,180
kk,180,180
kp,108,108
ra,160,160


In [147]:
read_landmarks(data_path / '1064_pointset1_vert_JO.mps')

[array([410.68527946, 361.34097292, 837.83204157]),
 array([426.68527946, 318.24754659, 907.56993833]),
 array([424.81810551, 311.09290667, 963.36025574]),
 array([ 421.68527946,  308.14398353, 1020.22346386]),
 array([ 418.13282604,  311.98799983, 1076.01378127]),
 array([ 409.99999999,  318.95207945, 1136.04074874])]

In [148]:
read_landmarks(data_path / '1064_pointset1_vert_JO (1).mps')

[array([284.99999999, 262.78677778, 578.70023437]),
 array([291.18236118, 238.71703727, 618.7555187 ]),
 array([295.99999999, 227.14703661, 656.28763551]),
 array([300.99999999, 223.67768127, 692.55816857]),
 array([306.99999999, 221.15451376, 729.14409756]),
 array([311.99999999, 220.83911782, 766.04542249]),
 array([ 195.00365284,  366.0317758 , 1767.        ])]

In [60]:
filecmp.cmp(data_path / '1100_PointSet6_Skull_Front_RA.mps', data_path / '1100_PointSet6_Skull_Front_RA (1).mps')

True

In [31]:
[f for f in os.listdir(data_path) if os.path.isfile(f)]

[]

In [38]:
len([x for x in data_path.iterdir() if x.is_file()])

1816

SyntaxError: invalid syntax (<ipython-input-103-243c0d84341c>, line 1)