## Convert annotation into um and generate downsampled txt file from resampled file

Generating pandas data frame from vaa3d annotation.eswc file

1. convert them into real units (ie. um) based acquisition microns/pixels
2. Resample this eswc file in V3d (plugin- neuron untilities-resample swc) in a fixes stepsize (ie.1 um)
3. Load the resampled file and downsample them to the corresponding dimensions (ie.25um)
4. saves as transformix compatible format (in case for sample2ara)

__Note that step 2 has to be done in V3d__

In progress: get endings based on the parent name


In [1]:
import os

import pandas as pd

import numpy as np

import tkinter as tk
import tkinter.filedialog as fdialog
from tkinter import simpledialog

In [2]:
anno_file=fdialog.askopenfile(initialdir='Z:\\', title='Select the eswc file containing the annotations').name 

xy = simpledialog.askfloat("Input", "What is the x and y resolution in um?",
                               minvalue=0.0, maxvalue=100)
z = simpledialog.askfloat("Input", "What is the z resolution in um?",
                               minvalue=0.0, maxvalue=100)
goal_xyz = simpledialog.askfloat("Input", "What do you want to downsample the resolution to '(in um)' ?",
                               minvalue=10, maxvalue=100)

outdir = fdialog.askdirectory(title='Please select the output directory')

In [66]:
anno=open(anno_file,'r')
anno_data=anno.readlines()
# heading is stored in anno_data[2], 1st line basically useless

headings=anno_data[2].rstrip('\n').split(' ')
annotations=[lines.rstrip('\n').split(' ') for lines in anno_data[3:]]

In [68]:
annotation_df=pd.DataFrame(annotations, columns=headings)
annotation_df.columns

Index(['#n', 'type', 'x', 'y', 'z', 'radius', 'parent', 'seg_id', 'level',
       'mode', 'timestamp', 'TFresindex'],
      dtype='object')

In [69]:
annotation_df.head()

Unnamed: 0,#n,type,x,y,z,radius,parent,seg_id,level,mode,timestamp,TFresindex
0,104067,3,6014.54,2950.27,710.098,1.0,104068,0,0,0,0,0
1,104068,3,6017.9,2950.36,710.257,1.0,104069,0,0,0,0,0
2,104069,3,6020.37,2949.71,710.458,1.0,104070,0,0,0,0,0
3,104070,3,6022.29,2949.62,710.599,1.0,104071,0,0,0,0,0
4,104071,3,6026.89,2950.93,710.979,1.0,104072,0,0,0,0,0


In [70]:
annotation_df['x']=pd.to_numeric(annotation_df['x'])*xy
annotation_df['y']=pd.to_numeric(annotation_df['y'])*xy
annotation_df['z']=pd.to_numeric(annotation_df['z'])*z
annotation_df.head()

Unnamed: 0,#n,type,x,y,z,radius,parent,seg_id,level,mode,timestamp,TFresindex
0,104067,3,4811.632,2360.216,3550.49,1.0,104068,0,0,0,0,0
1,104068,3,4814.32,2360.288,3551.285,1.0,104069,0,0,0,0,0
2,104069,3,4816.296,2359.768,3552.29,1.0,104070,0,0,0,0,0
3,104070,3,4817.832,2359.696,3552.995,1.0,104071,0,0,0,0,0
4,104071,3,4821.512,2360.744,3554.895,1.0,104072,0,0,0,0,0


In [71]:
#Write the converted value back to a eswc file to feed into v3d resample_swc

#In this text file, the x y z represents the coordinates in um

#write the first 3 lines exactly like the loaded file
tfile = open(outdir+'/converted.eswc', 'a')
tfile.write(anno_data[0])
tfile.write(anno_data[1])
tfile.write(anno_data[2])
tfile.write(annotation_df.to_string(header=False, index=False))
tfile.close()

## note this file generates more spaces as separators but thankfully resample_Swc plugin didn't care


## Now, take the resampled eswc file and downsample into 25um

Need to remember the resample stepsize (=1 um in the usual case)


In [85]:
resample_file=fdialog.askopenfile(initialdir='Z:\\', title='Select the eswc file containing the annotations').name 

resampled_xy = simpledialog.askfloat("Input", "What is the x and y resolution in um?",
                               minvalue=0.0, maxvalue=100)
resampled_z = simpledialog.askfloat("Input", "What is the z resolution in um?",
                               minvalue=0.0, maxvalue=100)
goal_xyz = simpledialog.askfloat("Input", "What do you want to downsample the resolution to '(in um)' ?",
                               minvalue=10, maxvalue=100)

outdir = fdialog.askdirectory(title='Please select the output directory')

In [86]:
ratioxy=goal_xyz/resampled_xy
ratioz= goal_xyz/resampled_z
# calculating the downsampling factor
# based on 0.8 x 0.8 x 5um imaging resolution (x y and z)
# downsampling to match the 25um voxel allen reference template

message = (f"Resampled annotation step size is {resampled_xy, resampled_xy, resampled_z} um in x y z. "
f"downsampling to {goal_xyz} um. "
f"dowmsample ratio is xy = {ratioxy} and z = {ratioz}.")

message

'Resampled annotation step size is (1.0, 1.0, 1.0) um in x y z. downsampling to 25.0 um. dowmsample ratio is xy = 25.0 and z = 25.0.'

In [113]:
resampled_anno=open(resample_file,'r')
resampled_anno_data=resampled_anno.readlines()
# heading is stored in anno_data[2], 1st line basically useless

headings=resampled_anno_data[2].rstrip('\n').replace(' ', '').split(',')
resampled_annotations=[lines.rstrip(' 0\n').split(' ') for lines in resampled_anno_data[3:]]
#slight modification on replacing and stripping due to the format of the resampled swc

resampled_annotation_df=pd.DataFrame(resampled_annotations, columns=headings)

resampled_annotation_df.head()

Unnamed: 0,#id,type,x,y,z,r,pid
0,1,3,4811.63,2360.22,3550.49,1,2
1,2,3,4812.59,2360.24,3550.77,1,3
2,3,3,4813.55,2360.27,3551.06,1,121198
3,4,3,4814.32,2360.29,3551.28,1,5
4,5,3,4815.19,2360.06,3551.73,1,6


In [114]:
ds_x= pd.to_numeric(resampled_annotation_df['x'])
ds_x=ds_x/ratioxy
ds_xround=ds_x.astype(int).astype(str)
ds_y= pd.to_numeric(resampled_annotation_df['y'])
ds_y=ds_y/ratioxy
ds_yround=ds_y.astype(int).astype(str)

ds_z= pd.to_numeric(resampled_annotation_df['z'])
ds_z=ds_z/ratioz
ds_zround=ds_z.astype(int).astype(str)

# rounds by astype int, then conver to string

In [115]:
ds_coordinates= pd.DataFrame(columns=['x','y','z'])
ds_coordinates['x']=ds_xround
ds_coordinates['y']=ds_yround
ds_coordinates['z']=ds_zround
# put in to panda dataframe just in case for future use

In [116]:
q = [' '.join(x) for x in zip(ds_xround,ds_yround,ds_zround)]
# create a list where row1= x1 y1 z1 row2= x2 y2 z2 and etc 
# for writing to the text file

In [117]:
num_row=len(annotation_df.index)

out_name= outdir[3:]+ f'_{goal_xyz}voxel_trace_1umStepsize.txt'
out_name

'AL066_25.0voxel_trace_1umStepsize.txt'

In [118]:
f=open(outdir+'/'+out_name,'w+')
f.write('point'+'\n')
f.write(str(num_row)+'\n')

for lines in q:
    f.write(lines+'\n')

f.close()

#works, yay

In [150]:
#attempt to find endings for each branch
# the #n of the ending should not be a parent for something else

#first put these 2 columns in to list
list_n= resampled_annotation_df['#id'].tolist()
list_parent= resampled_annotation_df['pid'].tolist()

##DONOT RUN, some how this takes VERY long. Consider finding the endings using the non-resampled file where we have less points
ending_list=[counter for counter,idx in enumerate(list_n) if idx not in list_parent]


KeyboardInterrupt: 

In [148]:
endings_df=pd.DataFrame(resampled_annotation_df.iloc[ending_list])
endings_df.head()

Unnamed: 0,#n,type,x,y,z,radius,parent,seg_id,level,mode,timestamp,TFresindex
0,104067,3,4811.632,2360.216,3550.49,1.0,104068,0,0,0,0,0
55,104123,3,4953.056,2357.856,3579.955,1.0,104124,0,0,0,0,0
57,104079,3,4954.408,2358.768,3580.555,1.0,104080,0,0,0,0,0
74,104125,3,4999.888,2360.488,3590.92,1.0,104126,0,0,0,0,0
89,104159,3,5095.344,2369.136,3610.45,1.0,104160,0,0,0,0,0


In [120]:
f=open(outdir+'/endings.txt','w+')

for idx in ending_list:
    loc=q[idx]
    f.write(loc+'\n')
f.close()
    

In [43]:
annotation_df=pd.DataFrame(annotations, columns=headings)
annotation_df.columns

AttributeError: 'str' object has no attribute 'type'