# Install, set up drives, and imports

In [None]:
import glob
from tqdm.autonotebook import tqdm
from shapely.geometry import shape
import os
import geojson
import pandas as pd


In [2]:
# Run configuration.
# IN_CWR_SERVER selects which data root the next cell uses (server share vs. local disk).
IN_CWR_SERVER = False
# Slide-set identifier; it is used both as a sub-folder name and as the file-name prefix.
slideset = "cav2"

# Files


In [3]:
# Root data directory: server share when running on the CWR server, local disk otherwise.
if IN_CWR_SERVER:
  datadir = "//datacd31/"
else:
  datadir = "C:\\research\\cav\\datacd31\\"

# Input folders: GeoJSON exports and thumbnail images for the selected slide set.
jsondir = datadir + "json\\" + slideset + "\\"
imgdir = datadir + "thumb\\" + slideset + "\\"

# Thumbnails to process, gathered cohort by cohort in this fixed order.
# To debug on a subset, replace this list with a couple of explicit globs,
# e.g. glob.glob(imgdir + "/CAV2-DY1-15_thumb.png") + glob.glob(imgdir + "/CAV2-HC-6_thumb.png").
img_files = [
  thumb_path
  for cohort_tag in ("DC", "DY1", "USHER", "MATCH", "HC")
  for thumb_path in glob.glob(imgdir + slideset + "-" + cohort_tag + "-*_thumb.png")
]

# Output folder and the CSV names written by the save cell.
outdir = datadir + "output\\" + slideset + "\\"
DAB_CSV = "21cd31_dab.csv"
HEM_CSV = "21cd31_hem.csv"
TIS_CSV = "21cd31_tis.csv"
NUC_CSV = "21cd31_nuc.csv"

# Set to False to skip writing the CSV archives.
save_csv = True

# Quick sanity check: number of thumbnails matched.
print(len(img_files))


115


# Functions

In [4]:
#---------------------------------------------------
def readAnnoJsonFile(ann_fname):
  """Read an annotation GeoJSON file and return its geometries grouped by class.

  Each object in the file is bucketed by ``properties.classification.name``.
  Only the classes "CD31DAB", "CD31HEMA" and "CD31Tissue" are kept; objects
  with any other class, or with no classification at all, are skipped
  instead of raising ``KeyError``.

  Parameters
  ----------
  ann_fname : str
      Path of the annotation GeoJSON file.

  Returns
  -------
  tuple of (list, list, list)
      Shapely geometries for the "CD31DAB", "CD31HEMA" and "CD31Tissue"
      annotations, in that order. Within each list the file order is kept.
  """
  with open(ann_fname) as a:
    annotationgeojson = geojson.load(a)

  # A single pass over the file fills all three buckets.
  geoms_by_class = {"CD31DAB": [], "CD31HEMA": [], "CD31Tissue": []}
  for obj in annotationgeojson:
    # 'properties' or 'classification' may be missing or null on unclassified objects.
    classification = (obj.get('properties') or {}).get('classification') or {}
    class_name = classification.get('name')
    if class_name in geoms_by_class:
      geoms_by_class[class_name].append(shape(obj["geometry"]))

  return geoms_by_class["CD31DAB"], geoms_by_class["CD31HEMA"], geoms_by_class["CD31Tissue"]

#---------------------------------------------------
def readNucJsonFile(nuc_fname):
  """Read a GeoJSON file and return the geometry of every object in it.

  Parameters
  ----------
  nuc_fname : str
      Path of the GeoJSON file (presumably the nucleus detections, going by
      the name; confirm against the exporter).

  Returns
  -------
  list
      One shapely geometry per object, in file order.
  """
  with open(nuc_fname) as nuc_file:
    nuc_features = geojson.load(nuc_file)

  nuc_geoms = []
  for feature in nuc_features:
    nuc_geoms.append(shape(feature["geometry"]))
  return nuc_geoms

# Main

In [5]:

def _iter_parts(geom):
  """Return the component geometries of `geom`.

  Multi-part geometries expose their members through `.geoms`; iterating the
  geometry object itself is no longer supported in Shapely 2.0. A single-part
  geometry has no `.geoms` and is returned as a one-element tuple.
  """
  return getattr(geom, "geoms", (geom,))


def _stack_frames(frames):
  """Concatenate per-slide frames in one call; return an empty frame if there are none."""
  non_empty = [frame for frame in frames if not frame.empty]
  if non_empty:
    return pd.concat(non_empty)
  # Nothing to stack: keep the column layout when at least one (empty) frame exists.
  return frames[0] if frames else pd.DataFrame()


# Per-slide frames are collected here and concatenated once after the loop.
# DataFrame.append was removed in pandas 2.0, and appending on every iteration
# re-copies all accumulated rows each time.
dab_frames, hem_frames, tis_frames, nuc_frames = [], [], [], []

for image_fname in tqdm(img_files):
  # The JSON exports share the thumbnail's base name, in the json folder.
  ann_fname = image_fname.replace(imgdir, jsondir).replace("_thumb.png", "_ann.json")
  nuc_fname = image_fname.replace(imgdir, jsondir).replace("_thumb.png", "_nuc.json")

  # Slide name, e.g. CAV-DC-1, CAV-DY1-1
  fname = os.path.basename(image_fname).replace("_thumb.png", "").replace("\\", "")
  # Cohort code: first two characters after the slide-set prefix, e.g. DC, DY
  cohort = fname.replace(slideset.upper() + "-", "")[:2]

  # read geojson files
  dab_geom_list, hem_geom_list, tis_geom_list = readAnnoJsonFile(ann_fname)
  nuc_geom_list = readNucJsonFile(nuc_fname)

  # Flatten each annotation into its component polygons: one row per polygon.
  dab_poly = [dab for dab_geom in dab_geom_list for dab in _iter_parts(dab_geom)]
  dab_area_list = [dab.area for dab in dab_poly]

  hem_area_list = [hem.area for hem_geom in hem_geom_list for hem in _iter_parts(hem_geom)]
  tis_area_list = [tis.area for tis_geom in tis_geom_list for tis in _iter_parts(tis_geom)]
  # Nucleus geometries are used as-is, without flattening.
  nuc_area_list = [nuc.area for nuc in nuc_geom_list]

  # Build this slide's frames; the scalar cohort/fname values are broadcast to every row.
  temp_d_df = pd.DataFrame({'cohort': cohort, 'fname': fname, 'dab_area': dab_area_list, 'dab_poly': dab_poly})
  temp_h_df = pd.DataFrame({'cohort': cohort, 'fname': fname, 'hem_area': hem_area_list})
  temp_t_df = pd.DataFrame({'cohort': cohort, 'fname': fname, 'tis_area': tis_area_list})
  temp_n_df = pd.DataFrame({'cohort': cohort, 'fname': fname, 'nuc_area': nuc_area_list})

  dab_frames.append(temp_d_df)
  hem_frames.append(temp_h_df)
  tis_frames.append(temp_t_df)
  nuc_frames.append(temp_n_df)

# One concatenation per table.
dab_df = _stack_frames(dab_frames)
hem_df = _stack_frames(hem_frames)
tis_df = _stack_frames(tis_frames)
nuc_df = _stack_frames(nuc_frames)


100%|██████████| 115/115 [11:23<00:00,  5.94s/it]


In [6]:
# Snapshot copies of the accumulated frames. They are defined unconditionally so
# that later cells can use them whether or not anything is written to disk.
f_dab_df = dab_df.copy()
f_hem_df = hem_df.copy()
f_tis_df = tis_df.copy()
f_nuc_df = nuc_df.copy()

if save_csv:
  if IN_CWR_SERVER:
    print("not saving csv")
  else:
    print("save to pandas output")
    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(outdir, exist_ok=True)
    # Each table is written as a zipped CSV: <name>.csv stored inside <name>.csv.zip.
    # Writing happens only in this branch; previously it ran even after
    # "not saving csv" was printed or when save_csv was False.
    for _out_df, _csv_name in (
      (f_dab_df, DAB_CSV),
      (f_hem_df, HEM_CSV),
      (f_tis_df, TIS_CSV),
      (f_nuc_df, NUC_CSV),
    ):
      print(f"saving: {outdir}{_csv_name}.zip")
      _out_df.to_csv(f"{outdir}{_csv_name}.zip", index=False, compression=dict(method='zip', archive_name=f'{_csv_name}'))
print("All done")

save to pandas output
saving: C:\research\cav\datacd31\output\cav2\21cd31_dab.csv.zip
saving: C:\research\cav\datacd31\output\cav2\21cd31_hem.csv.zip
saving: C:\research\cav\datacd31\output\cav2\21cd31_tis.csv.zip
saving: C:\research\cav\datacd31\output\cav2\21cd31_nuc.csv.zip
All done


# Working area