0. Scan glass slides at 20x
1. Create Excel sheet with patient information
2. Screen H&E images to select potentially healthy sections
3. Annotate the Region of Interest (ROI) (Optional)
4. Convert ROI XML to Image (Code: roixml2png.py)
5. Convert H&E images to 1um resolution tif to apply DeepLab (Code: rescale_wsi.py) (Matlab Code: infer_12class)
6. Annotate 12 tissue compartments, apply DeepLab, and evaluate performance metrics
7. Apply HoVerNet
8. Rotate and crop tissue pieces, DLmask, HoVerNet segmented nuclei mask (Code: align_he_dl_cnt.py)
9. Calculate tissue composition (Code: DLcomposition.py)
10. Calculate epidermis thickness, waviness, roughness (Matlab Code)
12. Calculate dermis thickness, waviness (Matlab Code)
13. Calculate ECM anisotropy, interstitial orientation, thick gap ratio, thick gap average (Matlab Code)
14. Calculate tissue morphology (Matlab Code)
15. ECM cell count and morphology + std using Manual Threshold method
16. Distance from ECM cell to tissue
17. All cell-type morphology, orientation, and distance to tissue using HoVerNet (Code: hovernet_json2df + summarize_nuclei_shape_df; these codes are unrotated and need an update if we want to add orientation or postprocess the DLmask)
18. Further add features such as more detailed cell subtyping (TBD)

In [None]:
import os
import pandas as pd
from natsort import natsorted
from PIL import Image
from matplotlib import pyplot as plt

#utilities
from roixml2png import roixml2png
from rescale_wsi import rescale_wsi
from align_he_dl_cnt import align_he_dl_cnt
from DLcomposition import DLcomposition

In [None]:
# --- Input / output locations (UNC network-share paths) ---
# Whole-slide image folder; handed to roixml2png, used to build the rescale_wsi
# inputs, and handed to align_he_dl_cnt.
wsisrc = r'\\fatherserverdw\kyuex\clue images'
# presumably the 1um-rescaled tifs produced in Step 5 — not referenced again in this notebook
oneum = r'\\fatherserverdw\kyuex\clue images\1um'
# Destination root given to align_he_dl_cnt (Step 8); the crop folders below live under it.
aligned_images_dst = r'\\fatherserverdw\kyuex\datadst\20220929'
# Image crops; hasimcrop looks here for '<slide>sec1.png'.
imcropsrc = os.path.join(aligned_images_dst,'imcrop')
# Not referenced again in this notebook.
imcroproi = os.path.join(aligned_images_dst,'imcrop_roi')
# DL mask source folder handed to align_he_dl_cnt.
dlsrc = r'\\fatherserverdw\kyuex\clue images\1um\classification_v9_combined'
# DL mask crops; listed by the Step 9 composition cell.
dlcropsrc = os.path.join(aligned_images_dst,'dlcrop')
# Not referenced again in this notebook.
dlcroproi = r'\\fatherserverdw\kyuex\clue images\1um\classification_v9_combined\dlcrop_roi'
# Folder in which Step 4 looks for one ROI annotation XML per slide.
roixml = r'\\fatherserverdw\kyuex\clue images\annotations\roi'
# ROI mask PNGs; checked by hasroi and handed to align_he_dl_cnt.
roisrc = r'\\fatherserverdw\kyuex\clue images\annotations\roi\labeledmask_20rsf'
# Not referenced again in this notebook.
tissue_area_small = r'\\fatherserverdw\kyuex\clue images\annotations\roi\TA_20rsf'
# Not referenced again in this notebook.
twelve_annot = r'\\fatherserverdw\kyuex\clue images\annotations\12class'
# Path of the Excel look-up table. NOTE: the name LUT is later rebound to the
# DataFrame returned by pd.read_excel(LUT).
LUT = r"\\fatherserverdw\kyuex\imlist_all.xlsx"
# HoVerNet json folder; checked by hasjson and handed to align_he_dl_cnt.
cntsrc = r'\\fatherserverdw\kyuex\clue images\hovernet_out\json'
# Folder that receives composition.csv.
analysisdst= r'\\fatherserverdw\kyuex\clue images\analysis output'

In [None]:
# Bare expression: shows the resolved DL-crop directory as the cell output.
dlcropsrc

In [None]:
# Pipeline switches: each flag guards one `if run_...:` cell further down.
# Steps 4, 5 and 8 are currently switched off ...
run_roixml2png = run_rescale_wsi = run_align_he_dl_cnt = False
# ... and only the Step 9 tissue-composition cell is enabled.
run_DLcomposition = True

In [None]:
#Load healthy tissue look up table (LUT)
# `LUT` starts out as the path of the Excel sheet and is rebound to the loaded
# table here. Only read the file while it is still a path, so that re-running
# this cell does not pass a DataFrame to read_excel.
if not isinstance(LUT, pd.DataFrame):
    LUT = pd.read_excel(LUT)
# xmlist = LUT['filename'][(LUT['student score']>1) & (LUT['Block or Slide?']=="Both")]
# Keep slides with a student score above 1. The explicit .copy() makes the
# result an independent frame, so the columns added to it in later cells do not
# raise pandas' SettingWithCopyWarning.
LUThealthy = LUT[LUT['student score'] > 1].copy()
# Subset of the above whose 'body part 1' is "back" (case-insensitive).
LUThealthyback = LUThealthy[LUThealthy['body part 1'].str.lower() == "back"].copy()

In [None]:
#Step 4: ROI XML 2 Image
#Saves Image to where XML is in subfolders
if run_roixml2png:
    # Build '<roixml>/<slide>.xml' for every healthy back slide. Only the file
    # extension is swapped; a plain str.replace('ndpi', 'xml') would also
    # rewrite "ndpi" occurring anywhere else in the slide name.
    xmlist = [
        os.path.join(roixml, os.path.splitext(slide_name)[0] + '.xml')
        for slide_name in LUThealthyback['filename']
    ]
    # Plain loop: the calls are made for their side effect, the return values
    # are not collected.
    for xmlpth in xmlist:
        roixml2png(xmlpth, wsisrc)
    # pd.DataFrame(np.array(roiinfo),columns=['fn','ROIA','TA','ratio']).to_csv(r"\\fatherserverdw\kyuex\ROITA_ratio.csv")

In [None]:
#Step 5: rescale wsi to 1um for applying deeplab
#Saves Image to where wsi is in subfolder
if run_rescale_wsi:
    # Full paths of every healthy slide (all body parts), in natural sort order.
    wsis = natsorted(
        os.path.join(wsisrc, slide_name) for slide_name in LUThealthy.filename
    )
    for idx, wsi in enumerate(wsis):
        # second argument 1: presumably the target resolution in um — confirm in rescale_wsi
        rescale_wsi(wsi, 1)
        # progress: zero-based position / total
        print(idx, '/', len(wsis))

In [None]:
# Narrow down the LUT to the slides that have a json, a DL mask, and an ROI image
def hasjson(row):
    """Return True when `cntsrc` contains '<slide>.json' for the given slide.

    `row` is a slide filename (one value of the LUT 'filename' column); its
    extension is stripped before '.json' is appended.
    """
    stem = os.path.splitext(row)[0]
    json_path = os.path.join(cntsrc, stem + '.json')
    return os.path.exists(json_path)
def hasdl(row):
    """Return True when the DL mask '<slide>.tif' exists for the given slide.

    `row` is a slide filename (one value of the LUT 'filename' column); its
    extension is stripped before '.tif' is appended.

    The lookup uses `dlsrc`, the DL mask folder that align_he_dl_cnt is given
    as input. The previous lookup in `dlcropsrc` pointed at the folder of
    aligned crops, whose files are named '<slide>sec1.png', so a '<slide>.tif'
    would not be expected there.
    NOTE(review): confirm that the masks in `dlsrc` are named '<slide>.tif'.
    """
    stem = os.path.splitext(row)[0]
    return os.path.exists(os.path.join(dlsrc, stem + '.tif'))
def hasroi(row):
    """Return True when `roisrc` contains '<slide>.png' for the given slide.

    `row` is a slide filename (one value of the LUT 'filename' column); its
    extension is stripped before '.png' is appended.
    """
    base = os.path.splitext(row)[0]
    roi_png = os.path.join(roisrc, base + '.png')
    return os.path.exists(roi_png)
# Work on an explicit copy: LUThealthyback was obtained by boolean-filtering
# another frame, and adding columns to such a slice triggers pandas'
# SettingWithCopyWarning.
LUThealthyback = LUThealthyback.copy()
# One availability flag per slide; the checkers take the filename directly,
# so no lambda wrapper is needed.
LUThealthyback['hasjson'] = LUThealthyback['filename'].apply(hasjson)
LUThealthyback['hasdl'] = LUThealthyback['filename'].apply(hasdl)
LUThealthyback['hasroi'] = LUThealthyback['filename'].apply(hasroi)
# Explicit boolean masks (astype(bool) keeps them boolean even when the frame is empty).
has_json = LUThealthyback['hasjson'].astype(bool)
has_dl = LUThealthyback['hasdl'].astype(bool)
has_roi = LUThealthyback['hasroi'].astype(bool)
# Keep only slides for which all three inputs exist.
LUThealthyback2 = LUThealthyback[has_json & has_dl & has_roi]
# reset_index() without drop=True keeps the previous row label in an 'index' column.
LUThealthyback2 = LUThealthyback2.reset_index()
# Cell output: (# slides with all inputs, # with json, # with DL mask, # healthy back slides)
len(LUThealthyback2), int(has_json.sum()), int(has_dl.sum()), len(LUThealthyback)

In [None]:
# Step 8: Rotate and crop tissue pieces, DLmask, HoVerNet segmented nuclei mask (Code: align_he_dl_cnt.py)
if run_align_he_dl_cnt:
    for idx,row in LUThealthyback2.iterrows():
        # progress: zero-based row label / total
        print(idx,'/',len(LUThealthyback2))
        fn,ext = os.path.splitext(row['filename'])
        try:
            rotationdf = align_he_dl_cnt(aligned_images_dst,fn,wsisrc,dlsrc,cntsrc,roisrc)
        except Exception as err:
            # Best effort: report the failing slide together with the reason and
            # carry on with the next one. Catching Exception (instead of a bare
            # except) lets KeyboardInterrupt / SystemExit stop the loop.
            print(fn, repr(err))

In [None]:
# Narrow down the LUT to the slides that have a rotated image crop (the DL crop is checked separately below)
def hasimcrop(row):
    """Return True when `imcropsrc` contains '<slide>sec1.png' for the given slide.

    `row` is a slide filename (one value of the LUT 'filename' column); its
    extension is stripped before 'sec1.png' is appended.
    """
    stem, _ = os.path.splitext(row)
    first_crop = os.path.join(imcropsrc, stem + 'sec1' + '.png')
    return os.path.exists(first_crop)
# Flag the slides whose first image crop exists ...
LUThealthyback2['hasimcrop'] = LUThealthyback2['filename'].apply(hasimcrop)
# ... and keep just those rows, renumbered from 0.
LUThealthyback3 = LUThealthyback2[LUThealthyback2['hasimcrop']].reset_index(drop=True)

In [None]:
def hasdlcrop(row):
    """Return True when `dlcropsrc` contains '<slide>sec1.png' for the given slide.

    `row` is a slide filename (one value of the LUT 'filename' column); its
    extension is stripped before 'sec1.png' is appended.
    """
    stem = os.path.splitext(row)[0]
    first_dl_crop = os.path.join(dlcropsrc, stem + 'sec1.png')
    return os.path.exists(first_dl_crop)
# Step 9: tissue composition of every DL-mask crop of every healthy slide.
# Copy first: LUThealthy was obtained by boolean-filtering the LUT, and adding a
# column to such a slice triggers pandas' SettingWithCopyWarning.
LUThealthy = LUThealthy.copy()
LUThealthy['hasdlcrop'] = LUThealthy['filename'].apply(hasdlcrop)
# Slides that have at least their first DL crop, renumbered from 0.
LUThealthy2 = LUThealthy[LUThealthy['hasdlcrop'].astype(bool)].reset_index(drop=True)
if run_DLcomposition:
    fns = []    #filename
    comps = []  #composition data
    secs = []   #sectionID
    # List the crop folder once (not once per slide); natural sort gives a
    # stable sec1, sec2, ..., sec10 order in the output.
    dlcrop_files = natsorted(os.listdir(dlcropsrc))
    for idx,row in LUThealthy2.iterrows():
        print(idx+1,'/',len(LUThealthy2))
        fn,ext = os.path.splitext(row['filename'])
        # Crops are named '<slide>sec<N>.<ext>'. Matching on that prefix
        # (instead of "slide name appears anywhere in the file name") keeps
        # e.g. slide "A1" from also collecting the crops of slide "A10".
        prefix = fn + 'sec'
        sections = [name for name in dlcrop_files if name.startswith(prefix)]
        for section in sections:
            comps.append(DLcomposition(os.path.join(dlcropsrc,section)))
            fns.append(row['filename'])
            # Section ID = whatever follows '<slide>sec' in the file stem, so
            # multi-digit IDs are kept whole (section[-5] kept one character).
            secs.append(os.path.splitext(section)[0][len(prefix):])
    # Output layout: filename column, section column, then the composition values.
    pd.concat([pd.DataFrame([fns,secs]).T,pd.DataFrame(comps)],axis=1).to_csv(os.path.join(analysisdst,'composition.csv'))

In [None]:
# Flag each row of avgdf.csv whose image ID also occurs in the epidermis
# profile sheet, then write the table back to the same file.
a=pd.read_csv(r"\\fatherserverdw\kyuex\clue images\analysis output\avgdf.csv")
# header=3: the column names are taken from the 4th line of the file.
b=pd.read_csv(r"\\fatherserverdw\kyuex\clue images\analysis output\complete_datasheet\epidermis_profile.csv",header=3)
def isin2(row):
    """Return True when any `b['filename']` entry contains '<row>.ndpi'.

    `row` is one image ID from `a['imID']`. The match is a literal substring
    test (regex=False), so '.' and other regex metacharacters in the ID are not
    interpreted as a pattern; missing filenames count as "no match" (na=False).
    """
    return b['filename'].str.contains(row+'.ndpi', regex=False, na=False).any()
a['isin']=a['imID'].apply(isin2)
# index=False: this file is read and rewritten in place, so writing the row
# index would add one more "Unnamed: 0"-style column on every run of this cell.
a.to_csv(r"\\fatherserverdw\kyuex\clue images\analysis output\avgdf.csv", index=False)

In [None]:
# Flag each row of cvdf.csv whose image ID also occurs in the epidermis
# profile sheet, then write the table back to the same file.
a=pd.read_csv(r"\\fatherserverdw\kyuex\clue images\analysis output\cvdf.csv")
# header=3: the column names are taken from the 4th line of the file.
b=pd.read_csv(r"\\fatherserverdw\kyuex\clue images\analysis output\complete_datasheet\epidermis_profile.csv",header=3)
def isin2(row):
    """Return True when any `b['filename']` entry contains '<row>.ndpi'.

    `row` is one image ID from `a['imID']`. The match is a literal substring
    test (regex=False), so '.' and other regex metacharacters in the ID are not
    interpreted as a pattern; missing filenames count as "no match" (na=False).
    """
    return b['filename'].str.contains(row+'.ndpi', regex=False, na=False).any()
a['isin']=a['imID'].apply(isin2)
# index=False: this file is read and rewritten in place, so writing the row
# index would add one more "Unnamed: 0"-style column on every run of this cell.
a.to_csv(r"\\fatherserverdw\kyuex\clue images\analysis output\cvdf.csv", index=False)

In [None]:
# Load the epidermis profile sheet (header=3: column names come from the 4th
# line of the file) ...
b=pd.read_csv(r"\\fatherserverdw\kyuex\clue images\analysis output\complete_datasheet\epidermis_profile.csv",header=3)
# ... and show it as the cell output.
b