In [1]:
"""
An example for histological images color normalization based on the adaptive color deconvolution as described in the paper:
https://github.com/Zhengyushan/adaptive_color_deconvolution

Yushan Zheng, Zhiguo Jiang, Haopeng Zhang, Fengying Xie, Jun Shi, and Chenghai Xue.
Adaptive Color Deconvolution for Histological WSI Normalization.
Computer Methods and Programs in Biomedicine, v170 (2019) pp.107-120.

"""
import os
import cv2
import numpy as np
from glob import glob
from stain_normalizer import StainNormalizer
#from stain_normalizer import StainNormalizer
import imageio.v3 as iio
import matplotlib.pyplot as plt
import torch
import torchmetrics
from torchmetrics.functional import peak_signal_noise_ratio as psnr
from torchmetrics.functional import structural_similarity_index_measure as ssim
from torchmetrics.functional import multiscale_structural_similarity_index_measure as msssim
from glob import glob
import pickle
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


In [2]:
def compute_metrics(img_path, img_names):
    psnr_list = []
    ssim_list = []
    msssim_list = []
    for img_name in img_names:
        orig_img = os.path.join(img_path,img_name+"_origin.jpg")
        norm_img = os.path.join(img_path,img_name+"_norm.jpg")
        orig_img_load = iio.imread(orig_img)
        orig = torch.torch.from_numpy(orig_img_load.reshape((3, 1024, 1024))).float()
        norm_img_load = iio.imread(norm_img)
        norm = torch.torch.from_numpy(norm_img_load.reshape((3, 1024, 1024))).float()
        psnr_val = psnr(norm, orig)
        ssim_val = ssim(norm[None, :], orig[None, :])
        msssim_val = msssim(norm[None, :], orig[None, :])
        psnr_list.append(psnr_val.cpu().numpy())
        ssim_list.append(ssim_val.cpu().numpy())
        msssim_list.append(msssim_val.cpu().numpy())
    print("PSNR - ","mean: ", np.mean(psnr_list), "max: ", np.max(psnr_list), "min: ", np.min(psnr_list), "std: ", np.std(psnr_list))
    print("SSIM - ","mean: ", np.mean(ssim_list), "max: ", np.max(ssim_list), "min: ", np.min(ssim_list), "std: ", np.std(ssim_list))
    print("MSSSIM - ","mean: ", np.mean(msssim_list), "max: ", np.max(msssim_list), "min: ", np.min(msssim_list), "std: ", np.std(msssim_list))
    return np.mean(psnr_list), np.mean(ssim_list), np.mean(msssim_list), np.std(psnr_list),  np.std(ssim_list), np.std(msssim_list)

def plot_rgb_hist(img_path):
    plant_seedling = iio.imread(img_path)

    # display the image
    fig, ax = plt.subplots()
    ax.imshow(plant_seedling)

    # tuple to select colors of each channel line
    colors = ("red", "green", "blue")

    # create the histogram plot, with three lines, one for
    # each color
    fig, ax = plt.subplots()
    ax.set_xlim([0, 256])
    for channel_id, color in enumerate(colors):
        histogram, bin_edges = np.histogram(
            plant_seedling[:, :, channel_id], bins=256, range=(0, 256)
        )
        ax.plot(bin_edges[0:-1], histogram, color=color)

    ax.set_title("Color Histogram")
    ax.set_xlabel("Color value")
    ax.set_ylabel("Pixel count")

In [4]:
source_image_dir = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/LBD/WSI_Normalization_techniques/StainNet/data"
result_dir = '/gladstone/finkbeiner/steve/work/data/npsad_data/monika/LBD/WGM_Normalization/results_compute_metrics'
slide_list =  ["13_131_CG_aSyn_x200.svs","14_148_CG_aSyn_x200.svs","PD034_Syn1_CG.svs","PD113_Syn1_CG.svs"]
temp_dir = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/LBD/WSI_Normalization_techniques/StainNet/data/"
psnr_mean_list =[]
ssim_mean_list = []
msssim_mean_list = []
psnr_std_list = []
ssim_std_list = [] 
msssim_std_list = []
region = "grey"
image_count=[]

In [10]:
#img_list =["11_063_CG_aSyn_x200.svs","12_007_CG_aSyn_x200.svs","PD001_Syn1_CG.svs","PD067_Syn1_CG.svs"]
img_list1 =["11_063_CG_aSyn_x200.svs"]
img_list2 =["11_063_CG_aSyn_x200.svs","PD001_Syn1_CG.svs"]
img_list3 =["11_063_CG_aSyn_x200.svs","12_007_CG_aSyn_x200.svs","PD001_Syn1_CG.svs","PD067_Syn1_CG.svs"]
img_list4 =["11_063_CG_aSyn_x200.svs","12_007_CG_aSyn_x200.svs","PD001_Syn1_CG.svs","PD067_Syn1_CG.svs","14_036_CG_aSyn_x200.svs","PD041_Syn1_CG.svs"]

img_list5 =["11_063_CG_aSyn_x200.svs","12_007_CG_aSyn_x200.svs","PD001_Syn1_CG.svs","PD067_Syn1_CG.svs","14_036_CG_aSyn_x200.svs","PD041_Syn1_CG.svs"
            ,"PD130_Syn1_CG.svs","12_060_CG_aSyn_x200.svs"]



#for img_list in [img_list1,img_list2,img_list3,img_list4]:
for img_list in [img_list4]:
    template_list = []
    for img in img_list:
        template_list.extend(glob(os.path.join(temp_dir,img,"grey","*.png")))
        template_list.extend(glob(os.path.join(temp_dir,img,"White","*.png")))
        template_list.extend(glob(os.path.join(temp_dir,img,"bg","*.png")))

    if ".DS_Store" in template_list:
        template_list.remove(".DS_Store")
    #template_list.remove(".DS_Store")
    #print(len(template_list))
    #temp_images = np.asarray([cv2.imread(os.path.join(template_dir, name)) for name in template_list])
    temp_images = np.asarray([cv2.imread(name) for name in template_list])

    # extract the stain parameters of the template slide
    normalizer = StainNormalizer()
    normalizer.fit(temp_images[:,:,:,[2,1,0]]) #BGR2RGB

    with open('/gladstone/finkbeiner/steve/work/data/npsad_data/monika/LBD/WGM_Normalization/wgm_normalizer.pkl', 'wb') as f:
        pickle.dump(normalizer, f, pickle.HIGHEST_PROTOCOL)
    #print(normalizer)

    if not os.path.exists(result_dir):
        os.makedirs(result_dir)




    """
    slide_list =  ["13_131_CG_aSyn_x200.svs","14_148_CG_aSyn_x200.svs","14_036_CG_aSyn_x200.svs", "14_073_CG_aSyn_x200.svs",
                "PD034_Syn1_CG.svs","PD113_Syn1_CG.svs", "15_005_CG_aSyn_x200.svs", "PD088_Syn1_CG.svs" ,"14_075_CG_aSyn_x200.svs",
                "14_087_CG_aSyn_x200.svs","14_133_CG_aSyn_x200.svs","14_153_CG_aSyn_x200.svs", "PD041_Syn1_CG.svs","PD079_Syn1_CG.svs",
                "PD110_Syn1_CG.svs","PD130_Syn1_CG.svs", "PD133_Syn1_CG.svs"]
                

    val_slide_list = ["12_060_CG_aSyn_x200.svs", "13_177_CG_aSyn_x200.svs","14_053_CG_aSyn_x200.svs","15_007_CG_aSyn_x200.svs",
                    "PD002_Syn1_CG.svs","PD090_Syn1_CG.svs","PD131_Syn1_CG.svs"]
    """
    if ".DS_Store" in slide_list:
        slide_list.remove(".DS_Store")
    #slide_list.remove("screenshots")
    #slide_list = ["screenshots"]
    for s in slide_list:
        print('normalize slide', s)
        slide_dir = os.path.join(source_image_dir, s, region)
        image_list = os.listdir(slide_dir)
        if ".DS_Store" in image_list:
            image_list.remove(".DS_Store")
        images = np.asarray([cv2.imread(os.path.join(slide_dir, name)) for name in image_list])
        print(len(images))
        print(images[0].shape)
        ## color transform
        results = normalizer.transform(images[:,:,:,[2,1,0]]) #BGR2RGB
        
        # display
        for i, result in enumerate(results):
            cv2.imwrite(os.path.join(result_dir, region, s + '_{}_origin.jpg'.format(i)), images[i])
            cv2.imwrite(os.path.join(result_dir, region, s + '_{}_norm.jpg'.format(i)) , result[:,:,[2,1,0]]) #RGB2BGR

    img_path = os.path.join(result_dir,region)
    l = glob(os.path.join(img_path,"*.jpg"))
    img_names = ["_".join(x.split("/")[-1].split("_")[:-1]) for x in l]


    psnr_mean, ssim_mean, msssim_mean,psnr_std, ssim_std, msssim_std = compute_metrics(img_path, img_names)
    psnr_mean_list.append(psnr_mean)
    ssim_mean_list.append(ssim_mean)
    msssim_mean_list.append(msssim_mean)
    psnr_std_list.append(psnr_std)
    ssim_std_list.append(ssim_std)
    msssim_std_list.append(msssim_std)
    image_count.append(len(img_list))

normalize slide 13_131_CG_aSyn_x200.svs
341
(1024, 1024, 3)
[[ 1.712277   -0.63181734 -0.86679864]
 [-0.23543927  1.1600862  -0.10434286]
 [-0.1355897  -0.1378358   1.1282115 ]] [array(1.3236874, dtype=float32), array(1.7769387, dtype=float32), array(1)]
normalize slide 14_148_CG_aSyn_x200.svs
238
(1024, 1024, 3)
[[ 1.2199855  -0.81447405  0.04131999]
 [ 0.13947643  1.2510929  -0.8761335 ]
 [-0.00951277 -0.00832737  1.0093775 ]] [array(1.4354951, dtype=float32), array(1.807771, dtype=float32), array(1)]
normalize slide PD034_Syn1_CG.svs
144
(1024, 1024, 3)
[[ 1.0908738  -0.77517515  0.08940606]
 [ 0.4039323   1.2301264  -0.9530506 ]
 [-0.12820004 -0.09829146  1.1228772 ]] [array(1.6931378, dtype=float32), array(1.9737273, dtype=float32), array(1)]
normalize slide PD113_Syn1_CG.svs
80
(1024, 1024, 3)
[[ 1.3524972  -1.9095556   1.4492662 ]
 [-0.01551099  2.2152572  -2.0007908 ]
 [ 0.02565747  0.03311281  0.96545476]] [array(1.2550589, dtype=float32), array(1.476709, dtype=float32), array

In [14]:
image_count

[1, 2, 4, 6, 8]

In [11]:
psnr_mean_list

[28.705872, 29.432236, 32.47057, 34.34089, 34.345016]

In [12]:
ssim_mean_list

[0.938722, 0.9398712, 0.95419604, 0.96540844, 0.96957916]

In [56]:
msssim_mean_list

[0.98319954, 0.9841844, 0.9880916, 0.9907829, 0.9917404]

In [57]:
psnr_std_list, ssim_std_list, msssim_std_list

([5.039859, 5.2972035, 6.1276836, 5.8041406, 4.384928],
 [0.04425198, 0.045174543, 0.033766285, 0.022369577, 0.016264055],
 [0.010035755, 0.009936317, 0.0075638136, 0.005140843, 0.0037173126])

In [62]:
from plotly.subplots import make_subplots


fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(y=psnr_mean_list, x=image_count, mode="lines+markers+text",
            marker=dict(
                size=9,
            ),line=dict(
            width=5),text=psnr_mean_list,
    textposition="bottom center",error_y=dict(type='data', array=psnr_std_list)
    
            ), row=1, col=1)

fig.update_layout(height=700, width=1000)
fig.update_layout(
        plot_bgcolor='white'
    )
fig.update_layout(
    autosize=False,
    xaxis = dict(
        mirror=True,
        ticks='outside',
        #showline=True,
        linecolor='black',
        gridcolor='lightgrey',
        tickmode = 'array',
        #tickvals=[  "Baseline",   "5",  "10",  "15",  "20",  "25",  "30",  "40",  "50",  "60",  "75",
        #"90", "105", "120", "135"],
        #range=[0, 140],
        zeroline=True, zerolinewidth=2,
        linewidth = 4
        #, ticklen=5,
        #constrain="domain",
        #gridwidth=10,
    #)
    ),
    #fig.update_yaxes(
      yaxis = dict(  mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey',
        range=[0, 50],
        tickmode = 'array',
        zeroline=True, zerolinewidth=2,
       linewidth=4
        #gridwidth=10,
        
    ),
      xaxis_title =dict(text="<b> Number of images</b> ", font=dict(size=25))  ,
      yaxis_title=dict(text="<b> PSNR value </b> ", font=dict(size=25)) 
    )
#fig.update_layout(
#    title=dict(text="<b> Mean Pulse Rate </b>", font=dict(size=35))
#)
fig.update_layout(
yaxis = dict(
tickfont = dict(size=20)))
fig.update_layout(
xaxis = dict(
tickfont = dict(size=20)))
fig.update_yaxes(tickfont_family="Arial Black")
fig.update_xaxes(tickfont_family="Arial Black")
fig.update_layout(legend = dict(font = dict(family = "Arial Black", size = 30, color = "black")),
                  legend_title = dict(text="Group", font = dict(family = "Arial Black", size = 30, color = "green")))

#fig.update_layout(legend_title_text='Group',  dict(font = dict(family = "Arial Black", size = 20, color = "black"))

fig.show()


In [67]:
from plotly.subplots import make_subplots


fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(y=msssim_mean_list, x=image_count, mode="lines+markers+text",
            marker=dict(
                size=9,
            ),line=dict(
            width=5),text=msssim_mean_list,
    textposition="bottom center",error_y=dict(type='data', array=msssim_std_list)
    
            ), row=1, col=1)

fig.update_layout(height=700, width=1000)
fig.update_layout(
        plot_bgcolor='white'
    )
fig.update_layout(
    autosize=False,
    xaxis = dict(
        mirror=True,
        ticks='outside',
        #showline=True,
        linecolor='black',
        gridcolor='lightgrey',
        tickmode = 'array',
        #tickvals=[  "Baseline",   "5",  "10",  "15",  "20",  "25",  "30",  "40",  "50",  "60",  "75",
        #"90", "105", "120", "135"],
        #range=[0, 140],
        zeroline=True, zerolinewidth=2,
        linewidth = 4
        #, ticklen=5,
        #constrain="domain",
        #gridwidth=10,
    #)
    ),
    #fig.update_yaxes(
      yaxis = dict(  mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey',
        range=[0, 1],
        tickmode = 'array',
        zeroline=True, zerolinewidth=2,
       linewidth=4
        #gridwidth=10,
        
    ),
      xaxis_title =dict(text="<b> Number of images</b> ", font=dict(size=25))  ,
      yaxis_title=dict(text="<b> MSSSIM value </b> ", font=dict(size=25)) 
    )
#fig.update_layout(
#    title=dict(text="<b> Mean Pulse Rate </b>", font=dict(size=35))
#)
fig.update_layout(
yaxis = dict(
tickfont = dict(size=20)))
fig.update_layout(
xaxis = dict(
tickfont = dict(size=20)))
fig.update_yaxes(tickfont_family="Arial Black")
fig.update_xaxes(tickfont_family="Arial Black")
fig.update_layout(legend = dict(font = dict(family = "Arial Black", size = 30, color = "black")),
                  legend_title = dict(text="Group", font = dict(family = "Arial Black", size = 30, color = "green")))

#fig.update_layout(legend_title_text='Group',  dict(font = dict(family = "Arial Black", size = 20, color = "black"))

fig.show()


In [60]:
from plotly.subplots import make_subplots


fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(y=ssim_mean_list, x=image_count, mode="lines+markers+text",
            marker=dict(
                size=9,
            ),line=dict(
            width=5),text=ssim_mean_list,
    textposition="bottom center"
    , name="Mean SSIM", error_y=dict(type='data', array=ssim_std_list)
            ), row=1, col=1)

fig.add_trace(go.Scatter(y=msssim_mean_list, x=image_count, mode="lines+markers+text",
            marker=dict(
                size=9,
            ),line=dict(
            width=5),text=msssim_mean_list, error_y=dict(type='data', array=msssim_std_list),
    textposition="bottom center"
    ,name="Mean MSSSIM"
            ), row=1, col=1)

fig.update_layout(height=700, width=1000)
fig.update_layout(
        plot_bgcolor='white'
    )
fig.update_layout(
    autosize=False,
    xaxis = dict(
        mirror=True,
        ticks='outside',
        #showline=True,
        linecolor='black',
        gridcolor='lightgrey',
        tickmode = 'array',
        #tickvals=[  "Baseline",   "5",  "10",  "15",  "20",  "25",  "30",  "40",  "50",  "60",  "75",
        #"90", "105", "120", "135"],
        #range=[0, 140],
        zeroline=True, zerolinewidth=2,
        linewidth = 4
        #, ticklen=5,
        #constrain="domain",
        #gridwidth=10,
    #)
    ),
    #fig.update_yaxes(
      yaxis = dict(  mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey',
        range=[0.5, 1],
        tickmode = 'array',
        zeroline=True, zerolinewidth=2,
       linewidth=4
        #gridwidth=10,
        
    ),
      xaxis_title =dict(text="<b> Number of images</b> ", font=dict(size=25))  ,
      yaxis_title=dict(text="<b> Metric value </b> ", font=dict(size=25)) 
    )
#fig.update_layout(
#    title=dict(text="<b> Mean Pulse Rate </b>", font=dict(size=35))
#)
fig.update_layout(
yaxis = dict(
tickfont = dict(size=20)))
fig.update_layout(
xaxis = dict(
tickfont = dict(size=20)))
fig.update_yaxes(tickfont_family="Arial Black")
fig.update_xaxes(tickfont_family="Arial Black")
fig.update_layout(legend = dict(font = dict(family = "Arial Black", size = 30, color = "black")),
                  legend_title = dict(text="Metric", font = dict(family = "Arial Black", size = 30, color = "green")))

#fig.update_layout(legend_title_text='Group',  dict(font = dict(family = "Arial Black", size = 20, color = "black"))

fig.show()


## Final Training Image normalization

In [48]:
## Fit a Normalizer
img_list4 =["11_063_CG_aSyn_x200.svs","12_007_CG_aSyn_x200.svs","PD001_Syn1_CG.svs","PD067_Syn1_CG.svs","14_036_CG_aSyn_x200.svs","PD041_Syn1_CG.svs"]
template_list = []
for img in img_list4:
    template_list.extend(glob(os.path.join(temp_dir,img,"grey","*.png")))
    template_list.extend(glob(os.path.join(temp_dir,img,"White","*.png")))
    template_list.extend(glob(os.path.join(temp_dir,img,"bg","*.png")))

if ".DS_Store" in template_list:
    template_list.remove(".DS_Store")

temp_images = np.asarray([cv2.imread(name) for name in template_list])

# extract the stain parameters of the template slide
normalizer = StainNormalizer()
normalizer.fit(temp_images[:,:,:,[2,1,0]]) #BGR2RGB

with open('/gladstone/finkbeiner/steve/work/data/npsad_data/monika/LBD/WGM_Normalization/wgm_normalizer.pkl', 'wb') as f:
    pickle.dump(normalizer, f, pickle.HIGHEST_PROTOCOL)


In [50]:
with open('/gladstone/finkbeiner/steve/work/data/npsad_data/monika/LBD/WGM_Normalization/wgm_normalizer.pkl', 'rb') as f:
    normalizer = pickle.load(f)

In [53]:
result_dir = '/gladstone/finkbeiner/steve/work/data/npsad_data/monika/LBD/WGM_Normalization/normalized_data'
data_types=["train","val"]
regions = ["grey","White","bg"]

slide_list = {"train": ["13_131_CG_aSyn_x200.svs","14_148_CG_aSyn_x200.svs","14_073_CG_aSyn_x200.svs",
                    "PD034_Syn1_CG.svs","PD113_Syn1_CG.svs", "15_005_CG_aSyn_x200.svs", "PD088_Syn1_CG.svs" ,"14_075_CG_aSyn_x200.svs",
                    "14_087_CG_aSyn_x200.svs","14_133_CG_aSyn_x200.svs","14_153_CG_aSyn_x200.svs","PD079_Syn1_CG.svs",
                    "PD110_Syn1_CG.svs","PD130_Syn1_CG.svs", "PD133_Syn1_CG.svs"],
              
              "val":["12_060_CG_aSyn_x200.svs", "13_177_CG_aSyn_x200.svs","14_053_CG_aSyn_x200.svs","15_007_CG_aSyn_x200.svs",
                       "PD002_Syn1_CG.svs","PD090_Syn1_CG.svs","PD131_Syn1_CG.svs"]}


for data_type in data_types:
    for region in regions:
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)
        for s in slide_list[data_type]:
            print('normalize slide', s)
            slide_dir = os.path.join(source_image_dir, s, region)
            image_list = os.listdir(slide_dir)
            if ".DS_Store" in image_list:
                image_list.remove(".DS_Store")
                
            images = np.asarray([cv2.imread(os.path.join(slide_dir, name)) for name in image_list])
            
            ## color transform
            results = normalizer.transform(images[:,:,:,[2,1,0]]) #BGR2RGB
            
            # display
            for result, image_name  in zip(results,image_list):
                #cv2.imwrite(os.path.join(result_dir, region, s + '_{}_origin.jpg'.format(i)), images[i])
                image_name = image_name.replace("png","jpg")
                cv2.imwrite(os.path.join(result_dir,data_type, region, image_name) , result[:,:,[2,1,0]]) #RGB2BGR

normalize slide 13_131_CG_aSyn_x200.svs
[[ 1.7113962  -0.6389496  -0.8537888 ]
 [-0.23379594  1.1648612  -0.11749933]
 [-0.13729598 -0.13935758  1.1307889 ]] [array(1.3236632, dtype=float32), array(1.7775086, dtype=float32), array(1)]
normalize slide 14_148_CG_aSyn_x200.svs
[[ 1.2139407  -0.7879919   0.01416416]
 [ 0.14811005  1.2391962  -0.8573132 ]
 [-0.01657958 -0.01463087  1.0163872 ]] [array(1.4401124, dtype=float32), array(1.8079122, dtype=float32), array(1)]
normalize slide 14_073_CG_aSyn_x200.svs
[[ 1.0411729  -0.4548973  -0.31657672]
 [ 0.5767238   1.1185056  -0.9473286 ]
 [ 0.02614867  0.01984254  0.97477496]] [array(1.5670915, dtype=float32), array(1.7372993, dtype=float32), array(1)]
normalize slide PD034_Syn1_CG.svs
[[ 1.0749581  -0.77047724  0.10332641]
 [ 0.41879526  1.2354438  -0.96704113]
 [-0.12808886 -0.09822807  1.1224657 ]] [array(1.6947242, dtype=float32), array(1.9712646, dtype=float32), array(1)]
normalize slide PD113_Syn1_CG.svs
[[ 1.3710295  -2.0636377   1.610

In [29]:
result_dir = '/gladstone/finkbeiner/steve/work/data/npsad_data/monika/LBD/WGM_Normalization/normalized_data'
data_types=["train","val"]
regions = ["grey","White","bg"]

In [30]:
def map_label(class_name):
    if class_name=="White":
        return 0
    if class_name=="grey":
        return 1
    if class_name=="bg":
        return 2
    return -1

In [31]:
df = pd.DataFrame(columns=["WSI_filename","crop_filepath","class"])
for label_name in regions:
    files = glob(os.path.join(result_dir, data_types[0], label_name, '*.jpg'))
    #print(len(files))glob
    temp_df =  pd.DataFrame({"crop_filepath":files})
    #temp_df["WSI_filename"] = folder_name
    temp_df["class"] = label_name
    df = pd.concat([df, temp_df], ignore_index=True)
df["WSI_filename"]=df["crop_filepath"].apply(lambda l:l.split("/")[-1].split(".")[0])
df["LBD_tag"] = df["WSI_filename"].apply(lambda l: "PDD" if l.startswith("PD") else "DLB")
df["label"]=df["class"].apply(lambda l:map_label(l))
df.to_csv("/home/mahirwar/Desktop/Monika/npsad_data/monika/LBD/Intermediate_data/train_acd_norm_full.csv")
df[["WSI_filename","crop_filepath","label"]].to_csv("/home/mahirwar/Desktop/Monika/npsad_data/monika/LBD/Intermediate_data/train_acd_norm.csv")


In [22]:
df.groupby(["LBD_tag","class"])["crop_filepath"].count()


LBD_tag  class
DLB      White    1472
         bg       1387
         grey     1892
PDD      White     557
         bg        642
         grey      760
Name: crop_filepath, dtype: int64

In [33]:
df = pd.DataFrame(columns=["WSI_filename","crop_filepath","class"])
for label_name in regions:
    files = glob(os.path.join(result_dir, data_types[1], label_name, '*.jpg'))
    #print(len(files))glob
    temp_df =  pd.DataFrame({"crop_filepath":files})
    #temp_df["WSI_filename"] = folder_name
    temp_df["class"] = label_name
    df = pd.concat([df, temp_df], ignore_index=True)

In [34]:
df["WSI_filename"]=df["crop_filepath"].apply(lambda l:l.split("/")[-1].split(".")[0])
df["LBD_tag"] = df["WSI_filename"].apply(lambda l: "PDD" if l.startswith("PD") else "DLB")
df["label"]=df["class"].apply(lambda l:map_label(l))
df.to_csv("/home/mahirwar/Desktop/Monika/npsad_data/monika/LBD/Intermediate_data/val_acd_norm_full.csv")
df[["WSI_filename","crop_filepath","label"]].to_csv("/home/mahirwar/Desktop/Monika/npsad_data/monika/LBD/Intermediate_data/val_acd_norm.csv")

In [35]:
df.groupby(["LBD_tag","class"])["crop_filepath"].count()


LBD_tag  class
DLB      White    820
         bg       605
         grey     381
PDD      White    247
         bg       517
         grey     307
Name: crop_filepath, dtype: int64