In [3]:
import histomicstk.features as hf
import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm
from skimage.measure import label
from matplotlib import pyplot as plt
import sys
sys.path.append('/groups/4/gaa50089/acd13264yb/Rettsyndrome/Classification/')
from Scripts.utils import nucleus_intensity_distribution

In [5]:
# Column names for the extra intensity features. Position k in this list names
# element k of the sequence returned by the intensity-distribution helper, as
# consumed by the extraction loops below (they enumerate `labels` and index
# the helper's result with the same position).
_PART_SUFFIXES = tuple(f"part{k:02d}" for k in range(5, 0, -1))  # part05 ... part01

labels = ["Intensity.WholeNucleus"]
labels += [f"Intensity.{suffix}" for suffix in _PART_SUFFIXES]
labels += [f"Intensity.distribution.{suffix}" for suffix in _PART_SUFFIXES]

# Extract Image features

In [8]:
# Compute per-image nucleus features for each stain channel of the CTRL and
# RETT datasets. Only the first N_PREVIEW_IMAGES images of each dataset are
# processed, and the result of the last (dataset, channel) pair is left in
# `features_all` for inspection in the following cells.
stain_type = ["H3K27ac", "CTCF", "Dapi"]

# Number of leading images processed per dataset (previously a literal 10).
N_PREVIEW_IMAGES = 10

for ctrl_type in ["CTRL", "RETT"]:
    if ctrl_type=="CTRL":
        img_path=f"../Classification/Datasets/{ctrl_type}_All.npy"
    elif ctrl_type=="RETT":
        img_path=f"../Classification/Datasets/{ctrl_type}_HPS9999_All.npy"
    # NOTE: allow_pickle=True unpickles the file contents, which can execute
    # arbitrary code; only load .npy files from a trusted source.
    img_all = np.load(img_path, allow_pickle=True)

    # Build one binary mask per image: pixels whose grayscale value is > 0
    # become 1, everything else stays 0.
    mask_all = []
    for img in img_all:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 0, 1, cv2.THRESH_BINARY)
        mask_all.append(thresh)
    mask_all = np.array(mask_all)
    print(f"img all shape: {img_all.shape}")
    print(f"mask all shape: {mask_all.shape}")

    # Compute nuclei features, one table per stain channel
    for c in range(3):
        # Collect the per-image frames and concatenate once after the loop;
        # concatenating inside the loop copies the growing table every time.
        per_image_features = []

        # Clamp to the dataset size so short datasets do not raise IndexError.
        for i in range(min(N_PREVIEW_IMAGES, len(img_all))):
            im_label = mask_all[i]
            im_nuclei = img_all[i,:,:,c]
            features = hf.compute_nuclei_features(im_label=im_label, im_nuclei=im_nuclei)
            # Tag the rows with the index of the image they came from.
            features["Label"] = i
            # Add the intensity-distribution values as extra columns.
            # The loop variable is named `column` (not `label`) so it does not
            # shadow `skimage.measure.label` imported at the top of the notebook.
            intensity_distribution = nucleus_intensity_distribution(im_label, im_nuclei)
            for part, column in enumerate(labels):
                features[column] = intensity_distribution[part]
            per_image_features.append(features)

        # Merge the per-image frames into features_all
        if per_image_features:
            features_all = pd.concat(per_image_features, ignore_index=True)
        else:
            features_all = pd.DataFrame()
# NOTE(review): saving is disabled below; presumably because only a preview
# subset of images is processed here - confirm before re-enabling.
#         # Save DataFrame as CSV
#         features_name = f'features_{ctrl_type}_{stain_type[c]}.csv'
#         features_all.to_csv(features_name, index=False)
#         print(f"🔥 Save features_all as {features_name}")

img all shape: (1723, 500, 500, 3)
mask all shape: (1723, 500, 500)
img all shape: (3595, 500, 500, 3)
mask all shape: (3595, 500, 500)


In [11]:
# Print every column of the accumulated feature table with a zero-padded index.
# The column names get their own variable so that `features`, which holds the
# per-image DataFrame in the extraction cells, is not rebound to a list.
feature_names = features_all.columns.tolist()
for idx, name in enumerate(feature_names):
    print(f"{idx:02} {name}")

00 Label
01 Identifier.Xmin
02 Identifier.Ymin
03 Identifier.Xmax
04 Identifier.Ymax
05 Identifier.CentroidX
06 Identifier.CentroidY
07 Identifier.WeightedCentroidX
08 Identifier.WeightedCentroidY
09 Orientation.Orientation
10 Size.Area
11 Size.ConvexHullArea
12 Size.MajorAxisLength
13 Size.MinorAxisLength
14 Size.Perimeter
15 Shape.Circularity
16 Shape.Eccentricity
17 Shape.EquivalentDiameter
18 Shape.Extent
19 Shape.FractalDimension
20 Shape.MinorMajorAxisRatio
21 Shape.Solidity
22 Shape.HuMoments1
23 Shape.HuMoments2
24 Shape.HuMoments3
25 Shape.HuMoments4
26 Shape.HuMoments5
27 Shape.HuMoments6
28 Shape.HuMoments7
29 Shape.WeightedHuMoments1
30 Shape.WeightedHuMoments2
31 Shape.WeightedHuMoments3
32 Shape.WeightedHuMoments4
33 Shape.WeightedHuMoments5
34 Shape.WeightedHuMoments6
35 Shape.WeightedHuMoments7
36 Shape.FSD1
37 Shape.FSD2
38 Shape.FSD3
39 Shape.FSD4
40 Shape.FSD5
41 Shape.FSD6
42 Nucleus.Intensity.Min
43 Nucleus.Intensity.Max
44 Nucleus.Intensity.Mean
45 Nucleus.Intensi

In [12]:
# Display the feature table left in `features_all` by the most recent extraction run.
features_all

Unnamed: 0,Label,Identifier.Xmin,Identifier.Ymin,Identifier.Xmax,Identifier.Ymax,Identifier.CentroidX,Identifier.CentroidY,Identifier.WeightedCentroidX,Identifier.WeightedCentroidY,Orientation.Orientation,...,Intensity.part05,Intensity.part04,Intensity.part03,Intensity.part02,Intensity.part01,Intensity.distribution.part05,Intensity.distribution.part04,Intensity.distribution.part03,Intensity.distribution.part02,Intensity.distribution.part01
0,0,125.0,146.0,373.0,357.0,245.622586,257.884118,247.178247,255.288224,1.166709,...,40.593563,40.079513,39.55719,38.954287,44.318315,0.199474,0.196948,0.194381,0.191419,0.217777
1,1,125.0,103.0,375.0,400.0,268.236845,254.092781,264.079297,249.637579,-0.34976,...,33.125156,34.053303,36.709396,33.916141,34.418206,0.19234,0.197729,0.213151,0.196932,0.199848
2,2,137.0,94.0,371.0,404.0,249.56563,262.41452,236.346989,242.250776,0.457796,...,18.569447,20.280423,22.040186,21.6956,20.763945,0.179676,0.196231,0.213259,0.209924,0.20091
3,3,111.0,29.0,387.0,469.0,252.432525,254.166708,253.388013,270.02744,0.090247,...,15.560623,17.14993,17.985585,18.544664,17.877683,0.178614,0.196858,0.20645,0.212867,0.205211
4,4,164.0,153.0,335.0,353.0,257.940629,247.358741,256.025805,246.660046,0.51919,...,3.093149,3.236447,3.387352,3.469426,3.319664,0.187395,0.196077,0.205219,0.210191,0.201118
5,5,142.0,158.0,356.0,343.0,249.570698,254.417703,250.426784,250.742627,-0.892979,...,19.440965,19.78829,19.986685,20.149105,19.070619,0.197499,0.201028,0.203043,0.204693,0.193737
6,6,74.0,97.0,438.0,403.0,236.509323,232.326568,239.774884,234.839055,-1.11349,...,21.689133,23.889951,26.583166,27.766409,27.030495,0.170836,0.18817,0.209384,0.218703,0.212907
7,7,97.0,156.0,410.0,348.0,262.075434,258.487339,255.839726,261.592773,-1.365839,...,26.721403,25.629019,26.730932,29.28961,30.248751,0.192768,0.184887,0.192836,0.211295,0.218214
8,8,166.0,162.0,327.0,337.0,250.1652,251.289613,250.217096,253.825714,-0.265688,...,40.24681,36.663586,35.884125,33.782804,32.598522,0.224622,0.204623,0.200273,0.188546,0.181936
9,9,154.0,154.0,337.0,341.0,245.821486,241.683812,247.049038,246.414827,0.483895,...,22.831134,24.074465,25.809348,25.513616,25.304729,0.184818,0.194882,0.208926,0.206532,0.204841


# Extract CAM features

In [None]:
# For every CAM method / cohort / stain combination, load the saved images and
# CAM maps, compute nucleus features on each CAM map (masked by the image's
# non-zero region) and write one CSV per combination.
for cam_type in ["GradCAM", "ScoreCAM"]:
    for ctrl_type in ["RETT", "CTRL"]:
        for stain_type in ["All", "H3K27ac", "CTCF", "Dapi"]:
            loadname = f"{ctrl_type}_{stain_type}_Resnet10_noavg_{cam_type}"
            print(f"🚀 {loadname}")
            # NOTE: allow_pickle=True unpickles the file contents, which can
            # execute arbitrary code; only load .npy files from a trusted source.
            img_all = np.load(f"../Classification/results_cam/{loadname}/{loadname}_img.npy",allow_pickle=True)
            cam_all = np.load(f"../Classification/results_cam/{loadname}/{loadname}_cam.npy",allow_pickle=True)
            print(f"img all shape: {img_all.shape}")
            print(f"cam all shape: {cam_all.shape}")

            # Binary mask per image: grayscale value > 0 becomes 1, else 0.
            mask_all = []
            for img in img_all:
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                _, thresh = cv2.threshold(gray, 0, 1, cv2.THRESH_BINARY)
                mask_all.append(thresh)
            mask_all = np.array(mask_all).astype(np.int32)
            print(f"mask all shape: {mask_all.shape}")

            # Collect one frame per image and concatenate once after the loop.
            per_image_features = []
            for i in tqdm(range(len(cam_all))):
                im_label = mask_all[i]
                im_nuclei = cam_all[i]
                features = hf.compute_nuclei_features(im_label=im_label, im_nuclei=im_nuclei)
                # Tag the rows with the index of the image they came from.
                features["Label"] = i
                # Add the intensity-distribution values to THIS image's rows.
                # Writing them to the accumulated table instead would overwrite
                # the whole column on every iteration, leaving every row with
                # the last image's values.
                # NOTE(review): the previous call went to the undefined name
                # `part_distribution_intensity`; the helper imported at the top
                # of the notebook is `nucleus_intensity_distribution` - confirm
                # these are the same function.
                intensity_distribution = nucleus_intensity_distribution(im_label, im_nuclei)
                # `column` (not `label`) avoids shadowing skimage.measure.label.
                for part, column in enumerate(labels):
                    features[column] = intensity_distribution[part]
                per_image_features.append(features)

            if per_image_features:
                features_all = pd.concat(per_image_features, ignore_index=True)
            else:
                features_all = pd.DataFrame()

            # Save DataFrame as CSV
            features_name = f'features_{loadname}.csv'
            features_all.to_csv(features_name, index=False)
            print(f"🔥 Save features_all as {features_name}")

# Extract image features (correct samples only)

In [39]:
# For every cohort / stain combination, load the images stored alongside the
# GradCAM results, compute nucleus features on channel 0 of each image (masked
# by the image's non-zero region) and write one "*_correct.csv" per combination.
for cam_type in ["GradCAM"]:
    for ctrl_type in ["RETT", "CTRL"]:
        for stain_type in ["H3K27ac", "CTCF", "Dapi"]:
            loadname = f"{ctrl_type}_{stain_type}_Resnet10_noavg_{cam_type}"
            print(f"🚀 {loadname}")
            # NOTE: allow_pickle=True unpickles the file contents, which can
            # execute arbitrary code; only load .npy files from a trusted source.
            img_all = np.load(f"../Classification/results_cam/{loadname}/{loadname}_img.npy",allow_pickle=True)
            print(f"img all shape: {img_all.shape}")

            # Binary mask per image: grayscale value > 0 becomes 1, else 0.
            mask_all = []
            for img in img_all:
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                _, thresh = cv2.threshold(gray, 0, 1, cv2.THRESH_BINARY)
                mask_all.append(thresh)
            mask_all = np.array(mask_all).astype(np.uint8)
            print(f"mask all shape: {mask_all.shape}")

            # Collect one frame per image and concatenate once after the loop.
            per_image_features = []
            for i in tqdm(range(len(img_all))):
                im_label = mask_all[i]
                im_nuclei = img_all[i,:,:,0]
                features = hf.compute_nuclei_features(im_label=im_label, im_nuclei=im_nuclei)
                # Tag the rows with the index of the image they came from.
                features["Label"] = i
                # Add the intensity-distribution values to THIS image's rows.
                # Writing them to the accumulated table instead would overwrite
                # the whole column on every iteration, leaving every row with
                # the last image's values.
                # NOTE(review): the previous call went to the undefined name
                # `part_distribution_intensity`; the helper imported at the top
                # of the notebook is `nucleus_intensity_distribution` - confirm
                # these are the same function.
                intensity_distribution = nucleus_intensity_distribution(im_label, im_nuclei)
                # `column` (not `label`) avoids shadowing skimage.measure.label.
                for part, column in enumerate(labels):
                    features[column] = intensity_distribution[part]
                per_image_features.append(features)

            if per_image_features:
                features_all = pd.concat(per_image_features, ignore_index=True)
            else:
                features_all = pd.DataFrame()

            # Save DataFrame as CSV
            features_name = f'features_{ctrl_type}_{stain_type}_correct.csv'
            features_all.to_csv(features_name, index=False)
            print(f"                                      🔥 Save features_all as {features_name}")

🚀 RETT_H3K27ac_Resnet10_noavg_GradCAM
img all shape: (3486, 500, 500, 3)
mask all shape: (3486, 500, 500)


100%|██████████████████████████████████████████████████████████████████████████| 3486/3486 [04:50<00:00, 12.02it/s]


🔥 Save features_all as features_RETT_H3K27ac_correct.csv
🚀 RETT_CTCF_Resnet10_noavg_GradCAM
img all shape: (3518, 500, 500, 3)
mask all shape: (3518, 500, 500)


100%|██████████████████████████████████████████████████████████████████████████| 3518/3518 [04:53<00:00, 11.99it/s]


🔥 Save features_all as features_RETT_CTCF_correct.csv
🚀 RETT_Dapi_Resnet10_noavg_GradCAM
img all shape: (3484, 500, 500, 3)
mask all shape: (3484, 500, 500)


100%|██████████████████████████████████████████████████████████████████████████| 3484/3484 [04:49<00:00, 12.03it/s]


🔥 Save features_all as features_RETT_Dapi_correct.csv
🚀 CTRL_H3K27ac_Resnet10_noavg_GradCAM
img all shape: (1567, 500, 500, 3)
mask all shape: (1567, 500, 500)


100%|██████████████████████████████████████████████████████████████████████████| 1567/1567 [02:08<00:00, 12.19it/s]


🔥 Save features_all as features_CTRL_H3K27ac_correct.csv
🚀 CTRL_CTCF_Resnet10_noavg_GradCAM
img all shape: (1592, 500, 500, 3)
mask all shape: (1592, 500, 500)


100%|██████████████████████████████████████████████████████████████████████████| 1592/1592 [02:10<00:00, 12.17it/s]


🔥 Save features_all as features_CTRL_CTCF_correct.csv
🚀 CTRL_Dapi_Resnet10_noavg_GradCAM
img all shape: (1517, 500, 500, 3)
mask all shape: (1517, 500, 500)


100%|██████████████████████████████████████████████████████████████████████████| 1517/1517 [02:02<00:00, 12.37it/s]


🔥 Save features_all as features_CTRL_Dapi_correct.csv
