In [1]:
import os
import numpy as np
import cv2
import json
from scipy.io import loadmat
import h5py
import pandas as pd
from tifffile import imread

In [2]:
# def json2df():
def cntarea(cnt):
    cnt = np.array(cnt).astype(np.float32)
    area = cv2.contourArea(cnt)
    return area

def cntperi(cnt):
    cnt = np.array(cnt).astype(np.float32)
    perimeter = cv2.arcLength(cnt,True)
    return perimeter

def cntMA(cnt):
    cnt = np.array(cnt).astype(np.float32)
    #Orientation, Aspect_ratio
    [(x,y),(MA,ma),orientation] = cv2.fitEllipse(cnt)
    return [np.max((MA,ma)),np.min((MA,ma)),orientation]

In [3]:
pth_crop_info_dir = r'\\10.99.68.178\andreex\data\monkey fetus\gestational 40\2_5x\cropped_images\bounding_boxes'
crop_mat_list = sorted(os.listdir(pth_crop_info_dir))
crop_mat_list = [file for file in crop_mat_list if 'monkey' in file]

In [4]:
crop_mat_list[:5]

['monkey_fetus_40_0001.mat',
 'monkey_fetus_40_0002.mat',
 'monkey_fetus_40_0003.mat',
 'monkey_fetus_40_0004.mat',
 'monkey_fetus_40_0005.mat']

In [5]:
#input folder
jsonsrc = r'\\10.99.68.178\andreex\data\monkey fetus\gestational 40\StarDist_11_21_23\json'
#output folder creation
dst = r'\\10.99.68.178\andreex\data\monkey fetus\gestational 40\StarDist_11_21_23\json_with_features'
if not os.path.exists(dst):
    os.mkdir(dst)

In [6]:
jsons = sorted(os.listdir(jsonsrc))
jsons = [f for f in jsons if f.endswith('.json')]
print(jsons[0:5])

['monkey_fetus_40_0001.json', 'monkey_fetus_40_0002.json', 'monkey_fetus_40_0003.json', 'monkey_fetus_40_0004.json', 'monkey_fetus_40_0005.json']


In [7]:
# sometimes loading .mat file works with loadmat function from scipi.io, sometimes it doesn't and needs h5py module, so both cases are accounted for here

crop_data = []

for i, file in enumerate(crop_mat_list):
    mat_file_name = os.path.join(pth_crop_info_dir, file)
    try:
        data = loadmat(mat_file_name)

        #print('scipy.io loadmat')
        bb = [int(x) for x in data['bb']]
        crop_data.append(bb)
        #print(bb)

    except:
        data = h5py.File(mat_file_name, 'r')

        #print('h5py')
        bb = [int(x) for x in data['bb'][()]]
        crop_data.append(bb)
        #print(bb)

In [8]:
# pixel res:
pth_pixel_res = r'\\10.99.68.178\andreex\data\monkey fetus\gestational 40\segmentation_analysis\pix_res_info'
pixel_res_info = sorted(os.listdir(pth_pixel_res))
pixel_res_files = [file for file in pixel_res_info if file.endswith('.mat')]

In [11]:
pixel_res_info = []

for i, file in enumerate(pixel_res_files):
    mat_file_name = os.path.join(pth_pixel_res, file)
    try:
        data = loadmat(mat_file_name)

        #print('scipy.io loadmat')
        x = float(data['pix_res']['x'][0][0][0])
        y = float(data['pix_res']['x'][0][0][0])
        pixel_res_info.append([x, y])

    except:
        data = h5py.File(mat_file_name, 'r')

        #print('h5py')
        x = float(data['pix_res'][()]['x'][0][0][0])
        y = float(data['pix_res'][()]['x'][0][0][0])
        pixel_res_info.append([x, y])

In [12]:
print(f'length crop data: {len(crop_data)}')
print(f'length jsons: {len(jsons)}')
print(f'length res info: {len(pixel_res_info)}')

length crop data: 1154
length jsons: 1154
length res info: 1154


In [13]:
print(crop_data[0])
print(jsons[0])
print(pixel_res_info[0])

[2279, 5816, 2910, 5498]
monkey_fetus_40_0001.json
[0.44157908681444846, 0.44157908681444846]


In [35]:
model1_pth = r'\\10.99.68.178\andreex\data\monkey fetus\gestational 40\5x\cropped_images\classification_MODEL1_6_21_2023_all_annotations'
model1 = sorted(os.listdir(model1_pth))
model1 = [im for im in model1 if im.endswith('.tif')]

In [57]:
outpth_pickle = r'\\10.99.68.178\andreex\data\monkey fetus\gestational 40\segmentation_analysis\model1_class_info_pickles_12_4'

In [99]:
for i, file in enumerate(jsons):
    i = 864
    file = jsons[i]
    fname = os.path.join(jsonsrc, file)
    print(fname)

    outpth = os.path.join(outpth_pickle, ''.join([file[:-5], '.pickle'])) # without .json
    # print(outpth)
    if not os.path.exists(outpth):

        imID = int(fname[-9:-5])

        with open(fname, 'r') as file_n:
            data = json.load(file_n)

        m1_im_pth = os.path.join(model1_pth, model1[i])
        m1_im = imread(m1_im_pth)

        crop_file_data = [num*downsize2_5x for num in crop_data[i]]  # multiple bounding box data by 2_5x crop ratio to get offsets for 20x points
        #print(crop_file_data)

        pix_res = pixel_res_info[i][0]  # this is assuming x and y pix size are equal

        downsize10x = 1/pix_res
        downsize5x = 2/pix_res
        downsize2_5x = 4/pix_res

        #print(crop_file_data)

        # sometimes Andre drew cropping box from bottom to top, and this messed up my code, so I made this to fix it
        #if crop_file_data[3] < crop_file_data[2]:
        #    temp = crop_file_data[2]
        #    crop_file_data[2] = crop_file_data[3]
        #    crop_file_data[3] = temp

        #if crop_file_data[1] < crop_file_data[0]:
        #    temp = crop_file_data[0]
        #    crop_file_data[0] = crop_file_data[1]
        #    crop_file_data[1] = temp

        #print(crop_file_data)




        x_offset = crop_file_data[0] - 1  # 0 to 1 indexing fix, probably right idk
        y_offset = crop_file_data[2] - 1

        centroids = []
        contours = []

        for entry in data:
            cent = entry['centroid'][0]
            cent = [cent[0] - x_offset, cent[1] - y_offset]  # coord might be backwards, idk
            cent = [num/downsize5x for num in cent]

            x_end = (crop_file_data[1] - x_offset)/downsize5x
            y_end = (crop_file_data[3] - y_offset)/downsize5x

            if (0 <= cent[0] < y_end) and (0 <= cent[1] < x_end):
                centroids.append(cent)
                c = entry['contour'][0]
                contour = []
                for j in range(len(c[0])):
                    point = [c[0][j] - x_offset, c[1][j] - y_offset]
                    point = [num/downsize5x for num in point]
                    contour.append(point)
                contours.append(contour)

        centroids = np.array(centroids)
        contours = np.array(contours)

        if len(centroids) == 0:
            print(f'Skipping {file}')
        else:

            areas = []
            perimeters = []
            circularities = []
            aspect_ratios = []
            image_ids = []
            classes = []

            compactness_a, eccentricity_a, euler_number_a, extent_a, form_factor_a, maximum_radius_a, mean_radius_a, median_radius_a, minor_axis_length_a, orientation_degrees_a = [], [], [], [], [], [], [], [], [], []

            for j, centroid in enumerate(centroids):
                contour = contours[j]
                area = cntarea(contour)
                perimeter = cntperi(contour)
                circularity = 4 * np.pi * area / perimeter ** 2
                MA = cntMA(contour)
                [MA, ma, orientation] = MA
                aspect_ratio = MA / ma
                imID = imID
                clas = m1_im[round(centroid[0]), round(centroid[1])]
                center_x = centroid[0]
                center_y = centroid[1]

                # Additional features
                compactness = perimeter ** 2 / area
                eccentricity = np.sqrt(1 - (ma / MA) ** 2)
                extent = area / (MA * ma)
                form_factor = (perimeter ** 2) / (4 * np.pi * area)
                major_axis_length = MA
                maximum_radius = np.max(np.linalg.norm(contour - centroid, axis=1))
                mean_radius = np.mean(np.linalg.norm(contour - centroid, axis=1))
                median_radius = np.median(np.linalg.norm(contour - centroid, axis=1))
                minor_axis_length = ma
                orientation_degrees = np.degrees(orientation)

                areas.append(area)
                perimeters.append(perimeter)
                circularities.append(circularity)
                aspect_ratios.append(aspect_ratio)
                image_ids.append(imID)
                classes.append(clas)

                # additional features
                compactness_a.append(compactness)
                eccentricity_a.append(eccentricity)
                extent_a.append(extent)
                form_factor_a.append(form_factor)
                maximum_radius_a.append(maximum_radius)
                mean_radius_a.append(mean_radius)
                median_radius_a.append(median_radius)
                minor_axis_length_a.append(minor_axis_length)
                orientation_degrees_a.append(orientation_degrees)

            dat = {
                'Centroid_x': centroids[:,0],
                'Centroid_y': centroids[:,1],
                'Class': classes,
                'Area': areas,
                'Perimeter': perimeters,
                'Circularity': circularities,
                'Aspect Ratio': aspect_ratios,

                'compactness' : compactness_a,
                'eccentricity' : eccentricity_a,
                'extent' : extent_a,
                'form_factor' : form_factor_a,
                'maximum_radius' : maximum_radius_a,
                'mean_radius' : mean_radius_a,
                'median_radius' : median_radius_a,
                'minor_axis_length' : minor_axis_length_a,
                'orientation_degrees' : orientation_degrees_a,

                'Image ID': image_ids
            }

            df = pd.DataFrame(dat)

            df.to_pickle(outpth)
    break


\\10.99.68.178\andreex\data\monkey fetus\gestational 40\StarDist_11_21_23\json\monkey_fetus_40_0895.json


In [81]:
centroids = []
contours = []

for entry in data:
    cent = entry['centroid'][0]
    cent = [cent[0] - x_offset, cent[1] - y_offset]  # coord might be backwards, idk
    cent = [num/downsize5x for num in cent]
    print(cent)
    print

    x_end = (crop_file_data[1] - x_offset)/downsize5x
    y_end = (crop_file_data[3] - y_offset)/downsize5x

    if (0 <= cent[0] < y_end) and (0 <= cent[1] < x_end):
        centroids.append(cent)
        c = entry['contour'][0]
        contour = []
        for j in range(len(c[0])):
            point = [c[0][j] - x_offset, c[1][j] - y_offset]
            point = [num/downsize5x for num in point]
            contour.append(point)
        contours.append(contour)

centroids = np.array(centroids)
contours = np.array(contours)

print(centroids)

[-6270.693279166298, -5065.020577585446]
[-6300.2790779828665, -5388.256469133621]
[-6300.720657069681, -5724.739733286231]
[-6299.837498896052, -5306.564338072949]
[-5935.9763313609465, -5333.5006623686295]
[-6299.837498896052, -5283.1606464717825]
[-6300.2790779828665, -5506.158085313079]
[-6300.720657069681, -5525.145986046101]
[-6299.837498896052, -5403.711737172127]
[-5968.653183785216, -5283.1606464717825]
[-6306.019606111455, -4928.131060672967]
[-6303.8117106773825, -5124.192175218582]
[-6302.486973416939, -5147.595866819747]
[-6304.253289764197, -5059.721628543672]
[-5890.052106332244, -5168.350083900026]
[-6306.019606111455, -4936.079484235626]
[-6299.837498896052, -5311.863287114722]
[-6300.720657069681, -5444.337013159056]
[-6306.461185198269, -5015.122140775413]
[-6295.421708027908, -5516.314404309812]
[-6301.162236156496, -5462.883334805263]
[-6294.096970767464, -4343.480349730637]
[-6298.5127616356085, -4670.690453060143]
[-6309.110659719156, -4558.087785922458]
[-6310.4

In [87]:
print(x_end)
print(y_end)

256.22078954340736
2624.220789543407


In [50]:
df.head()

Unnamed: 0,Centroid_x,Centroid_y,Class,Area,Perimeter,Circularity,Aspect Ratio,Image ID
0,195.819659,917.834496,14,3.80246,7.232203,0.913552,1.246588,1
1,288.109688,2089.785393,14,2.069219,5.637719,0.818106,1.571923,1
2,658.594542,691.304425,14,1.700457,5.131146,0.811609,1.1394,1
3,353.904972,360.561689,14,1.154875,4.31529,0.779336,1.233959,1
4,467.832377,446.669611,14,1.310987,4.574769,0.787172,1.198028,1


In [52]:
np.unique(df['Class'])

array([12, 14], dtype=uint8)

In [90]:
import CellProfiler as cp

ModuleNotFoundError: No module named 'CellProfiler'