# Features test

In [None]:
from pathlib import Path
import sys
print(sys.executable)
sys.path.append('/Users/andreachatrian/Documents/Repositories/cancer_phenotype')
import random
import time
import json
import numpy as np
import pandas as pd
import cv2
from matplotlib import pyplot as plt
from data.contours import read_annotations, get_contour_image
from data.contours.instance_masker import InstanceMasker
from data.images.wsi_reader import WSIReader
from quant.features import nuclear_features
from base.utils.timer import Timer

In [6]:
# Root of the mounted TCGA prostate dataset; slides are looked up below it.
data_dir = Path(
    '/Users/andreachatrian/Mounts/rescomp/gpfs2/well/rittscher/projects/TCGA_prostate/TCGA'
)
# Folder holding one '<slide_id>.json' feature table per slide.
features_dir = Path(
    '/Users/andreachatrian/Mounts/rescomp/gpfs2/well/rittscher/projects/TCGA_prostate/TCGA/data/features/combined_mpp1.0_normal_nuclei'
)
def view_example_features(slide_id, num_examples=2, annotations_model='combined_mpp1.0_normal_nuclei'):
    """Plot mask, image and normalised features for randomly chosen contours of one slide.

    Reads the feature table ``features_dir/<slide_id>.json`` and the slide's
    annotations, keeps the outer contours whose bounding box matches a row
    label of the feature table, and draws ``num_examples`` of them (fixed seed,
    sampled with replacement). Each example becomes one figure row: mask,
    image, and a bar chart of that contour's min-max normalised features.

    Args:
        slide_id: Slide identifier; used as the feature file stem and to
            locate ``*/<slide_id>.svs`` under ``data_dir``.
        num_examples: Number of examples (figure rows) to draw.
        annotations_model: Forwarded to ``read_annotations``.

    Returns:
        Tuple ``(masks, images, features)`` of three lists holding one entry
        per example, in plotting order.

    Raises:
        ValueError: If no outer contour's bounding box matches a feature row.
        StopIteration: If no ``*/<slide_id>.svs`` file exists under ``data_dir``.
    """
    feature_path = features_dir/(slide_id + '.json')
    with open(feature_path, 'r') as feature_file:
        features = pd.read_json(feature_file, orient='split')
    # Original author's note: read_json is not building the index correctly,
    # so the row labels are taken verbatim from the raw JSON 'index' entry.
    with open(feature_path, 'r') as feature_file:
        features.index = json.load(feature_file)['index']
    with Timer(None, "Time to read annotation for 1 slide: {:0.4f} seconds"):
        slide_contours = read_annotations(data_dir, (slide_id,), annotations_model)[slide_id]
    with Timer(None, "Time to make Instance Maker: {:0.4f} seconds"):
        masker = InstanceMasker(slide_contours, 'epithelium', {'nuclei': 50, 'epithelium': 200, 'lumen': 250})
    # Row labels are '_'-separated integers; parse them into tuples so they can
    # be compared with cv2.boundingRect output, and remember the original label
    # so the matching feature row can be looked up later.
    box_to_label = {tuple(int(d) for d in label.split('_')): label for label in features.index}
    outer_bounding_boxes = [cv2.boundingRect(outer_contour) for outer_contour in masker.outer_contours]
    # Keep the *positions* of the matching contours: they are used below to
    # index both the masker and masker.outer_contours.
    # NOTE(review): assumes InstanceMasker is indexed by position in
    # outer_contours -- confirm against InstanceMasker.__getitem__.
    index_matches = [j for j, box in enumerate(outer_bounding_boxes) if box in box_to_label]
    if not index_matches:
        raise ValueError(f"No outer contour of slide '{slide_id}' matches a bounding box in the feature table")
    # Fixed seed (resets the global RNG) so the same examples are drawn every run.
    random.seed(42)
    examples = [index_matches[random.randint(0, len(index_matches) - 1)] for _ in range(num_examples)]
    # Min-max normalise every feature column; constant columns become NaN.
    normalized_features = (features - features.min())/(features.max() - features.min())
    # squeeze=False keeps `axes` two-dimensional even when num_examples == 1.
    _, axes = plt.subplots(num_examples, 3, figsize=(20, 40), squeeze=False)
    slide_path = next(data_dir.glob(f'*/{slide_id}.svs'))
    slide = WSIReader(slide_path)
    all_example_masks, all_example_images, all_example_features = [], [], []
    for i, j in enumerate(examples):
        with Timer('make_mask', f"Time to make mask {i}: {{:0.4f}} seconds"):
            mask, _ = masker[j]
        image = get_contour_image(masker.outer_contours[j], slide)
        # Use the feature row belonging to this contour's bounding box rather
        # than the i-th row of the table.
        example_features = normalized_features.loc[box_to_label[outer_bounding_boxes[j]]]
        axes[i, 0].imshow(mask)
        axes[i, 1].imshow(image)
        # NOTE(review): the original author observed a maximum of 2 in this
        # chart despite the normalisation; cause not verified.
        axes[i, 2].bar(np.arange(0, len(example_features)), example_features, 4)
        all_example_masks.append(mask)
        all_example_images.append(image)
        all_example_features.append(example_features)
    print(f"Average time to make masks {Timer.averages['make_mask']}")
    return all_example_masks, all_example_images, all_example_features

In [7]:
# Load the feature table of one slide and show a slice of its column names.
with (features_dir / ('TCGA-2A-AAYF-01Z-00-DX1.D0ADEB9A-37A4-451C-B91C-4AB014A6857A' + '.json')).open(mode='r') as feature_file:
    features = pd.read_json(feature_file, orient='split')
columns_names = features.columns
# Last expression of the cell: displayed as the cell output.
columns_names[40:75]

In [None]:
# Draw the example figure for one slide and keep the returned lists.
# Note: `features` is rebound here from the DataFrame loaded in the previous
# cell to the list of per-example feature rows returned by the function.
masks, examples, features = view_example_features('TCGA-2A-AAYF-01Z-00-DX1.D0ADEB9A-37A4-451C-B91C-4AB014A6857A')

Index(['outer_hu_moment0', 'outer_hu_moment1', 'outer_hu_moment2',
       'outer_hu_moment3', 'outer_hu_moment4', 'outer_hu_moment5',
       'outer_hu_moment6', 'outer_weighted_hu_moment0',
       'outer_weighted_hu_moment1', 'outer_weighted_hu_moment2',
       ...
       'gray_correlation_d5_a1.5707963267948966',
       'gray_correlation_d5_a2.356194490192345', 'gray_correlation_d20_a0',
       'gray_correlation_d20_a0.7853981633974483',
       'gray_correlation_d20_a1.5707963267948966',
       'gray_correlation_d20_a2.356194490192345', 'gray_correlation_d100_a0',
       'gray_correlation_d100_a0.7853981633974483',
       'gray_correlation_d100_a1.5707963267948966',
       'gray_correlation_d100_a2.356194490192345'],
      dtype='object', length=152)


In [15]:
from imageio import imwrite
import pickle as pkl
from pathlib import Path

# Save the first example (image, mask, feature row) for use outside the notebook.
# open() does not expand '~', so the home directory is resolved explicitly;
# the image paths are expanded the same way so all three outputs are handled
# consistently.
gland_image_path = Path('~/Desktop/gland_image.png').expanduser()
mask_image_path = Path('~/Desktop/mask_image.png').expanduser()
features_path = Path('~/features.pkl').expanduser()

imwrite(str(gland_image_path), examples[0])
imwrite(str(mask_image_path), masks[0])
with open(features_path, 'wb') as features_file:
    pkl.dump(features[0], features_file)
#view_example_features('TCGA-J4-A6G3-01Z-00-DX1.BDCBE64A-B59F-4735-B9B4-84BAF9CB1D23')


