# Segmentation by registration and feature extraction

In [1]:
import glob
import os
import os.path
import numpy as np
import pandas as pd
import maweight
import pickle
import logging
import tqdm

from config import manually_segmented_path, dissected_path
from config import output_path, thigh_features_path, breast_features_path 
from config import save_registered_images
from config import bin_width, bin_min, bin_max, threshold
from config import tmp_path, elastix_params, threads

import warnings
warnings.filterwarnings('ignore')

# Optional cap on how many dissected images are processed; a falsy value
# (None) means no cap is applied.
LIMIT=None

# setting the logging format
# Only built-in LogRecord attributes are referenced here. Custom fields such as
# %(clientip)s / %(user)s must be supplied through `extra=` on every logging
# call, otherwise each record fails to format.
FORMAT = '%(asctime)-15s %(levelname)-8s %(message)s'
logging.basicConfig(format=FORMAT, level=logging.INFO)

Executables being used: /opt/elastix-5.1.0-linux/bin/elastix /opt/elastix-5.1.0-linux/bin/transformix


In [2]:
#delete all '_' character from filenames
def rename_files(files):
    """Rename files on disk so that their base names contain no '_' character.

    Only the final path component is changed; directory names are left as-is.

    Parameters
    ----------
    files : iterable of str
        Paths of existing files.

    Returns
    -------
    list of str
        The paths after renaming, in the same order as ``files``.

    Raises
    ------
    FileExistsError
        If stripping the underscores would make a file overwrite a different
        existing file (or another entry of ``files``). The check runs before
        anything is renamed, so no file is touched in that case.
    """
    files = list(files)
    new_files = [os.path.join(os.path.dirname(path), os.path.basename(path).replace("_", ""))
                 for path in files]

    # Validate first: os.rename silently replaces an existing target on POSIX,
    # which would destroy data when e.g. 'a_b.nii' and 'ab.nii' both exist.
    claimed = {}
    for old, new in zip(files, new_files):
        taken_by_other_source = claimed.setdefault(new, old) != old
        taken_on_disk = new != old and os.path.exists(new)
        if taken_by_other_source or taken_on_disk:
            raise FileExistsError(
                "cannot rename %r to %r: target already exists" % (old, new))

    for old, new in zip(files, new_files):
        if new != old:  # nothing to do for names that contain no underscore
            os.rename(old, new)
    return new_files

## Discovering files to process

In [3]:
# Collect the manually segmented files: uncompressed NIfTI first, then compressed,
# each group sorted by name.
manually_segmented_files = []
manually_segmented_files += sorted(glob.glob(os.path.join(manually_segmented_path, '*.nii')))
manually_segmented_files += sorted(glob.glob(os.path.join(manually_segmented_path, '*.nii.gz')))

# Underscores are stripped from the file names; later cells build output names as
# '<dissected name>_<mask name>' and take the part after the last '_' as the label.
manually_segmented_files = rename_files(manually_segmented_files)

# Classify on the base name only, so that a directory called e.g. 'breast_data'
# cannot push every file into the mask lists.
manually_segmented_images = [f for f in manually_segmented_files
                             if 'breast' not in os.path.basename(f)
                             and 'thigh' not in os.path.basename(f)]
manually_segmented_breast = [f for f in manually_segmented_files if 'breast' in os.path.basename(f)]
manually_segmented_thighs = [f for f in manually_segmented_files if 'thigh' in os.path.basename(f)]

# The registration loop zips these three lists position by position; unequal
# lengths would silently drop entries or pair an image with the wrong masks.
assert (len(manually_segmented_images)
        == len(manually_segmented_breast)
        == len(manually_segmented_thighs)), \
    "every manually segmented image needs exactly one breast and one thigh mask"

dissected_images = glob.glob(os.path.join(dissected_path, '*.nii'))
dissected_images += glob.glob(os.path.join(dissected_path, '*.nii.gz'))
dissected_images = sorted(dissected_images)

# Optional truncation for quick test runs (LIMIT is falsy when no cap is wanted).
if LIMIT:
    dissected_images= dissected_images[:LIMIT]

dissected_images = rename_files(dissected_images)
print(f"Number of dissected images: {len(dissected_images)}")
print(f"Number of manually segmented images: {len(manually_segmented_images)}")
print(f"Number of thigh masks: {len(manually_segmented_thighs)}")
print(f"Number of breast masks: {len(manually_segmented_breast)}")

Number of dissected images: 60
Number of manually segmented images: 16
Number of thigh masks: 16
Number of breast masks: 16


## Segmentation by Registration

In [4]:
# For each dissected image, go through the (image, breast mask, thigh mask) triples
# of the manually segmented set and call maweight.register_and_transform unless
# every expected output file is already present in output_path.
for target_image in tqdm.tqdm(dissected_images):
    target_name = target_image.rsplit(os.sep, 1)[-1]
    atlas_triples = zip(manually_segmented_images, manually_segmented_breast, manually_segmented_thighs)

    for atlas_image, breast_mask, thigh_mask in atlas_triples:
        # Output names are '<dissected file name>_<source file name>'.
        output_thigh = os.path.join(output_path, target_name + '_' + thigh_mask.rsplit(os.sep, 1)[-1])
        output_breast = os.path.join(output_path, target_name + '_' + breast_mask.rsplit(os.sep, 1)[-1])
        output_registered = (
            os.path.join(output_path, target_name + '_' + atlas_image.rsplit(os.sep, 1)[-1])
            if save_registered_images else None
        )

        # The registered image only counts as a required output when it is saved.
        required_outputs = [output_thigh, output_breast]
        if save_registered_images:
            required_outputs.append(output_registered)

        if all(os.path.isfile(path) for path in required_outputs):
            continue  # results of a previous run are reused

        maweight.register_and_transform(
            atlas_image,
            target_image,
            [thigh_mask, breast_mask],
            [output_thigh, output_breast],
            registered_image_path=output_registered,
            threads=threads,
            params=elastix_params,
            work_dir=tmp_path,
            verbose=0,
        )

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 8648.35it/s]


## Extracting the features

In [5]:
def extract_features(dissected_images, manually_segmented_targets):
    """Extract features of every dissected image under its fitted masks.

    For each dissected image, the fitted mask paths are looked up in
    ``output_path`` as '<dissected file name>_<target file name>' and passed,
    together with their labels, to ``maweight.extract_features_3d``.

    Parameters
    ----------
    dissected_images : iterable of str
        Paths of the dissected images.
    manually_segmented_targets : iterable of str
        Paths of the manually segmented masks whose fitted versions are used.

    Returns
    -------
    pandas.DataFrame
        The concatenated per-image feature frames plus a 'filename' column
        holding the dissected image paths. An empty frame with only the
        'filename' column is returned when there are no dissected images.
    """
    dissected_images = list(dissected_images)
    if not dissected_images:
        # pd.concat([]) raises ValueError; return a well-formed empty result instead.
        return pd.DataFrame({'filename': []})

    # The histogram bins do not depend on the image, so build them once.
    bins = list(range(bin_min, bin_max + 1, bin_width))

    frames = []
    for d in tqdm.tqdm(dissected_images):
        image_name = d.split(os.sep)[-1]
        fitted_masks = [os.path.join(output_path, image_name + '_' + m.split(os.sep)[-1])
                        for m in manually_segmented_targets]
        # The label is whatever follows the last '_' in the fitted mask's file name.
        labels = [f.split(os.sep)[-1].split('_')[-1] for f in fitted_masks]

        frames.append(maweight.extract_features_3d(d, fitted_masks, labels,
                                                   bins=bins, thresholds=[threshold]))

    features = pd.concat(frames, axis=0, ignore_index=True)
    # NOTE(review): assumes extract_features_3d yields exactly one row per image;
    # otherwise this assignment raises a length-mismatch error. TODO confirm.
    features['filename'] = dissected_images

    return features

In [6]:
# Features under the fitted thigh masks, written to CSV without the index column.
thigh_features = extract_features(
    dissected_images=dissected_images,
    manually_segmented_targets=manually_segmented_thighs,
)
thigh_features.to_csv(thigh_features_path, index=False)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [05:02<00:00,  5.05s/it]


In [7]:
# Features under the fitted breast masks, written to CSV without the index column.
breast_features = extract_features(
    dissected_images=dissected_images,
    manually_segmented_targets=manually_segmented_breast,
)
breast_features.to_csv(breast_features_path, index=False)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [05:39<00:00,  5.65s/it]


In [8]:
# Write both feature tables to their configured CSV paths. These are the same
# to_csv calls already issued right after each table was computed.
for features_table, csv_path in (
    (thigh_features, thigh_features_path),
    (breast_features, breast_features_path),
):
    features_table.to_csv(csv_path, index=False)