# Segmentation by registration and feature extraction

In [1]:
import glob
import os
import os.path
import numpy as np
import pandas as pd
import maweight
import pickle
import logging
import tqdm
from tqdm.contrib import tzip

from config import lmp_features_path, files_for_lmp_path
from config import bin_width, bin_min, bin_max, threshold

import warnings
# Suppress all warnings for the rest of the session.
warnings.filterwarnings('ignore')


# Logging format: timestamp, level, logger name, message.
# Only standard LogRecord attributes are used, so every record can be formatted
# without an `extra` dict. Custom fields such as clientip/user are absent from
# ordinary records and make formatting fail, which loses the log message.
FORMAT = '%(asctime)-15s %(levelname)-8s %(name)s %(message)s'
logging.basicConfig(format=FORMAT, level=logging.INFO)

Executables being used: /opt/elastix-5.1.0-linux/bin/elastix /opt/elastix-5.1.0-linux/bin/transformix


In [2]:
def rename_files(files):
    """Rename files on disk so that their base names contain no '_' characters.

    Only the base name of each path is altered; the directory part is kept
    unchanged. Paths whose base name has no underscore are left untouched.

    Args:
        files: iterable of file paths to process.

    Returns:
        list of str: the resulting paths, in the same order as ``files``.

    Raises:
        FileExistsError: if a rename would land on an already existing file,
            or if two inputs map to the same new name. All renames are
            validated before the first one is performed, so nothing is
            renamed when this is raised.
    """
    sources = list(files)
    targets = [
        os.path.join(os.path.dirname(src), os.path.basename(src).replace("_", ""))
        for src in sources
    ]

    # Only paths whose name actually changes need a rename.
    pending = [(src, dst) for src, dst in zip(sources, targets) if src != dst]

    # Validate first: os.rename may silently replace an existing destination,
    # which would destroy data when two names collapse to the same string.
    claimed = set()
    for src, dst in pending:
        if dst in claimed or os.path.exists(dst):
            raise FileExistsError(
                f"Cannot rename '{src}' to '{dst}': destination already exists"
            )
        claimed.add(dst)

    for src, dst in pending:
        os.rename(src, dst)
    return targets

## Discovering files to process

In [3]:
# Collect every NIfTI volume (plain and gzip-compressed) from the input folder.
images = glob.glob(os.path.join(files_for_lmp_path, '*.nii'))
images += glob.glob(os.path.join(files_for_lmp_path, '*.nii.gz'))

# Classify on the file name only, so that a '-label' substring in a parent
# directory name cannot misclassify the files.
dissected_images = sorted(f for f in images if '-label' not in os.path.basename(f))
labeled_images = sorted(f for f in images if '-shank-label' in os.path.basename(f))

# Images and masks are paired by position further down, so the two lists must
# have the same length. Fail before any file is renamed.
if len(dissected_images) != len(labeled_images):
    raise ValueError(
        f"Found {len(dissected_images)} dissected images but {len(labeled_images)} masks; "
        "every image needs exactly one '-shank-label' mask"
    )

# Strip '_' from the file names on disk and keep working with the new paths.
dissected_images = rename_files(dissected_images)
labeled_images = rename_files(labeled_images)
print(f"Number of dissected images: {len(dissected_images)}")
print(f"Number of masks: {len(labeled_images)}")

Number of dissected images: 48
Number of masks: 48


## Extracting the features

In [4]:
def extract_features(dissected_images, manually_segmented_targets, bin_min, bin_max, bin_width):
    """Extract features for each image/mask pair and collect them in one table.

    Images and masks are paired by position. For every pair the work is
    delegated to ``maweight.extract_features_3d`` with the single mask, a
    single empty label, histogram bins ``range(bin_min, bin_max + 1, bin_width)``
    and the module-level ``threshold`` imported from ``config``.

    Args:
        dissected_images: sequence of image file paths.
        manually_segmented_targets: sequence of mask file paths, in the same
            order as ``dissected_images``.
        bin_min: first bin value.
        bin_max: last bin value (inclusive when reachable with ``bin_width``).
        bin_width: step between consecutive bin values.

    Returns:
        pandas.DataFrame: the per-image results concatenated row-wise with a
        fresh index, plus an ``id`` column holding each image's base name up
        to its first '.'.

    Raises:
        ValueError: if the two input sequences differ in length.
    """
    image_paths = list(dissected_images)
    mask_paths = list(manually_segmented_targets)

    # zip-style pairing would silently drop the surplus entries; refuse early
    # instead of mis-pairing or failing only after all images were processed.
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            f"Got {len(image_paths)} images but {len(mask_paths)} masks; "
            "the two lists must have the same length"
        )

    bin_values = range(bin_min, bin_max + 1, bin_width)

    per_image = []
    for image_path, mask_path in tzip(image_paths, mask_paths):
        per_image.append(
            maweight.extract_features_3d(
                image_path,
                [mask_path],
                [""],
                bins=list(bin_values),
                thresholds=[threshold],
            )
        )

    features = pd.concat(per_image, axis=0, ignore_index=True)
    # NOTE(review): assumes extract_features_3d yields exactly one row per
    # image, otherwise this assignment fails on a length mismatch - confirm.
    features['id'] = [os.path.basename(path).split('.')[0] for path in image_paths]
    return features

In [5]:
# Run the extraction over all image/mask pairs, discard every column whose
# name matches 'mask', and store the resulting table as CSV.
lmp_features = (
    extract_features(dissected_images, labeled_images, bin_min, bin_max, bin_width)
    .pipe(lambda frame: frame.drop(columns=frame.filter(regex='mask').columns))
)
lmp_features.to_csv(lmp_features_path, index=False)

HBox(children=(FloatProgress(value=0.0, max=48.0), HTML(value='')))




In [6]:
# NOTE(review): this cell repeats the mask-column drop and CSV export already
# performed in the previous cell; it looks redundant - confirm before removing.
lmp_features = lmp_features.drop(columns=lmp_features.filter(regex='mask').columns)
lmp_features.to_csv(lmp_features_path, index=False)