### Extract Retinal Features in Parallel

In [12]:
from IPython.parallel import Client
c = Client()

In [13]:
%%px --local
import numpy as np
import pandas as pd
#from dark_bright_detector import DarkBrightDetector
from kobra.dr import Labels
from kobra import TrainFiles
from kobra.tr_utils import prep_out_path, time_now_str
import os
from os import path
import mahotas as mh
import mahotas.labeled as mhl
import cv2
import time

preprocessed = '/kaggle/retina/train/labelled'
masks = '/kaggle/retina/train/masks'
orig = '/kaggle/retina/train/sample/split'
output = '/kaggle/retina/train/sample/features'

n_bins = 100
%run ./DarkBright/dark_bright_detector.py

In [14]:
prep_out_path(output)

In [15]:
def extract_features(im_file):
    i = int(path.split(im_file)[0])
    extractor = DarkBrightDetector(preprocessed, orig, im_file, masks, is_debug = False)
    labels = extractor.find_bright_regions()

    drusen = extractor.get_predicted_region(Labels.Drusen)
    blood = extractor.get_predicted_region(Labels.Haemorage)

    Bc = np.ones((5, 5))
    labels_drusen, n_drusen = mh.label(drusen, Bc)
    labels_blood, n_blood = mh.label(blood, Bc)

    area = float(cv2.countNonZero(extractor.mask))

    outp = np.array([], dtype = np.int)

    # sizes excluding background
    sizes_drusen = mhl.labeled_size(labels_drusen)[1:] / area
    sizes_blood = mhl.labeled_size(labels_blood)[1:] / area

    hist_druzen, _ = np.histogram(sizes_drusen, n_bins, (0, 1e-3))
    hist_blood, _ = np.histogram(sizes_blood, n_bins, (0, 1e-3))

    outp = np.r_[outp, hist_druzen]
    outp = np.r_[outp, hist_blood]
    outp = np.r_[outp, i]
    return outp            

In [5]:
im_file = '0/10099_right.jpeg'
extract_features(im_file)

array([ 0, 31, 18, 12,  6,  5,  1,  1,  2,  1,  3,  1,  0,  0,  0,  1,  2,
        0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,
        0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,
        0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0, 19, 11,
        2,  2,  1,  1,  0,  1,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])

In [16]:
file_list = []
for i in range(0, 5):
    prefix = str(i)
    files = os.listdir(path.join(preprocessed, prefix))
    files = map(lambda f: path.join(prefix, f), files)
    file_list.extend(files)
                       

In [18]:
dv = Client()[:]
features = dv.map(extract_features, np.array(file_list))
features.wait()
df = pd.DataFrame(data = features[:])

df.to_csv(path.join(output, "features.txt"), index = False, header = False)

TypeError: DataFrame constructor called with incompatible data and dtype: cannot copy sequence with size 8 to array axis with dimension 3516