# Extract candidate features from an image

In [1]:
%matplotlib inline

import os
import sys
import glob

import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from functools import partial

In [3]:
from numba import jit

## Change path as appropriate for where your target images are

In [10]:
pos_path = os.path.join('mouse_16.5_20x', 'distal_acinar_tubule')
neg_path = os.path.join('mouse_16.5_20x', 'distal_acinar_tubule_negative')

## Candidate feature functions

Currently all are color-based and return a scalar for the 2D array for each color (R, G,, B).

In [5]:
def mean_color(xs):
    """Return a vector of the mean value for each color."""
    return np.ma.mean(xs, axis=(0,1))

def std_color(xs):
    """Return a vector of the standard deviation for each color."""
    return np.ma.std(xs, ddof=1, axis=(0,1))

@jit
def moment_color(xs, d=2):
    """Returns the central moments of order d.
    
    May not be numerically stable since there is risk of catastrophic cancellation.
    """
    ans = np.zeros(3)
    for i in range(3):
        ys = np.ma.masked_array(xs[:,:,i])
        m = np.ma.sum((ys - np.ma.mean(ys))**d)
        ans[i] = m/ys.count()
    return ans

def pm_area_ratio(xs):

    """Calculate perimeter and area ratio for region inside contour."""
    
    xs_avg = (xs[:,:,0]+xs[:,:,1]+xs[:,:,2])/3
    len_h = []
    len_v = []
    for i in range (0,xs_avg.shape[0]-1):
        lengthi = sum(xs_avg[i+1,:]==-1) - sum(xs_avg[i,:]==-1)
        len_h.append(abs(lengthi))

    for j in range (0,xs_avg.shape[1]-1):
        lengthj = sum(xs_avg[:,j+1]==-1) - sum(xs_avg[:,j]==-1)
        len_v.append(abs(lengthj))

    perimeter = sum(len_h) + sum(len_v)
    area = np.sum(xs[:,:] != -1)
    ratio = perimeter/area
    return np.array(ratio)

# Pleas add more features that you think might be useful
# Each function should return a list or vector, even if it returns a scalar

In [6]:
feature_map = dict(
    mean_color=mean_color, 
    std_color=std_color, 
    moment_color_2=moment_color,
    moment_color_3=partial(moment_color, d=3),
    pm_area_ratio=pm_area_ratio
    )

In [7]:
def build_features(xs, features):
    ans = []
    for feature in features:
        f = feature_map[feature]
        ans.append(f(xs))
    return np.concatenate(ans)

## Construct a vector of features for each image

In [8]:
features = ['mean_color', 'std_color', 'moment_color_2', 'moment_color_3', 'pm_area_ratio']
image_features = {}
pos_files = glob.glob(os.path.join(pos_path, '*npy'))
for f in pos_files[:3]:
    path, filename = os.path.split(f)
    name, ext = os.path.splitext(filename)
    xs = np.load(f)
    ys = np.ma.masked_array(xs, mask=xs==-1)
    image_features[name] = build_features(ys, features)

In [9]:
image_features

{'2015-04-029_20X_C57Bl6_E16.5_LMM.14.24.4.46_SOX9_SFTPC_ACTA2_001__1002,2261_': masked_array(data = [  1.08400208e+02   1.92591476e+02   1.17720028e+02   3.29237381e+01
    5.28633678e+01   3.94607753e+01   1.08359694e+03   2.79356734e+03
    1.55661323e+03  -2.31908577e+04  -1.32818538e+05   6.62708433e+04],
              mask = False,
        fill_value = 1e+20),
 '2015-04-029_20X_C57Bl6_E16.5_LMM.14.24.4.46_SOX9_SFTPC_ACTA2_001__1013,863_': masked_array(data = [  1.03080769e+02   2.08451923e+02   1.48193910e+02   3.58055364e+01
    5.37714840e+01   6.13857755e+01   1.28162553e+03   2.89044577e+03
    3.76700567e+03  -6.11707902e+03  -2.24180854e+05   6.83214785e+04],
              mask = False,
        fill_value = 1e+20),
 '2015-04-029_20X_C57Bl6_E16.5_LMM.14.24.4.46_SOX9_SFTPC_ACTA2_001__1016,1004_': masked_array(data = [  1.27169851e+02   2.27410120e+02   1.32866596e+02   3.09789564e+01
    3.88830134e+01   6.50199444e+01   9.59356147e+02   1.51135374e+03
    4.22609721e+03  -3.