In [15]:
from scipy.ndimage import imread
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from math import pi as π
import os
from scipy import ndimage
from numpy.linalg import det, inv

In [16]:
img = imread("negatives/n11.png")

In [32]:
def read_images(path):
    return [imread(path + name) for name in sorted(os.listdir(path)) if name.endswith(".png")]

def brightness():
    pass

def show_images(images):
    def subplot(grid, image, index, cmap=None):
        subplot = plt.subplot(grid[i*4+index])
        subplot.imshow(img, cmap=cmap)
        subplot.axis('off')
    
    grid = gridspec.GridSpec(len(images), 4, hspace=0., wspace=0., left=0, top=1, bottom=0, right=1)
    plt.figure(figsize=(100, 100))
    for i in range(len(images)):
        subplot(grid, images[i], 0)
        subplot(grid, images[i][:,:,0], 1, 'gray')
        subplot(grid, images[i][:,:,1], 2, 'gray')
        subplot(grid, images[i][:,:,2], 3, 'gray')
    grid.update(hspace=0.)
    plt.show()

def rgb_features(images):
    return np.array([np.average(np.average(image, axis=0), axis=0) for image in images])

def contrast_feature(images):
    z = np.ones(shape=(len(images), 1))
    return z

def deviation_feature(images):
    return np.array([[ndimage.standard_deviation(image)] for image in images])

def features(images):
    f = np.concatenate((rgb_features(images), deviation_feature(images)), axis=1)
    return f
                   
def p(x, µ, Σ, φ):
    n = Σ.shape[0]
    xµ = np.matrix(x-µ).transpose()
    return 1/((2*π) ** (n/2) * det(Σ)**0.5) * np.exp(-0.5 * xµ.transpose() * inv(Σ) * xµ)

def confidence(x, µ_0, µ_1, Σ, φ):
    return abs(p(x, µ_0, Σ, φ) - p(x, µ_1, Σ, φ))

def is_parasite(x, µ_0, µ_1, Σ, φ):
    return p(x, µ_0, Σ, φ) < p(x, µ_1, Σ, φ)

def chooo_chooo(features_0, features_1):
    size = features_0.shape[1]
    µ_0 = features_0.mean(axis=0)
    µ_1 = features_1.mean(axis=0)
    Σ = np.zeros((size, size))
    φ = len(features_1)/(len(features_0) + len(features_1))
    for f in (features_0, features_1):
        for i in range(len(f)):
            Σ += np.matrix(f[i] - µ_0).transpose() * np.matrix(f[i] - µ_0)
    return (µ_0, µ_1, Σ, φ)

In [33]:
negatives = np.array(read_images("negatives/"))
positives = np.array(read_images("positives/"))

In [34]:
features_1, features_0 = (features(positives), features(negatives))
num_samples = (len(features_0) + len(features_1))

µ_0, µ_1, Σ, φ = chooo_chooo(features_0[:15], features_1[:15])
errors = 0
confidence_sum = 0
for f in features_0[15:]:
    confidence_sum += confidence(f, µ_0, µ_1, Σ, φ)
    if is_parasite(f, µ_0, µ_1, Σ, φ):
        errors += 1
for f in features_1[15:]:
    confidence_sum += confidence(f, µ_0, µ_1, Σ, φ)
    if not is_parasite(f, µ_0, µ_1, Σ, φ):
        errors += 1

print(errors / num_samples)
print(confidence_sum / num_samples)

0.06666666666666667
[[  3.51483088e-10]]


## Features
Our inital choice of 3 features being the mean of each rgb value. Yielded a 84% accuracy on whole dataset used for both training and testing.

* Test + Train set: 
    
    0.18333333333333332
    [[  8.57678845e-09]]

* standard deviation + rgb mean:

    0.06666666666666667
    [[  3.51483088e-10]]