# Adapt JFnet to healthy vs. diseased decision on Kaggle DR data

### Motivation

* optretina: label noise & transfer of JFnet to new dataset with potentially differing statistics
* Kaggle DR: this should give us a rough feeling for how good we can get

In [None]:
import theano
import theano.tensor as T
import lasagne
import numpy as np
import sys
sys.path.append('..')
import os

from progressbar import ProgressBar
import models
from datasets import KaggleDR

## Image preprocessing

In [None]:
# Training images
# Runs ../convert_JF.py on the JPEGs in ../data/kaggle_dr/train with a crop
# size of 512; presumably the converted images are written to
# --convert_directory using 4 worker processes (verify against convert_JF.py).
%run ../convert_JF.py --directory=../data/kaggle_dr/train --convert_directory=../data/kaggle_dr/train_JF_512 --crop_size=512 --extension=jpeg --n_proc=4

# Test images (same conversion settings as for the training images)
# TODO: I don't have the raw test images; get the resized ones from vaneeda.
%run ../convert_JF.py --directory=../data/kaggle_dr/test --convert_directory=../data/kaggle_dr/test_JF_512 --crop_size=512 --extension=jpeg --n_proc=4

## Feature extraction

Features are extracted from Jeffrey de Fauw's network (JFnet), which was originally trained for the Kaggle DR competition.

In [None]:
def extract_features(source_path=None, source_filenames=None, last_layer=None, outfile=None, batch_size=2):
    """Run a dataset through JFnet and store one layer's activations on disk.

    The network is built by ``models.jeffrey_df`` for 512x512 inputs using the
    weights in ``../models/jeffrey_df/2015_07_17_123003_PARAMSDUMP.pkl``. The
    output of ``network[last_layer]`` is computed batch by batch and written
    to ``outfile`` through ``np.memmap``.

    NOTE: ``outfile`` is a raw memory map (dtype ``theano.config.floatX``)
    and therefore carries no shape/dtype header, whatever its file extension.
    Presumably it has to be read back with ``np.memmap`` and a known shape.

    Parameters
    ----------
    source_path : str
        Passed to ``KaggleDR`` as ``path_data`` (presumably the image folder).
    source_filenames : str
        Passed to ``KaggleDR`` as ``filename_targets`` (presumably a CSV with
        image names and labels).
    last_layer : str
        Key into the mapping returned by ``models.jeffrey_df``; selects the
        layer whose output is stored.
    outfile : str
        Destination file. If it already exists nothing is computed.
    batch_size : int
        Number of samples per forward pass.

    Returns
    -------
    None
        The result is the file written to ``outfile``. If extraction fails or
        is interrupted, the partially written file is removed again so that a
        later call does not mistake it for a finished one.
    """
    # Cheap check first: skip building the network and compiling the Theano
    # function when there is nothing to do.
    if os.path.exists(outfile):
        print('Feature file already exists, aborting.')
        return

    input_var = T.tensor4('inputs')
    weights = '../models/jeffrey_df/2015_07_17_123003_PARAMSDUMP.pkl'
    network = models.jeffrey_df(input_var=input_var, width=512, height=512, filename=weights)
    output_layer = network[last_layer]

    feature_activations = lasagne.layers.get_output(output_layer)
    forward_pass = theano.function([input_var], feature_activations)

    dataset = KaggleDR(path_data=source_path, filename_targets=source_filenames,
                       preprocessing=KaggleDR.jf_trafo)
    n_samples = dataset.n_samples

    # Ceiling division in integer arithmetic. np.ceil(n_samples/batch_size)
    # would floor first under Python 2 integer division and miss the last,
    # incomplete batch.
    n_batches = (n_samples + batch_size - 1) // batch_size
    p = ProgressBar(n_batches)

    # Each sample might still be of dim > 1, so keep the layer's per-sample
    # output shape instead of flattening.
    fp = np.memmap(outfile, dtype=theano.config.floatX, mode='w+',
                   shape=(n_samples,) + output_layer.output_shape[1:])

    completed = False
    try:
        batches = dataset.iterate_minibatches(np.arange(n_samples), batch_size)
        for i, batch in enumerate(batches):
            p.animate(i)
            inputs, _ = batch
            start = i * batch_size
            stop = min(start + batch_size, n_samples)
            fp[start:stop] = forward_pass(inputs)
        fp.flush()
        completed = True
    finally:
        del fp  # close memory mapped array
        if not completed:
            # A half-filled file would make every later call abort with
            # "already exists", so do not leave it behind.
            os.remove(outfile)

    return

In [None]:
# Key of the JFnet layer whose activations are extracted: extract_features
# looks it up as network[last_layer]. It is also embedded in the names of the
# feature files written below.
last_layer = '18'

In [None]:
# Extract the activations of layer `last_layer` for the training images.
# NOTE(review): extract_features writes through np.memmap, so despite the
# '.npy' suffix the file is raw data without an .npy header - confirm how
# it is read back downstream.
extract_features(
    source_path='../data/kaggle_dr/train_JF_512',
    source_filenames='../data/kaggle_dr/trainLabels_bin.csv',
    last_layer=last_layer,
    outfile='../data/kaggle_dr/feat_train_' + last_layer + '.npy',
    batch_size=2,
)

In [None]:
# Extract the activations of layer `last_layer` for the test images.
# NOTE(review): extract_features writes through np.memmap, so despite the
# '.npy' suffix the file is raw data without an .npy header - confirm how
# it is read back downstream.
extract_features(
    source_path='../data/kaggle_dr/test_JF_512',
    source_filenames='../data/kaggle_dr/retinopathy_solution_bin.csv',
    last_layer=last_layer,
    outfile='../data/kaggle_dr/feat_test_' + last_layer + '.npy',
    batch_size=2,
)


## Train classifier on fixed JFnet features

## Analyse performance

## If not satisfactory, consider fine-tuning JFnet features jointly with the classifier
### This is computationally more expensive and requires fusing the two models, one of which is written in Lasagne and the other in Keras!