In [1]:
import sys
sys.path.append('../..')
import os
import argparse
import tempfile
import warnings
import shutil

import keras
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import numpy as np
import keras
from tqdm import tqdm
import requests

import keras_utils as ku
import cr_interface as cri
import lib

def notify(string='done'):
    '''
    Send a personal Slack notification (to Namgyu) reading "[CREB1] <string>".

    Delivery is best-effort: this function is called from `finally` blocks
    after long-running cells, so a failed request is reported as a warning
    instead of an exception that would replace the error raised by the run.

    string: text placed after the "[CREB1] " prefix.
    '''
    import json  # local import: json is not part of this notebook's import cell

    # NOTE(review): the fallback URL is a secret committed to the notebook.
    # Rotate it and provide the new one through CREB_SLACK_WEBHOOK_URL.
    webhook_url = os.environ.get(
        'CREB_SLACK_WEBHOOK_URL',
        'https://hooks.slack.com/services/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')

    headers = {
        'Content-type': 'application/json',
    }
    # json.dumps escapes quotes, backslashes and newlines in the message;
    # plain %-formatting produced an invalid JSON body for such strings.
    data = json.dumps({'text': '[CREB1] {}'.format(string)})

    try:
        # A timeout keeps a stalled connection from hanging the cell forever.
        response = requests.post(
            webhook_url, headers=headers, data=data, timeout=10)
        response.raise_for_status()
    except requests.RequestException as error:
        warnings.warn('slack notification failed: {}'.format(error))

Using TensorFlow backend.


In [2]:
# Small working subset used by the cells below: sample(frac=0.02) is applied
# first, then labeled() and tri_label() are applied to the sampled collection.
# NOTE(review): no seed is passed here, so the sample presumably differs
# between kernel runs — confirm against CrCollection.sample.
fc = cri.CrCollection.load().sample(frac=0.02).labeled().tri_label()

In [3]:
# Root directory under which get_bottleneck_dir() places one subdirectory per model.
BOTTLENECK_DIR = os.path.join(cri.PROJECT_DIR, 'bottlenecks')
# Scratch directory returned (and created on demand) by _get_temp_dir().
TEMP_DIR = os.path.join(cri.PROJECT_DIR, '.keras_bottle_temp')

def _get_temp_dir():
    '''Return TEMP_DIR, creating the directory first if it does not exist.'''
    scratch = TEMP_DIR
    os.makedirs(scratch, exist_ok=True)
    return scratch

def get_bottleneck_dir(model, mkdir=True):
    '''
    Return the absolute path of the bottleneck directory for `model`.

    The directory is BOTTLENECK_DIR/<name>_<n>, where <name> is model.name
    with every '.' replaced by '_' and <n> is len(model.layers). It is
    created (including parents) when `mkdir` is true.
    '''
    safe_name = model.name.replace('.', '_')
    layer_count = len(model.layers)
    target = os.path.join(
        BOTTLENECK_DIR, '{}_{}'.format(safe_name, layer_count))

    if mkdir:
        os.makedirs(target, exist_ok=True)

    return os.path.abspath(target)

def get_bottleneck_path(model, cr_code, aug, index=0):
    '''
    Return the .npy path of the bottleneck for `cr_code` under the model's
    bottleneck directory (which get_bottleneck_dir creates if needed).

    Augmented bottlenecks are named "<cr_code>_AUG_<index>.npy"; unaugmented
    ones are named "<cr_code>.npy" and a non-zero `index` only triggers a
    warning.
    '''
    directory = get_bottleneck_dir(model)
    if not aug:
        if index != 0:
            warnings.warn('index ignored on aug=False')
        filename = '{}.npy'.format(cr_code)
    else:
        filename = '{}_AUG_{:d}.npy'.format(cr_code, index)
    return os.path.join(directory, filename)

def bottleneck_exists(model, cr_code, aug: bool, index=0):
    '''Tell whether the bottleneck file for (model, cr_code, aug, index) is on disk.'''
    # optimization considerations: currently takes 35s per 1,000,000 calls
    candidate = get_bottleneck_path(model, cr_code, aug, index)
    return os.path.exists(candidate)

In [4]:
# Application entry registered under 'mobilenet' in keras_utils, and the model it returns.
mobile = ku.applications['mobilenet']
model = mobile.get_model()
# Labeled entries of the full collection, restricted to dataset_index=1.
collection = cri.CrCollection.load().labeled().filter_by(dataset_index=1)

loading mobilenet model


In [5]:
def _get_nth_bottleneck_collection(app, collection, aug, index=0, model=None):
    '''
    Narrow `collection` down to the entries whose bottleneck still has to be
    generated for the given `aug` / `index`, sorted by cr_code.

    An empty collection is returned unchanged after a warning. When `model`
    is not supplied it is obtained from `app.get_model()`.
    '''
    frame = collection.df
    if len(frame) == 0:
        warnings.warn('empty collection')
        return collection

    if not model:
        model = app.get_model()

    def is_missing(row):
        # True for rows whose bottleneck file is not on disk yet
        return not bottleneck_exists(model, row['cr_code'], aug, index)

    mask = frame.apply(is_missing, axis=1)
    pending = frame.loc[mask].sort_values('cr_code')

    return cri.CrCollection(pending)

In [6]:
# Preview: rows of the 'in'-labelled sample that still lack augmented bottleneck #0.
# No model is passed, so the helper obtains one through mobile.get_model().
_get_nth_bottleneck_collection(mobile, fc.filter_by(label='in'), aug=True, index=0).df

Unnamed: 0,cr_code,dataset_index,pid,phase_index,slice_index,label,original_name,original_filepath
0,D00_P00001401_P00_S05,0,1401,0,5,in,DET0001401_SA8_ph0,cap_challenge/DET0001401/DET0001401_SA8_ph0.dcm
1,D00_P00001701_P14_S02,0,1701,14,2,in,DET0001701_SA3_ph14,/Users/release/.ho/cardiac-research/slice_rang...
2,D00_P00001801_P14_S05,0,1801,14,5,in,DET0001801_SA6_ph14,/Users/release/.ho/cardiac-research/slice_rang...
3,D00_P00002701_P00_S02,0,2701,0,2,in,DET0002701_SA10_ph0,cap_challenge/DET0002701/DET0002701_SA10_ph0.dcm
4,D00_P00002901_P00_S13,0,2901,0,13,in,DET0002901_SA3_ph0,cap_challenge/DET0002901/DET0002901_SA3_ph0.dcm
5,D00_P00003101_P00_S02,0,3101,0,2,in,DET0003101_SA12_ph0,cap_challenge/DET0003101/DET0003101_SA12_ph0.dcm
6,D00_P00003101_P00_S03,0,3101,0,3,in,DET0003101_SA11_ph0,cap_challenge/DET0003101/DET0003101_SA11_ph0.dcm
7,D00_P00003401_P00_S13,0,3401,0,13,in,DET0003401_SA4_ph0,cap_challenge/DET0003401/DET0003401_SA4_ph0.dcm
8,D00_P00004001_P00_S03,0,4001,0,3,in,DET0004001_SA11_ph0,cap_challenge/DET0004001/DET0004001_SA11_ph0.dcm
9,D00_P00005501_P00_S01,0,5501,0,1,in,DET0005501_SA2_ph0,cap_challenge/DET0005501/DET0005501_SA2_ph0.dcm


In [14]:
def generate_bottlenecks(app, base_collection, aug, count=1, model=None, verbose=1):
    '''
    Compute and cache the bottlenecks that are missing for `base_collection`.

    For every index in range(count), the entries whose bottleneck file does
    not exist yet are exported to a temporary image directory, run through
    `model`, and each prediction row is saved to get_bottleneck_path().

    app: application object providing get_model(), image_size and
        get_image_data_generator(augment=...).
    base_collection: collection to process; an empty one only warns.
    aug: whether the augmenting image generator is used.
    count: number of variants per image; forced to 1 when `aug` is false.
    model: model used for prediction; defaults to app.get_model().
    verbose: when falsy, the "loading ..." summary line is not printed.

    Raises RuntimeError if the number of predictions differs from the number
    of cr_codes, because rows could then not be attributed to images.
    '''
    if len(base_collection.df) == 0:
        warnings.warn('empty collection')
        return
    if count != 1 and not aug:
        warnings.warn('count ignored when aug is false')
        count = 1
    if model is None:
        model = app.get_model()

    # One filtered collection per index: only entries still lacking a file.
    collections = [
        _get_nth_bottleneck_collection(
            app, base_collection, model=model, aug=aug, index=i)
        for i in range(count)
    ]
    total = sum(len(c.df) for c in collections)

    if verbose:
        print('loading {} of {} * {} bottlenecks'.format(
            total, len(base_collection.df), count))

    temp = _get_temp_dir()
    image_dir = os.path.join(temp, 'images')
    default_class = os.path.join(image_dir, 'default_class')
    temp_bottle_path = os.path.join(temp, 'temp.npy')

    for index, collection in enumerate(collections):
        if len(collection.df) == 0:
            continue

        # Start from a clean directory so stale images are never predicted.
        shutil.rmtree(image_dir, ignore_errors=True)
        collection.export(default_class, by_label=False)

        # flow_from_directory shuffles by default, which would pair each
        # prediction with the wrong cr_code; keep the directory order.
        # NOTE(review): this still assumes the exported files sort in the
        # same order as get_cr_codes() — confirm against CrCollection.export.
        gen = app.get_image_data_generator(augment=aug).flow_from_directory(
            image_dir,
            target_size=app.image_size,
            batch_size=128,
            shuffle=False,
        )

        data = model.predict_generator(gen)

        cr_codes = list(collection.get_cr_codes())
        if len(data) != len(cr_codes):
            raise RuntimeError(
                'expected {} bottlenecks but the model produced {}'.format(
                    len(cr_codes), len(data)))

        for row, cr_code in zip(data, cr_codes):
            # Write to a scratch file first, then move it into place, to
            # prevent corrupt data in the main bottleneck directory.
            path = get_bottleneck_path(model, cr_code, aug, index=index)
            np.save(temp_bottle_path, row)
            # os.replace also overwrites an existing target on Windows.
            os.replace(temp_bottle_path, path)

In [19]:
def generate_all_bottlenecks(app, collection=None, augmentation=5, balancing=5):
    '''
    Generate every bottleneck set for `app` in three passes.

    1. Unaugmented bottlenecks for the whole (tri-labelled) collection.
    2. `augmentation` augmented variants for dataset_index=0 entries
       labelled 'in'.
    3. `augmentation * balancing` augmented variants for dataset_index=0
       entries labelled 'oap' or 'obs'.

    app: application object forwarded to generate_bottlenecks().
    collection: collection to process; when None, the full labeled
        collection is loaded. An empty collection only triggers a warning.
    augmentation: number of augmented variants for 'in' entries.
    balancing: extra multiplier applied to 'oap' / 'obs' entries.
    '''
    # Compare against None explicitly: an empty collection that defines
    # __len__ is falsy and would otherwise fall through to loading everything.
    if collection is not None:
        c = collection.tri_label()
        if len(c.df) == 0:
            warnings.warn('empty collection')
            return
    else:
        c = cri.CrCollection.load().labeled().tri_label()

    # Only dataset_index=0 entries are augmented below.
    c0 = c.filter_by(dataset_index=0)
    c0_out = c0.filter_by(label=['oap', 'obs'])
    c0_in = c0.filter_by(label='in')

    print('(1/3) loading unaugmented bottlenecks'.center(100, '-'))
    generate_bottlenecks(app, c, aug=False)

    print('(2/3) loading train bottlenecks (in)'.center(100, '-'))
    generate_bottlenecks(app, c0_in, aug=True, count=augmentation)

    print('(3/3) loading train bottlenecks (out)'.center(100, '-'))
    generate_bottlenecks(app, c0_out, aug=True, count=augmentation * balancing)


In [20]:
# Generate all bottlenecks for the MobileNet app on the sampled collection `fc`.
# notify() sits in `finally` so the Slack message is sent whether the run
# finishes or raises.
try:
    generate_all_bottlenecks(mobile, collection=fc, augmentation=5, balancing=5)
finally:
    notify()

-------------------------------(1/5) loading unaugmented bottlenecks--------------------------------
loading 0 of 57 * 1 bottlenecks
--------------------------------(2/5) loading train bottlenecks (in)--------------------------------
loading 0 of 35 * 5 bottlenecks
-------------------------------(3/5) loading train bottlenecks (out)--------------------------------
loading 0 of 12 * 25 bottlenecks
--------------------------------(4/5) loading test bottlenecks (in)---------------------------------
loading 0 of 8 * 1 bottlenecks
--------------------------------(5/5) loading test bottlenecks (out)--------------------------------
loading 0 of 2 * 1 bottlenecks


In [21]:
# Same run for the 'inceptionresnetv2' app, on a fresh sample(frac=0.025) of
# the full collection. notify() sits in `finally` so the Slack message is sent
# whether the run finishes or raises.
try:
    generate_all_bottlenecks(ku.applications['inceptionresnetv2'],
                             collection=cri.CrCollection.load().sample(frac=0.025),
                             augmentation=5, balancing=5)
finally:
    notify()

-------------------------------(1/5) loading unaugmented bottlenecks--------------------------------
loading 122 of 126 * 1 bottlenecks
Found 122 images belonging to 1 classes.
--------------------------------(2/5) loading train bottlenecks (in)--------------------------------
loading 229 of 46 * 5 bottlenecks
Found 45 images belonging to 1 classes.
Found 46 images belonging to 1 classes.
Found 46 images belonging to 1 classes.
Found 46 images belonging to 1 classes.
Found 46 images belonging to 1 classes.
-------------------------------(3/5) loading train bottlenecks (out)--------------------------------
loading 375 of 15 * 25 bottlenecks
Found 15 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 15 images belonging to 1 c