<h1> In this notebook, we will implement a neural network that regresses volume against a stereo image pair (left and right RGB images stacked into a six-channel input) </h1>

In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('/root/alok/github/cv_research/alok')

import glob
import json
import os
import numpy as np
from PIL import Image

import keras
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.layers import Input, Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from keras import backend as K
from keras.backend import tf as ktf

from matplotlib import pyplot as plt
from matplotlib import cm
from biomass_utils.points_of_interest import get_point_cloud

%matplotlib inline

# Restrict CUDA-based libraries in this process to the GPU with index 1.
# NOTE(review): this is set after keras has been imported — confirm the
# backend has not already initialised its devices by this point.
os.environ["CUDA_VISIBLE_DEVICES"]="1"
# Seed NumPy's global random number generator so NumPy-driven randomness is repeatable.
np.random.seed(1)

In [None]:
# Get data ready

data_path_base = '/root/data/blender_v3'
image_dir = '{}/{}'.format(data_path_base, 'stereo_images')
annotation_dir = '{}/{}'.format(data_path_base, 'annotations')


def number_key(path):
    """Return the integer that follows the last underscore of the file stem.

    For example, a stem such as 'left_12' yields 12. Raises ValueError when
    that trailing token is not an integer.
    """
    return int(os.path.splitext(os.path.basename(path))[0].split('_')[-1])


def side_key(path):
    """Return the text that precedes the first underscore of the file stem.

    For example, a stem such as 'left_12' yields 'left'.
    """
    return os.path.splitext(os.path.basename(path))[0].split('_')[0]


# Sort every listing by its numeric suffix so that left images, right images
# and annotations line up position by position.
all_image_paths = sorted(glob.glob('{}/*.png'.format(image_dir)), key=number_key)
left_image_paths = [p for p in all_image_paths if side_key(p) == 'left']
right_image_paths = [p for p in all_image_paths if side_key(p) == 'right']
annotation_paths = sorted(glob.glob('{}/*.json'.format(annotation_dir)), key=number_key)

# Materialise the triples as a list: on Python 3 `zip` is a one-shot iterator,
# so iterating it once for the training split would leave nothing for the
# test split.
# NOTE(review): zip truncates to the shortest listing, so a missing left/right
# image or annotation silently shifts the pairing — verify the counts match.
complete_data_list = list(zip(left_image_paths, right_image_paths, annotation_paths))


TRAINING_SIZE = 500
# The first TRAINING_SIZE samples train the model; every remaining sample
# (starting with index TRAINING_SIZE itself) is held out for testing.
train_data_list = complete_data_list[:TRAINING_SIZE]
test_data_list = complete_data_list[TRAINING_SIZE:]


In [None]:
def convert_to_six_channel_input(left_image_array, right_image_array, annotation):
    """Stack a resized, rescaled left/right image pair along the channel axis.

    Each input array is turned into a PIL image, resized to 224x224, converted
    back to an array and divided by 255.0. The two results are depth-stacked,
    left first, then right.

    Args:
        left_image_array: array accepted by `Image.fromarray` (left view).
        right_image_array: array accepted by `Image.fromarray` (right view).
        annotation: not used by this function; kept in the signature for callers.

    Returns:
        The `np.dstack` of the two processed views. With three-channel inputs
        this has six channels.
    """
    target_size = (224, 224)

    processed_views = []
    for view_array in (left_image_array, right_image_array):
        resized_view = Image.fromarray(view_array).resize(target_size)
        processed_views.append(np.array(resized_view) / 255.0)

    return np.dstack(processed_views)
        


In [None]:
# Sanity-check the preprocessing on one training sample.
# Each entry of train_data_list is a (left image path, right image path,
# annotation path) triple.
idx = 0
left_path, right_path, annotation_path = train_data_list[idx]
with open(annotation_path, 'rb') as annotation_file:
    sample_annotation = json.load(annotation_file)

# Keep only the first three channels of each image before stacking them.
a = convert_to_six_channel_input(np.array(Image.open(left_path))[:, :, :3],
                                 np.array(Image.open(right_path))[:, :, :3],
                                 sample_annotation)
a.shape


In [None]:
def generator(data_list, steps_per_epoch, BATCH_SIZE, INPUT_SHAPE):
    """Endlessly yield (x_batch, y_batch) pairs built from `data_list`.

    Args:
        data_list: sequence of (left image path, right image path,
            annotation path) triples.
        steps_per_epoch: number of batches after which the generator starts
            again from the first sample. May be `np.inf`; the generator then
            restarts only when the data runs out.
        BATCH_SIZE: maximum number of samples per batch.
        INPUT_SHAPE: (height, width, channels) of one network input.

    Yields:
        x_batch: float array of shape (count, height, width, channels).
        y_batch: float array of shape (count, 1) holding each annotation's
            'volume' value.
        `count` equals BATCH_SIZE except for a trailing partial batch, which
        is trimmed to the samples that actually exist so that no
        uninitialised rows are ever handed to the model.

    Raises:
        ValueError: on the first `next()` call if `data_list` is empty.
    """
    n_samples = len(data_list)
    if n_samples == 0:
        raise ValueError('generator() received an empty data_list')

    i = 0
    while True:
        start = i * BATCH_SIZE
        stop = min(start + BATCH_SIZE, n_samples)
        count = stop - start

        # Allocate exactly `count` rows; every row is overwritten below.
        x_batch = np.empty((count, INPUT_SHAPE[0], INPUT_SHAPE[1], INPUT_SHAPE[2]))
        y_batch = np.empty((count, 1))

        for ind, j in enumerate(range(start, stop)):
            left_path, right_path, annotation_path = data_list[j][0], data_list[j][1], data_list[j][2]

            # Context managers close the underlying files once the pixel data
            # and annotation have been read.
            with Image.open(left_path) as left_image:
                left_image_array = np.array(left_image)[:, :, :3]
            with Image.open(right_path) as right_image:
                right_image_array = np.array(right_image)[:, :, :3]
            with open(annotation_path, 'rb') as annotation_file:
                annotation = json.load(annotation_file)

            six_channel_input = convert_to_six_channel_input(left_image_array, right_image_array, annotation)
            x_batch[ind, ...] = six_channel_input
            y_batch[ind] = annotation['volume']

        i += 1
        # Restart at the end of an epoch, or as soon as the next batch would
        # start past the end of the data (keeps `count` >= 1 at all times).
        if i >= steps_per_epoch or i * BATCH_SIZE >= n_samples:
            i = 0
        yield x_batch, y_batch

In [None]:
# Build a VGG16 with its top (fully-connected) layers, randomly initialised
# (weights=None), taking a 224x224 input with six channels.
vgg16 = VGG16(weights=None, include_top=True, input_shape=(224, 224, 6))

# Attach a single-unit Dense layer (no activation, i.e. a linear regression
# output) to the output of VGG16's second-to-last layer, bypassing VGG16's
# own final layer.
x = Dense(1, name='predictions')(vgg16.layers[-2].output)

# Assemble the regression model. Keras 2 names these arguments `inputs` and
# `outputs`; the singular `input` / `output` spellings are the Keras 1 API.
model = Model(inputs=vgg16.input, outputs=x)
model.summary()

In [None]:
# Adam with a base learning rate of 0.0005 and a learning-rate decay of 0.1.
# NOTE(review): decay=0.1 shrinks the learning rate very quickly — confirm
# this value is intentional.
optimizer = keras.optimizers.Adam(lr=0.0005, decay=0.1)

# Train on mean absolute error. 'accuracy' is a classification metric and
# carries no meaning for a continuous volume target, so report the mean
# absolute percentage error instead.
model.compile(loss='mean_absolute_error',
              optimizer=optimizer,
              metrics=['mean_absolute_percentage_error'])

In [None]:
# Samples per training batch.
BATCH_SIZE = 25
# Number of whole batches that fit in the training set.
steps_per_epoch = len(train_data_list) // BATCH_SIZE
# Endless batch generator over the training samples.
gen = generator(
    data_list=train_data_list,
    steps_per_epoch=steps_per_epoch,
    BATCH_SIZE=BATCH_SIZE,
    INPUT_SHAPE=(224, 224, 6),
)

In [None]:
# Train for 100 epochs, drawing `steps_per_epoch` batches from `gen` per epoch.
model.fit_generator(
    gen,
    steps_per_epoch=steps_per_epoch,
    epochs=100,
    verbose=1,
)

In [None]:
# Predict on the held-out samples, using at most MAX_EVAL_STEPS whole batches.
# Capping the step count by the data actually available prevents the generator
# from being asked for batches beyond the end of test_data_list.
MAX_EVAL_STEPS = 50
eval_steps = min(MAX_EVAL_STEPS, len(test_data_list) // BATCH_SIZE)
assert eval_steps > 0, 'test_data_list holds fewer than BATCH_SIZE samples; nothing to evaluate'

eval_gen = generator(test_data_list, eval_steps, BATCH_SIZE, (224, 224, 6))
predictions = model.predict_generator(eval_gen, eval_steps)

In [None]:
# Collect the annotated volume for every test sample that has a prediction.
# The count comes from `predictions` itself, so the two arrays always line up.
ground_truth_volumes = []
for i in range(len(predictions)):
    # Index 2 of each data triple is the annotation path.
    with open(test_data_list[i][2], 'rb') as annotation_file:
        annotation = json.load(annotation_file)
    ground_truth_volumes.append(annotation['volume'])

# Build the array once instead of re-allocating it on every append.
ground_truth_values = np.array(ground_truth_volumes, dtype=float)

In [None]:
# Relative difference between the mean ground-truth volume and the mean
# predicted volume (positive when the model under-predicts on average).
ground_truth_mean = ground_truth_values.mean()
prediction_mean = predictions[:, 0].mean()
(ground_truth_mean - prediction_mean) / ground_truth_mean

In [None]:
# Display the ground-truth volumes for inspection.
ground_truth_values

In [None]:
# Display the first column of the model output, i.e. one predicted value per sample.
predictions[:,0]

In [None]:
# Mean of the ground-truth volumes.
ground_truth_values.mean()

In [None]:
# Display the ground-truth volumes again (same expression as an earlier cell).
ground_truth_values

In [None]:
# Predicted versus ground-truth volume, one point per test sample.
# Labelled axes and a title let the figure stand on its own.
fig, ax = plt.subplots()
ax.scatter(ground_truth_values, predictions[:, 0])
ax.set_xlabel('Ground-truth volume')
ax.set_ylabel('Predicted volume')
ax.set_title('Predicted vs. ground-truth volume');

In [None]:
# Mean absolute error of the predictions, relative to each ground-truth value.
relative_errors = np.abs(ground_truth_values - predictions[:, 0]) / ground_truth_values
relative_errors.mean()