In [1]:
import re
import glob
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

In [2]:
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.utils import shuffle

## Train same model as previous notebook

In [3]:
# Load simulated data: load complete training and validation sets.
np.random.seed(11)

# Training set should be one concatenated shuffled list of simulated examples
# Validation set similarly, but also keep information about simulation parameters
# to evaluate performance in specific simulation conditions.
bounds_files = glob.glob("../data/simulData/*_bmBounds.tab")
intensities_files = [re.sub(r"_bmBounds.tab$", "_intensities.tab", bounds_file)
                     for bounds_file in bounds_files]
y, x = shuffle(
    np.concatenate([np.loadtxt(bounds_file) for bounds_file in bounds_files]),
    np.concatenate([np.loadtxt(intensities_file) for intensities_file in intensities_files])
)

In [4]:
def overlap_metric(y_true, y_predicted):
    y_pred_round = tf.round(y_predicted)
    max_start = tf.math.maximum(y_true[:, 0], y_pred_round[:, 0])
    min_end = tf.math.minimum(y_true[:, 1], y_pred_round[:,1])
    overlap = tf.math.maximum(0.0, min_end - max_start)
    longest_bounds = tf.math.maximum(
        y_true[:, 1] + 1 - y_true[:, 0], 
        y_pred_round[:, 1] + 1 - y_pred_round[:, 0]
        )
    return overlap / longest_bounds

In [5]:
model = tf.keras.Sequential([
        layers.Dense(40, activation='relu'),
        layers.Dense(2, activation='linear')
    ])
model.compile(
    optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
    loss='mse',
    metrics=['mae', overlap_metric]
    )

In [6]:
history = model.fit(x, y, epochs=1000, batch_size=8192, validation_split=0.33)

Train on 60299 samples, validate on 29701 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000


Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000

KeyboardInterrupt: 

Check same metrics as previous notebook.

In [None]:
print("Mean absolute error", history.history['val_mean_absolute_error'][-1])
print("Overlap metric", history.history['val_overlap_metric'][-1])

## Validation on different datasets

Simulated datasets, separately

In [None]:
bounds_files = glob.glob("../data/simulData/*_bmBounds.tab")
intensities_files = [re.sub(r"_bmBounds.tab$", "_intensities.tab", bounds_file)
                     for bounds_file in bounds_files]


In [None]:
adip_metrics = defaultdict(list)
scale_metrics = defaultdict(list)
for bounds_file, intensities_file in zip(bounds_files, intensities_files):
    test_y, test_x = shuffle(np.loadtxt(bounds_file), np.loadtxt(intensities_file))
    values = model.evaluate(test_x, test_y)
    m = re.search(r"/scale(.*)-adip(.*)_bmBounds.tab$", bounds_file)
    scale, adip = [float(x) for x in m.groups()]
    overlap_index = model.metrics_names.index("overlap_metric")
    overlap = values[overlap_index]
    adip_metrics[adip].append(overlap)
    scale_metrics[scale].append(overlap)

In [None]:
adiposities = sorted(adip_metrics)
adip_means = [np.mean(adip_metrics[adip]) for adip in adiposities]

In [None]:
adip_means

In [None]:
plt.plot(adiposities, adip_means)
plt.show()

In [None]:
pts

data

In [None]:
?plt.plot

## Real data predictions

In [None]:
real_data_file = "../data/realData/FS_1334172_layers.csv"
raw = np.loadtxt(real_data_file, delimiter=";")
real_data = np.transpose(raw)
preds = model.predict(real_data)

In [None]:
np.savetxt("predictions.txt", preds, fmt="%d")