Merge pull request #8 from dvgodoy/weights
Weights
dvgodoy committed Jun 16, 2018
2 parents afdc435 + 62c4446 commit 70f9d5c
Showing 34 changed files with 701 additions and 75 deletions.
36 changes: 33 additions & 3 deletions deepreplay/callbacks.py
@@ -2,6 +2,7 @@
import os
import numpy as np
import h5py
import keras.backend as K
from keras.callbacks import Callback

class ReplayData(Callback):
@@ -30,18 +31,36 @@ class ReplayData(Callback):
Group inside the HDF5 file where the information is to be
saved. If the informed group name already exists, it will throw
an exception.
model: Keras Model, optional
If provided, the model is attached directly to the callback
instance and its `on_train_begin` method is executed, initializing
all variables and creating the corresponding group in the HDF5
file.
This is intended for analyzing the initial conditions of the
model without ever calling its `fit` function, which is where
the callback would normally be invoked.
"""
- def __init__(self, inputs, targets, filename, group_name):
+ def __init__(self, inputs, targets, filename, group_name, model=None):
super(ReplayData, self).__init__()
self.handler = h5py.File('{}'.format(filename), 'a')
self.inputs = inputs
- self.targets = targets.reshape(-1, 1)
+ self.targets = targets.reshape(len(targets), -1)
self.filepath = os.path.split(filename)[0]
self.filename = filename
self.group = None
self.group_name = group_name
self.current_epoch = -1
self.n_epochs = 0
if model is not None:
self.set_model(model)
self.set_params({
'epochs': 0,
'samples': len(self.inputs),
'batch_size': len(self.inputs),
})
self.group_name = group_name + '_init'
self.on_train_begin()
self.group_name = group_name
return

def _append_weights(self):
@@ -52,14 +71,22 @@ def _append_weights(self):
for j, weights in enumerate(layer_weights):
self.group['layer{}'.format(i)]['weights{}'.format(j)][self.current_epoch + 1] = weights

def get_lr(self):
optimizer = self.model.optimizer
return K.function(inputs=[],
outputs=[optimizer.lr *
(1. / (1. + optimizer.decay * K.cast(optimizer.iterations,
K.dtype(optimizer.decay))))])(inputs=[])[0]
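# The backend function above evaluates Keras' time-based decay,
#     lr_t = lr / (1. + decay * iterations),
# e.g., lr=0.1 with decay=1e-3 gives 0.1 / (1. + 1.) = 0.05 after 1,000 updates.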

def on_train_begin(self, logs={}):
self.model.save(os.path.join(self.filepath, '{}_model.h5'.format(self.group_name)))
self.n_epochs = self.params['epochs']

self.group = self.handler.create_group(self.group_name)
self.group.attrs['samples'] = self.params['samples']
self.group.attrs['batch_size'] = self.params['batch_size']
- self.group.attrs['n_batches'] = np.ceil(self.params['samples'] / self.params['batch_size']).astype(np.int)
+ self.group.attrs['n_batches'] = (self.params['samples'] + self.params['batch_size'] - 1) // \
+     self.params['batch_size']
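# (a + b - 1) // b is integer ceiling division, i.e. ceil(samples / batch_size),
# without np.ceil's float round-trip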
self.group.attrs['n_epochs'] = self.n_epochs
self.group.attrs['n_layers'] = len(self.model.layers)
try:
@@ -81,6 +108,8 @@ def on_train_begin(self, logs={}):
for metric in self.model.metrics:
self.group.create_dataset(metric, shape=(self.n_epochs,), dtype='f')

self.group.create_dataset('lr', shape=(self.n_epochs,), dtype='f')

for i, layer in enumerate(self.model.layers):
layer_grp = self.group.create_group('layer{}'.format(i))
layer_weights = layer.get_weights()
@@ -97,6 +126,7 @@ def on_train_end(self, logs={}):

def on_epoch_begin(self, epoch, logs={}):
self.current_epoch = epoch
self.group['lr'][epoch] = self.get_lr()
return

def on_epoch_end(self, epoch, logs={}):
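The new model argument, the lr dataset, and get_lr fit together: the callback can now snapshot a model's initial conditions without ever calling fit, and each epoch records the effective (decayed) learning rate. A minimal sketch of the new constructor in use — the data, model, file name, and group name below are hypothetical, not part of this commit:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from deepreplay.callbacks import ReplayData

# hypothetical toy problem, just to exercise the new argument
X = np.random.uniform(size=(100, 2))
y = (X.sum(axis=1) > 1).astype(int)

model = Sequential()
model.add(Dense(2, input_dim=2, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd')

# passing model= makes the constructor run on_train_begin itself, so the
# initial weights land in the 'example_init' group of example.h5 before
# any training takes place
replay = ReplayData(X, y, filename='example.h5', group_name='example', model=model)

Training afterwards with the same callback and group_name then fills in the regular per-epoch datasets, including the new lr series.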
55 changes: 55 additions & 0 deletions deepreplay/datasets/ball.py
@@ -0,0 +1,55 @@
import numpy as np

def load_data(n_dims=10, n_points=1000, classif_radius_fraction=0.5, only_sphere=False, shuffle=True, seed=13):
"""
Parameters
----------
n_dims: int, optional
Number of dimensions of the n-ball. Default is 10.
n_points: int, optional
Number of points in the n-ball. Default is 1,000.
classif_radius_fraction: float, optional
Points farther away from the center than
`classif_radius_fraction * radius` are
considered to be positive cases. The remaining
points are the negative cases.
only_sphere: boolean, optional
If True, generates an n-sphere, that is, a hollow n-ball.
Default is False.
shuffle: boolean, optional
If True, the points are shuffled. Default is True.
seed: int, optional
Random seed. Default is 13.
Returns
-------
X, y: tuple of ndarray
X is an array of shape (n_points, n_dims) containing the
points in the n-ball.
y is an array of shape (n_points, 1) containing the
classes of the samples.
"""
radius = np.sqrt(n_dims)
points = np.random.normal(size=(n_points, n_dims))
sphere = radius * points / np.linalg.norm(points, axis=1).reshape(-1, 1)
if only_sphere:
X = sphere
else:
X = sphere * np.random.uniform(size=(n_points, 1))**(1 / n_dims)

adjustment = 1 / np.std(X)
radius *= adjustment
X *= adjustment

y = (np.abs(np.sum(X, axis=1)) > (radius * classif_radius_fraction)).astype(np.int)

# But we must not feed the network with neatly organized inputs...
# so let's randomize them
if shuffle:
np.random.seed(seed)
shuffled = np.random.permutation(range(X.shape[0]))
X = X[shuffled]
y = y[shuffled].reshape(-1, 1)

return (X, y)
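Given the signature above, a quick illustrative check of the generated shapes (default arguments assumed):

from deepreplay.datasets.ball import load_data

X, y = load_data()
print(X.shape, y.shape)  # (1000, 10) (1000, 1)
print(y.mean())          # fraction of positive cases, set by classif_radius_fraction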
37 changes: 37 additions & 0 deletions deepreplay/datasets/hypercube.py
@@ -0,0 +1,37 @@
import itertools
import numpy as np

def load_data(n_dims=10, vertices=(-1., 1.), shuffle=True, seed=13):
"""
Parameters
----------
n_dims: int, optional
Number of dimensions of the hypercube. Default is 10.
vertices: tuple of floats, optional
The two coordinate values every dimension can take, that is,
the endpoints of each edge. Default is (-1., 1.).
shuffle: boolean, optional
If True, the points are shuffled. Default is True.
seed: int, optional
Random seed. Default is 13.
Returns
-------
X, y: tuple of ndarray
X is an array of shape (2 ** n_dims, n_dims) containing the
vertices coordinates of the hypercube.
y is an array of shape (2 ** n_dims, 1) containing the
classes of the samples.
"""
X = np.array(list(itertools.product(vertices, repeat=n_dims)))
y = (np.sum(np.clip(X, a_min=0, a_max=1), axis=1) >= (n_dims / 2.0)).astype(np.int)

# But we must not feed the network with neatly organized inputs...
# so let's randomize them
if shuffle:
np.random.seed(seed)
shuffled = np.random.permutation(range(X.shape[0]))
X = X[shuffled]
y = y[shuffled].reshape(-1, 1)

return (X, y)
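And the analogous check for the hypercube loader — with the default 10 dimensions, every one of the 2 ** 10 vertices is enumerated:

from deepreplay.datasets.hypercube import load_data

X, y = load_data(n_dims=10)
print(X.shape, y.shape)  # (1024, 10) (1024, 1)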
47 changes: 45 additions & 2 deletions deepreplay/plot.py
@@ -3,17 +3,19 @@
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import seaborn as sns
from collections import namedtuple
from matplotlib import animation
- matplotlib.rcParams['animation.writer'] = 'ffmpeg'
+ matplotlib.rcParams['animation.writer'] = 'avconv'
sns.set_style('white')

FeatureSpaceData = namedtuple('FeatureSpaceData', ['line', 'bent_line', 'prediction', 'target'])
FeatureSpaceLines = namedtuple('FeatureSpaceLines', ['grid', 'input', 'contour'])
LossAndMetricData = namedtuple('LossAndMetricData', ['loss', 'metric', 'metric_name'])
ProbHistogramData = namedtuple('ProbHistogramData', ['prob', 'target'])
LossHistogramData = namedtuple('LossHistogramData', ['loss'])
LayerViolinsData = namedtuple('LayerViolinsData', ['names', 'values', 'layers', 'selected_layers'])

def build_2d_grid(xlim, ylim, n_lines=11, n_points=1000):
"""Returns a 2D grid of boundaries given by `xlim` and `ylim`,
@@ -588,4 +590,45 @@ def _update(i, lh, epoch_start=0):
lh.ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))
lh.ax.locator_params(tight=True, nbins=4)

- return lh.line
\ No newline at end of file
+ return lh.line

class LayerViolins(Basic):
def __init__(self, ax, title):
super(LayerViolins, self).__init__(ax)
self.values = None
self.names = None
self._title = title

def load_data(self, layer_violins_data):
self.values = layer_violins_data.values
self.names = layer_violins_data.names
self.layers = ['inputs'] + layer_violins_data.layers
self.selected_layers = layer_violins_data.selected_layers
self.palette = dict(zip(self.layers, sns.palettes.husl_palette(len(self.layers), .7)))
self.n_epochs = len(self.values)
self._prepare_plot()
return self

def _prepare_plot(self):
self.line = self.ax.plot([], [])

@staticmethod
def _update(i, lv, epoch_start=0):
assert len(lv.names) == len(lv.values[i]), "Layer names and values have different lengths!"
epoch = i + epoch_start

df = pd.concat([pd.DataFrame(layer_values.ravel(),
columns=[layer_name]).melt(var_name='layers', value_name='values')
for layer_name, layer_values in zip(lv.names, lv.values[i])])
df = df[df.isin({'layers': lv.selected_layers}).values]

lv.ax.clear()
sns.violinplot(data=df, x='layers', y='values', ax=lv.ax, cut=0, palette=lv.palette, scale='width')
lv.ax.set_xticklabels(df.layers.unique())
lv.ax.set_xlabel('Layers')
lv.ax.set_ylabel(lv._title)
lv.ax.set_ylim([df['values'].min(), df['values'].max()])
lv.ax.set_title('{} - Epoch: {}'.format(lv.title[0], epoch))

return lv.line
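The concat/melt step inside _update is what turns a list of per-layer weight arrays into the long-format DataFrame that seaborn's violinplot expects. A toy sketch of just that reshaping, with made-up layer names and values:

import numpy as np
import pandas as pd

names = ['hidden0', 'hidden1']            # hypothetical layer names
values = [np.random.normal(size=(2, 3)),  # hypothetical weight arrays
          np.random.normal(size=(3, 1))]

df = pd.concat([pd.DataFrame(v.ravel(), columns=[n]).melt(var_name='layers', value_name='values')
                for n, v in zip(names, values)])
print(df.groupby('layers').size())        # one row per individual weight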
