# Visualization of a Simple Stochastic Gradient Descent running in Two Nodes

This is the same example that we saw before; now we change the code to run it with Horovod.

<mark>Exercise</mark>: Go through the notebook and replace the placeholder comments with the corresponding Horovod lines that we learned in the slides:
```python
import horovod.tensorflow.keras as hvd

hvd.init()

distributed_opt = hvd.DistributedOptimizer(<keras_opt>)

initial_sync = hvd.callbacks.BroadcastGlobalVariablesCallback(0)
```
The comments will indicate what to do.

<mark>Exercise</mark>: Change the batch size and check the different SGD paths.

In [None]:
import ipcmagic

In [None]:
%ipcluster start -n 2

In [None]:
%%px
import socket
# Hostname of the machine each engine runs on; with the two engines started
# above, the output shows whether they landed on different nodes.
socket.gethostname()

In [None]:
%%px
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
# import horovod

In [None]:
%%px
# initialize horovod

In [None]:
%%px --target 0
# TensorFlow version string (this cell targets engine 0 only)
tf.version.VERSION

In [None]:
%%px
# Synthetic data set: points on the line y = ref_slope * x + ref_offset,
# perturbed by additive uniform noise.
nsamples = 1000
ref_slope, ref_offset = 2.0, 0.0

# np.random.random draws from [0, 1); subtracting 0.5 centres the samples on 0.
# The noise is drawn before x_train.
noise = np.random.random(size=(nsamples, 1)) - 0.5
x_train = np.random.random(size=(nsamples, 1)) - 0.5

y_train = ref_slope * x_train + ref_offset + noise

In [None]:
%%px
# Mini-batch size; change this value to compare the resulting SGD paths
batch_size = 100

# Pair each x with its y, both cast to float32
dataset = tf.data.Dataset.from_tensor_slices((x_train.astype(np.float32),
                                              y_train.astype(np.float32)))
# Size the shuffle buffer from the data itself instead of a hard-coded 1000,
# so that it keeps covering every sample if the number of samples is changed
dataset = dataset.shuffle(buffer_size=len(x_train))
dataset = dataset.batch(batch_size)
# Repeat the (shuffled, batched) data set 150 times
dataset = dataset.repeat(150)

In [None]:
%%px
# Linear model with a single Dense unit: one weight (slope) and one bias (offset)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(units=1, activation='linear', input_shape=(1,)))

# Plain stochastic gradient descent
opt = tf.keras.optimizers.SGD(learning_rate=0.5)
# wrap the optimizer with horovod's one

# Mean squared error as the training loss
model.compile(loss='mse', optimizer=opt)

In [None]:
%%px
class TrainHistory(tf.keras.callbacks.Callback):
    """Keras callback that records the model parameters and the loss per batch.

    After training, ``vars`` holds one ``[slope, offset]`` pair and ``loss``
    holds one loss value for every finished batch. Both lists are reset at
    the start of each training run.

    Assumes the model exposes exactly two variables, a 2-D kernel followed by
    a 1-D bias (as in a single ``Dense(1)`` layer).
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history when training starts."""
        # `logs=None` instead of a mutable `{}` default
        self.vars = []
        self.loss = []

    def on_batch_end(self, batch, logs=None):
        """Store the current parameters and the batch loss.

        Args:
            batch: index of the batch that just finished (unused).
            logs: dict of batch results; its ``'loss'`` entry is recorded
                (``None`` is recorded when the entry is missing).
        """
        # Tolerate being called without logs
        logs = logs or {}
        _slope, _offset = [v.numpy() for v in self.model.variables]
        # Kernel has shape (1, 1) and bias has shape (1,): extract the scalars
        self.vars.append([_slope[0, 0], _offset[0]])
        self.loss.append(logs.get('loss'))

In [None]:
%%px
# create the initial sync callback `initial_sync`
# initial_sync = ...

In [None]:
%%px
# Callback instance that records the parameters and the loss after every batch
history = TrainHistory()
# `fit` is called without an `epochs` argument; the data set passed in is
# already batched and repeated.
fit = model.fit(dataset, callbacks=[history])  # add the initial sync callback `initial_sync`

In [None]:
%%px
# Training history: one entry per batch for each parameter and for the loss
_params_hist = np.array(history.vars)
slope_hist = _params_hist[:, 0]
offset_hist = _params_hist[:, 1]
loss_hist = np.array(history.loss)

matplotlib.rcParams['figure.figsize'] = (8, 3)

fig, (ax_loss, ax_fit) = plt.subplots(1, 2)

# Left panel: loss per training step, leaving out the first 10 recorded steps
ax_loss.plot(loss_hist[10:], 'r.-')
ax_loss.set_xlabel('Training steps')
ax_loss.set_ylabel('Loss')
ax_loss.grid()

# Right panel: the data together with the first and the last recorded model
ax_fit.plot(x_train, y_train, '.', alpha=.3)
ax_fit.plot(x_train, slope_hist[0] * x_train + offset_hist[0],
            'r-', label='model (initial step)')
ax_fit.plot(x_train, slope_hist[-1] * x_train + offset_hist[-1],
            'g-', label='model (trained)')
ax_fit.set_xlabel('x')
ax_fit.set_ylabel('y')
ax_fit.grid()
ax_fit.legend()

fig.tight_layout()
plt.show()

# Set the figure size used by later plots
matplotlib.rcParams['figure.figsize'] = (6, 4)

Let's look at the SGD path. The paths on the two nodes should be identical. If they are not, a Horovod step is missing!

In [None]:
%%px
def loss_function_field(m, n, xref, yref):
    """Return the mean squared error of the line ``m * x + n`` on (xref, yref).

    Utility function for plotting the loss as a function of ``m`` and ``n``.
    """
    residuals = yref - m * xref - n
    return np.square(residuals).mean()

# Grid of candidate (slope, offset) pairs on which the loss is evaluated
_m = np.arange(-0, 4.01, 0.1)
_n = np.arange(-0.5, 0.51, 0.1)
M, N = np.meshgrid(_m, _n)

# Loss of the training data at every grid node
Z = np.zeros(M.shape)
for node in np.ndindex(*M.shape):
    Z[node] = loss_function_field(M[node], N[node], x_train, y_train)

matplotlib.rcParams['figure.figsize'] = (10, 7)

fig, ax = plt.subplots()

# Dashed contour lines on top of a filled colour map of the same field
z_range = dict(vmin=Z.min(), vmax=Z.max())
cp = ax.contour(M, N, Z, 15, alpha=0.99, colors='k', linestyles='--', **z_range)
filled = ax.contourf(M, N, Z, alpha=0.8, cmap=plt.cm.RdYlBu_r, **z_range)
ax.clabel(cp, cp.levels[:6])  # label only the first six contour levels
fig.colorbar(filled, ax=ax)

# Last recorded parameters (not used by the plot below)
m = slope_hist[-1]
n = offset_hist[-1]

# Path followed by the parameters during training, plus the reference values
ax.plot(slope_hist, offset_hist, '.-', lw=2, c='k')
ax.plot([ref_slope], [ref_offset], 'rx', ms=10)

ax.set_xlim([_m.min(), _m.max()])
ax.set_ylim([_n.min(), _n.max()])
ax.set_xlabel('Slope')
ax.set_ylabel('Offset')
plt.show()

# Set the figure size used by later plots
matplotlib.rcParams['figure.figsize'] = (6, 4)

In [None]:
%ipcluster stop