## Creating a Net with a Sample (Couple Midi Files)

In [1]:
import json
import tensorflow as tf
from keras import backend as K
from tensorflow.keras.losses import sparse_categorical_crossentropy # used for integer targets
from tensorflow.keras.optimizers import Nadam
from random import shuffle, seed

Using TensorFlow backend.


In [2]:
from tqdm import tqdm, tqdm_notebook
import numpy as np

In [63]:
train = []
target = []
with open('data/sample_input.json', 'r') as handle:
    for line in handle:
        song = json.loads(line)
        train.append(song['train'])
        target.append(song['target'])

In [75]:
sample_train = train
sample_target = target

In [5]:
print(sample_input[0].keys())

dict_keys(['time_notes', 'composer', 'title', 'split', 'year', 'duration', 'train', 'target'])


In [6]:
sample_input[0]['composer']

'Johann Sebastian Bach'

In [7]:
unique_notes = len(set(sample_input[0]['target']))

In [8]:
unique_notes

2167

In [113]:
test = [y for x in sample_train for y in x]

In [115]:
notes = [y for x in test for y in x]

In [127]:
maximum = max(set(notes))

In [128]:
maximum

43629

In [117]:
unique_notes = len(set(notes))

In [118]:
unique_notes

43628

In [1]:
len(sample_input)

NameError: name 'sample_input' is not defined

## Building the Model

In [10]:
class SeqSelfAttention(tf.keras.layers.Layer):

    ATTENTION_TYPE_ADD = 'additive'
    ATTENTION_TYPE_MUL = 'multiplicative'

    def __init__(self,
                 units=32,
                 attention_width=None,
                 attention_type=ATTENTION_TYPE_ADD,
                 return_attention=False,
                 history_only=False,
                 kernel_initializer='glorot_normal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 use_additive_bias=True,
                 use_attention_bias=True,
                 attention_activation=None,
                 attention_regularizer_weight=0.0,
                 **kwargs):
        """Layer initialization.
        For additive attention, see: https://arxiv.org/pdf/1806.01264.pdf
        :param units: The dimension of the vectors that used to calculate the attention weights.
        :param attention_width: The width of local attention.
        :param attention_type: 'additive' or 'multiplicative'.
        :param return_attention: Whether to return the attention weights for visualization.
        :param history_only: Only use historical pieces of data.
        :param kernel_initializer: The initializer for weight matrices.
        :param bias_initializer: The initializer for biases.
        :param kernel_regularizer: The regularization for weight matrices.
        :param bias_regularizer: The regularization for biases.
        :param kernel_constraint: The constraint for weight matrices.
        :param bias_constraint: The constraint for biases.
        :param use_additive_bias: Whether to use bias while calculating the relevance of inputs features
                                  in additive mode.
        :param use_attention_bias: Whether to use bias while calculating the weights of attention.
        :param attention_activation: The activation used for calculating the weights of attention.
        :param attention_regularizer_weight: The weights of attention regularizer.
        :param kwargs: Parameters for parent class.
        """
        self.supports_masking = True
        self.units = units
        self.attention_width = attention_width
        self.attention_type = attention_type
        self.return_attention = return_attention
        self.history_only = history_only
        if history_only and attention_width is None:
            self.attention_width = int(1e9)

        self.use_additive_bias = use_additive_bias
        self.use_attention_bias = use_attention_bias
        self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
        self.bias_initializer = tf.keras.initializers.get(bias_initializer)
        self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
        self.bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
        self.kernel_constraint = tf.keras.constraints.get(kernel_constraint)
        self.bias_constraint = tf.keras.constraints.get(bias_constraint)
        self.attention_activation = tf.keras.activations.get(attention_activation)
        self.attention_regularizer_weight = attention_regularizer_weight
        self._backend = tf.keras.backend.backend()

        if attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
            self.Wx, self.Wt, self.bh = None, None, None
            self.Wa, self.ba = None, None
        elif attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
            self.Wa, self.ba = None, None
        else:
            raise NotImplementedError('No implementation for attention type : ' + attention_type)

        super(SeqSelfAttention, self).__init__(**kwargs)

    def get_config(self):
        config = {
            'units': self.units,
            'attention_width': self.attention_width,
            'attention_type': self.attention_type,
            'return_attention': self.return_attention,
            'history_only': self.history_only,
            'use_additive_bias': self.use_additive_bias,
            'use_attention_bias': self.use_attention_bias,
            'kernel_initializer': tf.keras.regularizers.serialize(self.kernel_initializer),
            'bias_initializer': tf.keras.regularizers.serialize(self.bias_initializer),
            'kernel_regularizer': tf.keras.regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': tf.keras.regularizers.serialize(self.bias_regularizer),
            'kernel_constraint': tf.keras.constraints.serialize(self.kernel_constraint),
            'bias_constraint': tf.keras.constraints.serialize(self.bias_constraint),
            'attention_activation': tf.keras.activations.serialize(self.attention_activation),
            'attention_regularizer_weight': self.attention_regularizer_weight,
        }
        base_config = super(SeqSelfAttention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def build(self, input_shape):
        if isinstance(input_shape, list):
            input_shape = input_shape[0]
        if self.attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
            self._build_additive_attention(input_shape)
        elif self.attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
            self._build_multiplicative_attention(input_shape)
        super(SeqSelfAttention, self).build(input_shape)

    def _build_additive_attention(self, input_shape):
        feature_dim = input_shape[2]

        self.Wt = self.add_weight(shape=(feature_dim, self.units),
                                  name='{}_Add_Wt'.format(self.name),
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
        self.Wx = self.add_weight(shape=(feature_dim, self.units),
                                  name='{}_Add_Wx'.format(self.name),
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
        if self.use_additive_bias:
            self.bh = self.add_weight(shape=(self.units,),
                                      name='{}_Add_bh'.format(self.name),
                                      initializer=self.bias_initializer,
                                      regularizer=self.bias_regularizer,
                                      constraint=self.bias_constraint)

        self.Wa = self.add_weight(shape=(self.units, 1),
                                  name='{}_Add_Wa'.format(self.name),
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
        if self.use_attention_bias:
            self.ba = self.add_weight(shape=(1,),
                                      name='{}_Add_ba'.format(self.name),
                                      initializer=self.bias_initializer,
                                      regularizer=self.bias_regularizer,
                                      constraint=self.bias_constraint)

    def _build_multiplicative_attention(self, input_shape):
        feature_dim = input_shape[2]

        self.Wa = self.add_weight(shape=(feature_dim, feature_dim),
                                  name='{}_Mul_Wa'.format(self.name),
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
        if self.use_attention_bias:
            self.ba = self.add_weight(shape=(1,),
                                      name='{}_Mul_ba'.format(self.name),
                                      initializer=self.bias_initializer,
                                      regularizer=self.bias_regularizer,
                                      constraint=self.bias_constraint)

    def call(self, inputs, mask=None, **kwargs):
        if isinstance(inputs, list):
            inputs, positions = inputs
            positions = K.cast(positions, 'int32')
            mask = mask[1]
        else:
            positions = None

        input_len = K.shape(inputs)[1]

        if self.attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
            e = self._call_additive_emission(inputs)
        elif self.attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
            e = self._call_multiplicative_emission(inputs)

        if self.attention_activation is not None:
            e = self.attention_activation(e)
        e = K.exp(e - K.max(e, axis=-1, keepdims=True))
        if self.attention_width is not None:
            ones = tf.ones((input_len, input_len))
            if self.history_only:
                local = tf.linalg.band_part(
                    ones,
                    K.minimum(input_len, self.attention_width - 1),
                    0,
                )
            else:
                local = tf.linalg.band_part(
                    ones,
                    K.minimum(input_len, self.attention_width // 2),
                    K.minimum(input_len, (self.attention_width - 1) // 2),
                )
            e = e * K.expand_dims(local, 0)
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            mask = K.expand_dims(mask)
            e = K.permute_dimensions(K.permute_dimensions(e * mask, (0, 2, 1)) * mask, (0, 2, 1))

        # a_{t} = \text{softmax}(e_t)
        s = K.sum(e, axis=-1)
        s = K.tile(K.expand_dims(s, axis=-1), K.stack([1, 1, input_len]))
        a = e / (s + K.epsilon())

        # l_t = \sum_{t'} a_{t, t'} x_{t'}
        v = K.batch_dot(a, inputs)
        if self.attention_regularizer_weight > 0.0:
            self.add_loss(self._attention_regularizer(a))

        if positions is not None:
            pos_num = K.shape(positions)[1]
            batch_indices = K.tile(K.expand_dims(K.arange(K.shape(inputs)[0]), axis=-1), K.stack([1, pos_num]))
            pos_indices = K.stack([batch_indices, positions], axis=-1)
            v = tf.gather_nd(v, pos_indices)
            a = tf.gather_nd(a, pos_indices)

        if self.return_attention:
            return [v, a]
        return v

    def _call_additive_emission(self, inputs):
        input_shape = K.shape(inputs)
        batch_size, input_len = input_shape[0], input_shape[1]

        # h_{t, t'} = \tanh(x_t^T W_t + x_{t'}^T W_x + b_h)
        q, k = K.dot(inputs, self.Wt), K.dot(inputs, self.Wx)
        q = K.tile(K.expand_dims(q, 2), K.stack([1, 1, input_len, 1]))
        k = K.tile(K.expand_dims(k, 1), K.stack([1, input_len, 1, 1]))
        if self.use_additive_bias:
            h = K.tanh(q + k + self.bh)
        else:
            h = K.tanh(q + k)

        # e_{t, t'} = W_a h_{t, t'} + b_a
        if self.use_attention_bias:
            e = K.reshape(K.dot(h, self.Wa) + self.ba, (batch_size, input_len, input_len))
        else:
            e = K.reshape(K.dot(h, self.Wa), (batch_size, input_len, input_len))
        return e

    def _call_multiplicative_emission(self, inputs):
        # e_{t, t'} = x_t^T W_a x_{t'} + b_a
        e = K.batch_dot(K.dot(inputs, self.Wa), K.permute_dimensions(inputs, (0, 2, 1)))
        if self.use_attention_bias:
            e = e + self.ba
        return e

    def compute_output_shape(self, input_shape):
        if isinstance(input_shape, list):
            input_shape, pos_shape = input_shape
            output_shape = (input_shape[0], pos_shape[1], input_shape[2])
        else:
            output_shape = input_shape
        if self.return_attention:
            attention_shape = (input_shape[0], output_shape[1], input_shape[1])
            return [output_shape, attention_shape]
        return output_shape

    def compute_mask(self, inputs, mask=None):
        if isinstance(inputs, list):
            mask = mask[1]
        if self.return_attention:
            return [mask, None]
        return mask

    def _attention_regularizer(self, attention):
        batch_size = K.cast(K.shape(attention)[0], K.floatx())
        input_len = K.shape(attention)[-1]
        return self.attention_regularizer_weight * K.sum(K.square(K.batch_dot(
            attention,
            K.permute_dimensions(attention, (0, 2, 1))) - tf.eye(input_len))) / batch_size

    @staticmethod
    def get_custom_objects():
        return {'SeqSelfAttention': SeqSelfAttention}

In [129]:
def create_model(seq_len, unique_notes, dropout=0.3, output_emb=100, rnn_unit=128, dense_unit=64):
    inputs = tf.keras.layers.Input(shape=(seq_len,))
    embedding = tf.keras.layers.Embedding(input_dim=unique_notes+1, output_dim=output_emb, input_length=seq_len)(inputs)
    forward_pass = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(rnn_unit, return_sequences=True))(embedding)
    forward_pass , att_vector = SeqSelfAttention(
        return_attention=True,
        attention_activation='sigmoid', 
        attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_width=50, 
        kernel_regularizer=tf.keras.regularizers.l2(1e-4),
        bias_regularizer=tf.keras.regularizers.l1(1e-4),
        attention_regularizer_weight=1e-4,
        )(forward_pass)
    forward_pass = tf.keras.layers.Dropout(dropout)(forward_pass)
    forward_pass = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(rnn_unit, return_sequences=True))(forward_pass)
    forward_pass , att_vector2 = SeqSelfAttention(
        return_attention=True,
        attention_activation='sigmoid', 
        attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_width=50, 
        kernel_regularizer=tf.keras.regularizers.l2(1e-4),
        bias_regularizer=tf.keras.regularizers.l1(1e-4),
        attention_regularizer_weight=1e-4,
        )(forward_pass)
    forward_pass = tf.keras.layers.Dropout(dropout)(forward_pass)
    forward_pass = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(rnn_unit))(forward_pass)
    forward_pass = tf.keras.layers.Dropout(dropout)(forward_pass)
    forward_pass = tf.keras.layers.Dense(dense_unit)(forward_pass)
    forward_pass = tf.keras.layers.LeakyReLU()(forward_pass)
    outputs = tf.keras.layers.Dense(unique_notes+1, activation = "softmax")(forward_pass)

    model = tf.keras.Model(inputs=inputs, outputs=outputs, name='generate_scores_rnn')
    return model

In [130]:
model = create_model(50, unique_notes+1)

In [131]:
model.summary()

Model: "generate_scores_rnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 50)]              0         
_________________________________________________________________
embedding_2 (Embedding)      (None, 50, 100)           4363000   
_________________________________________________________________
bidirectional_6 (Bidirection (None, 50, 256)           176640    
_________________________________________________________________
seq_self_attention_4 (SeqSel [(None, 50, 256), (None,  65537     
_________________________________________________________________
dropout_6 (Dropout)          (None, 50, 256)           0         
_________________________________________________________________
bidirectional_7 (Bidirection (None, 50, 256)           296448    
_________________________________________________________________
seq_self_attention_5 (SeqSel [(None, 50, 256), 

In [122]:
len(sample_train)

40

In [141]:
import random

In [142]:
for i in range(0,40,5):
    batch = random.sample([y for x in sample_target[i:i+5] for y in x], 10000)
    print(len(batch))

10000
10000
10000
10000
10000
10000
10000
10000


In [152]:
class TrainModel:
  
    def __init__(self, epochs, sample_train, sample_target, batch_nnet_size, batch_song, optimizer, checkpoint, loss_fn,
               checkpoint_prefix, total_songs, model):
        self.epochs = epochs
        self.sample_train = sample_train
        self.sample_target = sample_target
        self.batch_nnet_size = batch_nnet_size
        self.batch_song = batch_song
        self.optimizer = optimizer
        self.checkpoint = checkpoint
        self.loss_fn = loss_fn
        self.checkpoint_prefix = checkpoint_prefix
        self.total_songs = total_songs
        self.model = model
    
    def train(self):
        for epoch in tqdm_notebook(range(self.epochs),desc='epochs'):
            # for each epochs, we shuffle the list of all the datasets
            c = list(zip(self.sample_train, self.sample_target))
            shuffle(c)
            self.sample_train, self.sample_target = zip(*c)
            loss_total = 0
            steps = 0
            steps_nnet = 0

            # Iterate all songs by the length of sample input (total_songs) and batches (batch_song)
            for i in tqdm_notebook(range(0,self.total_songs, self.batch_song), desc='MUSIC'):
                # EXAMPLE: [0,5,10,15,20] FOR TOTAL_SONGS = 20 AND BATCH_SONG = 5
                steps += 1
                #inputs_nnet_large, outputs_nnet_large = generate_batch_song(
                 #   self.sample_input, self.batch_song, start_index=i, fs=self.frame_per_second, 
                  #  seq_len=seq_len, use_tqdm=False) # We use the function that have been defined here
                #inputs_nnet_large = np.array(self.note_tokenizer.transform(inputs_nnet_large), dtype=np.int32)
                #outputs_nnet_large = np.array(self.note_tokenizer.transform(outputs_nnet_large), dtype=np.int32)
                
                # EXAMPLE LARGE INPUTS = ARRAY([1,2,3,4],[2,3,4,5],[2,3,4,5],[2,3,4,5],[1,2,3,4])
                input_batch = [y for x in self.sample_train[i:i+self.batch_song] for y in x]
                output_batch = [y for x in self.sample_target[i:i+self.batch_song] for y in x]
                c = list(zip(input_batch, output_batch))
                sample = random.sample(c, 10000)
                input_batch, output_batch = zip(*sample)
                inputs_nnet_large = np.array(input_batch)
                outputs_nnet_large = np.array(output_batch)

                # Get an index of all windows in a song
                index_shuffled = np.arange(start=0, stop=len(inputs_nnet_large))
                np.random.shuffle(index_shuffled)
                
                for nnet_steps in tqdm_notebook(range(0,len(index_shuffled),self.batch_nnet_size)):
                    steps_nnet += 1
                    current_index = index_shuffled[nnet_steps:nnet_steps+self.batch_nnet_size]

                    inputs_nnet, outputs_nnet = inputs_nnet_large[current_index], outputs_nnet_large[current_index]

                    # To make sure no exception thrown by tensorflow on autograph
                    if len(inputs_nnet) // self.batch_nnet_size != 1:
                        break
                    loss = self.train_step(inputs_nnet, outputs_nnet)
                    loss_total += tf.math.reduce_sum(loss)
                    if steps_nnet % 20 == 0:
                        print("epochs {} | Steps {} | total loss : {}".format(epoch + 1, steps_nnet,loss_total))

                    checkpoint.save(file_prefix = self.checkpoint_prefix)

    @tf.function
    def train_step(self, inputs, targets):
        with tf.GradientTape() as tape:
            prediction = self.model(inputs)
            loss = self.loss_fn(targets, prediction)
        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        return loss

In [100]:
import os

In [82]:
optimizer = Nadam()

checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 model=model)
checkpoint_dir = 'models/training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
loss_fn = sparse_categorical_crossentropy

In [138]:
seq_len = 50
EPOCHS = 2
BATCH_SONG = 5
BATCH_NNET_SIZE = 96
TOTAL_SONGS = len(sample_input)
FRAME_PER_SECOND = 5

In [153]:
train_class = TrainModel(EPOCHS, sample_train, sample_target, BATCH_NNET_SIZE, BATCH_SONG, optimizer, checkpoint, loss_fn, checkpoint_prefix, 
                        TOTAL_SONGS, model)
train_class.train()

HBox(children=(IntProgress(value=0, description='epochs', max=2, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='MUSIC', max=8, style=ProgressStyle(description_width='initial…

10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 1 | Steps 20 | total loss : 10832.0732421875
epochs 1 | Steps 40 | total loss : 21231.33984375
epochs 1 | Steps 60 | total loss : 31167.056640625
epochs 1 | Steps 80 | total loss : 40718.390625
epochs 1 | Steps 100 | total loss : 50313.7109375
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 1 | Steps 120 | total loss : 59818.59765625
epochs 1 | Steps 140 | total loss : 68656.671875
epochs 1 | Steps 160 | total loss : 77546.78125
epochs 1 | Steps 180 | total loss : 85468.1484375
epochs 1 | Steps 200 | total loss : 93271.1328125
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 1 | Steps 220 | total loss : 102166.140625
epochs 1 | Steps 240 | total loss : 112415.7421875
epochs 1 | Steps 260 | total loss : 122380.3046875
epochs 1 | Steps 280 | total loss : 132061.421875
epochs 1 | Steps 300 | total loss : 141436.53125
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 1 | Steps 320 | total loss : 150375.234375
epochs 1 | Steps 340 | total loss : 159406.4375
epochs 1 | Steps 360 | total loss : 168318.09375
epochs 1 | Steps 380 | total loss : 176797.796875
epochs 1 | Steps 400 | total loss : 184858.0
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 1 | Steps 440 | total loss : 200343.046875
epochs 1 | Steps 460 | total loss : 207611.640625
epochs 1 | Steps 480 | total loss : 214769.796875
epochs 1 | Steps 500 | total loss : 221442.234375
epochs 1 | Steps 520 | total loss : 228157.21875
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 1 | Steps 540 | total loss : 236321.578125
epochs 1 | Steps 560 | total loss : 244680.59375
epochs 1 | Steps 580 | total loss : 252365.28125
epochs 1 | Steps 600 | total loss : 259966.1875
epochs 1 | Steps 620 | total loss : 267199.84375
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 1 | Steps 640 | total loss : 274169.1875
epochs 1 | Steps 660 | total loss : 281352.78125
epochs 1 | Steps 680 | total loss : 287873.5625
epochs 1 | Steps 700 | total loss : 294055.25
epochs 1 | Steps 720 | total loss : 300448.71875
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 1 | Steps 740 | total loss : 307497.25
epochs 1 | Steps 760 | total loss : 317151.4375
epochs 1 | Steps 780 | total loss : 326595.4375
epochs 1 | Steps 800 | total loss : 335699.625
epochs 1 | Steps 820 | total loss : 344257.5



HBox(children=(IntProgress(value=0, description='MUSIC', max=8, style=ProgressStyle(description_width='initial…

10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 2 | Steps 20 | total loss : 8400.302734375
epochs 2 | Steps 40 | total loss : 15711.0361328125
epochs 2 | Steps 60 | total loss : 22817.87109375
epochs 2 | Steps 80 | total loss : 30245.32421875
epochs 2 | Steps 100 | total loss : 37110.68359375
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 2 | Steps 120 | total loss : 44561.30078125
epochs 2 | Steps 140 | total loss : 52363.55078125
epochs 2 | Steps 160 | total loss : 60124.44140625
epochs 2 | Steps 180 | total loss : 68032.71875
epochs 2 | Steps 200 | total loss : 75561.9765625
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 2 | Steps 220 | total loss : 83402.703125
epochs 2 | Steps 240 | total loss : 91349.3359375
epochs 2 | Steps 260 | total loss : 99019.671875
epochs 2 | Steps 280 | total loss : 106521.609375
epochs 2 | Steps 300 | total loss : 113792.59375
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 2 | Steps 320 | total loss : 121258.8359375
epochs 2 | Steps 340 | total loss : 130504.90625
epochs 2 | Steps 360 | total loss : 139471.203125
epochs 2 | Steps 380 | total loss : 148122.828125
epochs 2 | Steps 400 | total loss : 156184.609375
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 2 | Steps 440 | total loss : 170808.984375
epochs 2 | Steps 460 | total loss : 177316.484375
epochs 2 | Steps 480 | total loss : 183528.328125
epochs 2 | Steps 500 | total loss : 189503.625
epochs 2 | Steps 520 | total loss : 195483.90625
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 2 | Steps 540 | total loss : 203575.15625
epochs 2 | Steps 560 | total loss : 212322.609375
epochs 2 | Steps 580 | total loss : 220441.6875
epochs 2 | Steps 600 | total loss : 228297.765625
epochs 2 | Steps 620 | total loss : 235884.40625
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 2 | Steps 640 | total loss : 243720.90625
epochs 2 | Steps 660 | total loss : 252363.5625
epochs 2 | Steps 680 | total loss : 260527.703125
epochs 2 | Steps 700 | total loss : 267933.5
epochs 2 | Steps 720 | total loss : 275298.90625
10000


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

epochs 2 | Steps 740 | total loss : 281828.59375
epochs 2 | Steps 760 | total loss : 287629.0
epochs 2 | Steps 780 | total loss : 293047.5
epochs 2 | Steps 800 | total loss : 298328.875
epochs 2 | Steps 820 | total loss : 303088.1875




In [154]:
model.save('model_s40_ep2.h5')