In [402]:
import os
import tensorflow.keras.backend as K
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, LSTM, SimpleRNN
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
import editdistance

In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
import tensorflow as tf

In [5]:
from tqdm import tqdm

In [574]:
import time

# 1. Подготовка данных

In [403]:
with open('data/ru.txt', 'r') as f:
    lines = f.readlines()
    lines = [line.strip() for line in lines if not '(' in line]
    lines = lines[9:]

In [404]:
words_pad = []
words_len = []
spells_pad = []
spells_len = []
for line in tqdm(lines):
    chunks = line.split()
    graphs = list(chunks[0])
    phones = chunks[1:]
    if len(graphs) > 8 or len(phones) > 8:
        continue
    words_pad.append(graphs)
    spells_pad.append(phones)
    spells_len.append(len(phones))

100%|██████████| 533911/533911 [00:01<00:00, 288408.43it/s]


In [257]:
a = np.array(spells_len)

In [259]:
with open('spell_seq_len.npy', 'wb') as f:
    np.save(f, a)

In [290]:
words_pad

[['а', 'а', 'з', 'а'],
 ['а', 'а', 'з', 'о', 'м'],
 ['а', 'а', 'з', 'у'],
 ['а', 'а', 'к', 'а'],
 ['а', 'а', 'л', 'и'],
 ['а', 'а', 'л', 'т', 'о', 'н', 'е', 'н'],
 ['а', 'а', 'л', 'ь', 'м', 'а', 'р'],
 ['а', 'а', 'р'],
 ['а', 'а', 'р', 'е'],
 ['а', 'а', 'р', 'е', 'н', 'и', 'с', 'е'],
 ['а', 'а', 'р', 'н', 'е'],
 ['а', 'а', 'р', 'о', 'н'],
 ['а', 'а', 'р', 'о', 'н', 'а'],
 ['а', 'а', 'р', 'о', 'н', 'о', 'м'],
 ['а', 'а', 'р', 'о', 'н', 'у'],
 ['а', 'а', 'р', 'ф', 'и'],
 ['а', 'а', 'х', 'е', 'н'],
 ['а', 'а', 'х', 'е', 'н', 'а'],
 ['а', 'а', 'х', 'е', 'н', 'е'],
 ['а', 'а', 'э'],
 ['а', 'б'],
 ['а', 'б', 'а'],
 ['а', 'б', 'а', 'г', 'е', 'й', 'л'],
 ['а', 'б', 'а', 'д', 'ж', 'и', 'и', 'м'],
 ['а', 'б', 'а', 'д', 'о', 'н', 'н', 'а'],
 ['а', 'б', 'а', 'ж'],
 ['а', 'б', 'а', 'ж', 'у', 'р'],
 ['а', 'б', 'а', 'ж', 'у', 'р', 'а'],
 ['а', 'б', 'а', 'ж', 'у', 'р', 'а', 'х'],
 ['а', 'б', 'а', 'ж', 'у', 'р', 'е'],
 ['а', 'б', 'а', 'ж', 'у', 'р', 'о', 'в'],
 ['а', 'б', 'а', 'ж', 'у', 'р', 'о', 'м'],

In [405]:
words_pad = pad_sequences(words_pad, value='#', maxlen=8, dtype=object)

In [406]:
spells_pad = pad_sequences(spells_pad, value='#', maxlen=8, dtype=object)

In [407]:
with open('data/phonemes.txt', 'r') as f:
    lines_1 = f.readlines()
    phonemes_dict = [line.strip() for line in lines_1]

In [408]:
with open('data/graphemes.txt', 'r') as f:
    lines_2 = f.readlines()
    graphemes_dict = [line.strip() for line in lines_2]

In [843]:
def one_hot_encoding_symbol(x, dictionary):
    vector = [0]*len(dictionary)
    if x == '#':
        return vector
    else:
        vector[dictionary.index(x)] = 1
    return vector

In [409]:
def one_hot_encoding_symbol_without_masking(x, dictionary):
    vector = [0]*len(dictionary)
    vector[dictionary.index(x)] = 1
    return vector

In [410]:
def get_X_Y(words_pad, spells_pad, graphemes_dict, phonemes_dict):
    words_one_hot = []
    for word in tqdm(words_pad):
        word_one_hot = []
        for grapheme in word:
            word_one_hot.append(one_hot_encoding_symbol_without_masking(grapheme, graphemes_dict))
        words_one_hot.append(word_one_hot)
    
    spells_one_hot = []
    for spell in tqdm(spells_pad):
        spell_one_hot = []
        for phoneme in spell:
            spell_one_hot.append(one_hot_encoding_symbol_without_masking(phoneme, phonemes_dict))
        spells_one_hot.append(spell_one_hot)
    return words_one_hot, spells_one_hot

In [411]:
X, Y = get_X_Y(words_pad, spells_pad, graphemes_dict, phonemes_dict)

100%|██████████| 253403/253403 [00:05<00:00, 44484.95it/s]
100%|██████████| 253403/253403 [00:06<00:00, 40821.09it/s]


In [412]:
X = [np.array(matrix) for matrix in X]
X = [np.expand_dims(matrix, axis=0) for matrix in X]
X = np.row_stack((X))

In [413]:
Y = [np.array(matrix) for matrix in Y]
Y = [np.expand_dims(matrix, axis=0) for matrix in Y]
Y = np.row_stack((Y))

In [300]:
Y.shape

(253403, 8, 49)

In [301]:
X.shape

(253403, 8, 35)

In [263]:
with open('labels.npy', 'wb') as f:
    np.save(f, Y)

In [383]:
def train_test_split_self(X, Y, Z, train_size=0.7, val_size=0.3):
    np.random.shuffle(X)
    X_train = X[:int(len(X)*train_size)]
    X_test = X[int(len(X)*train_size):]
    np.random.shuffle(X_train)
    X_val = X_train[:int(len(X_train)*val_size)]
    X_train = X_train[int(len(X_train)*val_size):]
    
    np.random.shuffle(Y)
    Y_train = Y[:int(len(Y)*train_size)]
    Y_test = Y[int(len(Y)*train_size):]
    np.random.shuffle(Y_train)
    Y_val = Y_train[:int(len(Y_train)*val_size)]
    Y_train = Y_train[int(len(Y_train)*val_size):]
    
    np.random.shuffle(spells_len)
    Z_train = Z[:int(len(Z)*train_size)]
    Z_test = Z[int(len(Z)*train_size):]
    np.random.shuffle(X_train)
    Z_val = Z_train[:int(len(Z_train)*val_size)]
    Z_train = Z_train[int(len(Z_train)*val_size):]
    
    return X_train, X_test, X_val, Y_train, Y_test, Y_val, Z_train, Z_test, Z_val

In [137]:
spells_len

[7,
 7,
 8,
 4,
 7,
 6,
 6,
 8,
 8,
 5,
 7,
 6,
 6,
 6,
 5,
 7,
 5,
 4,
 8,
 6,
 8,
 6,
 8,
 6,
 7,
 8,
 5,
 7,
 7,
 7,
 8,
 6,
 5,
 6,
 8,
 6,
 8,
 6,
 6,
 5,
 7,
 8,
 6,
 6,
 7,
 8,
 8,
 7,
 5,
 3,
 6,
 7,
 8,
 7,
 8,
 7,
 7,
 6,
 7,
 6,
 6,
 5,
 7,
 5,
 6,
 4,
 8,
 5,
 5,
 4,
 8,
 6,
 7,
 6,
 5,
 8,
 7,
 7,
 8,
 8,
 4,
 7,
 7,
 8,
 7,
 8,
 8,
 8,
 6,
 8,
 8,
 5,
 6,
 7,
 7,
 6,
 8,
 8,
 4,
 6,
 6,
 7,
 6,
 7,
 8,
 7,
 6,
 8,
 7,
 7,
 6,
 8,
 8,
 7,
 7,
 3,
 8,
 7,
 4,
 4,
 7,
 6,
 6,
 7,
 3,
 8,
 6,
 4,
 4,
 4,
 5,
 8,
 4,
 5,
 6,
 5,
 6,
 8,
 8,
 8,
 8,
 8,
 8,
 5,
 5,
 5,
 7,
 7,
 7,
 6,
 4,
 5,
 7,
 7,
 8,
 6,
 8,
 5,
 7,
 7,
 7,
 7,
 8,
 7,
 4,
 5,
 8,
 6,
 6,
 6,
 8,
 5,
 3,
 8,
 8,
 7,
 4,
 6,
 7,
 8,
 7,
 5,
 8,
 7,
 7,
 6,
 8,
 6,
 6,
 8,
 5,
 8,
 5,
 6,
 6,
 5,
 5,
 7,
 5,
 6,
 8,
 6,
 5,
 4,
 5,
 5,
 7,
 8,
 6,
 8,
 8,
 7,
 7,
 8,
 8,
 6,
 5,
 7,
 7,
 8,
 3,
 7,
 5,
 6,
 7,
 5,
 7,
 5,
 7,
 8,
 8,
 6,
 7,
 6,
 8,
 7,
 6,
 6,
 7,
 4,
 8,
 7,
 7,
 6,
 7,
 8,
 7,
 5,
 5,
 6,


In [384]:
X_train, X_test, X_val, Y_train, Y_test, Y_val, Z_train, Z_test, Z_val = train_test_split_self(X, Y, spells_len)

In [415]:
Z = spells_len

In [416]:
x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(X, Y, Z, test_size=0.1, random_state=27)

In [243]:
X_train = tf.cast(tf.constant(X_train), tf.dtypes.uint8)
X_test = tf.cast(tf.constant(X_test), tf.dtypes.uint8)
X_val = tf.cast(tf.constant(X_val), tf.dtypes.uint8)
Y_train = tf.cast(tf.constant(Y_train), tf.dtypes.uint8)
Y_test = tf.cast(tf.constant(Y_test), tf.dtypes.uint8)
Y_val = tf.cast(tf.constant(Y_val), tf.dtypes.uint8)
Z_train = tf.cast(tf.constant(Z_train), tf.dtypes.uint8)
Z_test = tf.cast(tf.constant(Z_test), tf.dtypes.uint8)
Z_val = tf.cast(tf.constant(Z_val), tf.dtypes.uint8)

<TensorDataset shapes: ((124168, 8, 35), (124168, 8, 49), (124168,)), types: (tf.int64, tf.int64, tf.int64)>

In [244]:
ds = tf.data.Dataset.from_tensor_slices((X_train, Y_train))

In [245]:
ds = ds.shuffle(5000).batch(16)

In [246]:
ds

<BatchDataset shapes: ((None, 8, 35), (None, 8, 49)), types: (tf.uint8, tf.uint8)>

### Маскирование

In [933]:
# нужно если не использовать дополнительный класс для паддинга 

In [25]:
masked_x_train = masking(x_train)

In [26]:
masked_x_train._keras_mask

<tf.Tensor: shape=(42116, 8), dtype=bool, numpy=
array([[False, False, False, ...,  True,  True,  True],
       [False, False, False, ...,  True,  True,  True],
       [False, False, False, ...,  True,  True,  True],
       ...,
       [False, False, False, ...,  True,  True,  True],
       [False, False, False, ...,  True,  True,  True],
       [False, False, False, ...,  True,  True,  True]])>

In [27]:
masked_x_train.shape

TensorShape([42116, 8, 35])

#### разбиваем данные на мини батчи

In [None]:
test_indices = np.random.choice

In [638]:
y_train.shape

(42116, 8, 49)

In [704]:
model = Sequential()

In [706]:
tf.keras.Model

tensorflow.python.keras.engine.training.Model

In [705]:
model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7f66a94e9470>

In [220]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [223]:
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))

In [225]:
train_dataset = train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)

In [227]:
train_dataset

<BatchDataset shapes: ((None, 28, 28), (None,)), types: (tf.uint8, tf.uint8)>

In [222]:
x_train.shape

(60000, 28, 28)

In [215]:
ds = ds.batch(4)

In [212]:
ds = ds.batch(3)

In [195]:
ds

<BatchDataset shapes: (None,), types: tf.int64>

In [216]:
list(ds.as_numpy_iterator())

[array([0, 1, 2, 3]), array([4, 5, 6, 7])]

In [187]:
ds = tf.data.Dataset.from_tensors(tf.constant([1, 2, 3, 4, 5, 6]))

In [191]:
ds.batch(2)

<BatchDataset shapes: (None, 6), types: tf.int32>

## Create custom model

In [519]:
class CustomModel(tf.keras.Model):
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

# 2. Создание модели

In [438]:
inputs = Input((8, 35))
rnn = tf.keras.layers.SimpleRNN(128, activation='tanh', return_sequences=True, input_shape=(8, 35))
x = rnn(inputs)
dense = Dense(49, activation='softmax')
outputs = dense(x)
model= CustomModel(inputs=inputs, outputs=outputs)

In [155]:
rnn = tf.keras.layers.SimpleRNN(128, activation='tanh', return_sequences=True, input_shape=(8, 35))

In [156]:
x = rnn(inputs)

In [157]:
dense = Dense(49, activation='softmax')

In [158]:
outputs = dense(x)

In [166]:
model_f = CustomModel(inputs=inputs, outputs=outputs)

In [167]:
model_f.summary()

Model: "custom_model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 8, 35)]           0         
_________________________________________________________________
simple_rnn_7 (SimpleRNN)     (None, 8, 128)            20992     
_________________________________________________________________
dense_7 (Dense)              (None, 8, 49)             6321      
Total params: 27,313
Trainable params: 27,313
Non-trainable params: 0
_________________________________________________________________


In [731]:
model = Sequential()
model.add(LSTM(512, input_shape=(8, 35)))

In [313]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_13 (SimpleRNN)    (None, 8, 128)            20992     
_________________________________________________________________
dense_13 (Dense)             (None, 8, 49)             6321      
Total params: 27,313
Trainable params: 27,313
Non-trainable params: 0
_________________________________________________________________


In [748]:
def dense_to_sparse(dense, pad_value=-1):
    pad_value = tf.constant(pad_value, dtype=tf.int32)
    where = tf.not_equal(dense, pad_value)
    indices = tf.where(where)
    values = tf.gather_nd(dense, indices)
    dense_shape = tf.cast(tf.shape(dense), dtype=tf.int64)
    return tf.SparseTensor(indices, values, dense_shape)

In [797]:
def my_metric_fn(y_true, y_pred):
    input_shape = K.shape(y_pred)
    input_length = tf.ones(shape=input_shape[0]) * K.cast(input_shape[1], 'float32')
    labels_dec, _ = K.ctc_decode(y_pred, input_length, greedy=True)
    hypothesis = dense_to_sparse(tf.cast(labels_dec[0], tf.int32))
    y_true = tf.argmax(y_true, axis=2)
    truth = dense_to_sparse(tf.cast(y_true, dtype=tf.int32))
    curler = tf.edit_distance(hypothesis=hypothesis, truth=truth)
    ler = tf.reduce_mean(curler)
    return ler

In [328]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics='accuracy')

In [174]:
early_stopping=EarlyStopping(monitor='val_loss', patience=3)

In [234]:
model_f.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [235]:
histoty = model_f.fit(ds, epochs=10, verbose=1, callbacks=[early_stopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 

In [264]:
histoty = model_f.fit(ds, batch_size=16, epochs=10, verbose=1, callbacks=[early_stopping],
                     validation_data=(X_val, Y_val))

Epoch 1/10

KeyboardInterrupt: 

In [281]:
x_train.shape

(177382, 8, 35)

In [283]:
y_train[0]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [251]:
import pickle

In [252]:
with open('./dataset.pickle', 'wb') as f:
    pickle.dump(ds, f)

InternalError: Tensorflow type 21 not convertible to numpy dtype.

In [585]:
x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(X, Y, Z, test_size=0.1, random_state=27)

In [586]:
batch_size = 128

In [587]:
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train, z_train))

In [588]:
train_dataset = train_dataset.batch(batch_size)

In [589]:
train_dataset

<BatchDataset shapes: ((None, 8, 35), (None, 8, 49), (None,)), types: (tf.int64, tf.int64, tf.int32)>

In [590]:
class CustomModel(tf.keras.Model):
    def train_step(self, data):
        x, y, z = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

In [618]:
class Cer(tf.keras.metrics.Metric):

    def __init__(self, name='ler', **kwargs):
        super(Cer, self).__init__(name=name, **kwargs)
        self.cer_accumulator = self.add_weight(name="total_cer", initializer="zeros")
        self.counter = self.add_weight(name="cer_count", initializer="zeros")

    def update_state(self, y_true, y_pred, input_length, sample_weight=None):
        def dense_to_sparse(dense, pad_value=-1):
            pad_value = tf.constant(pad_value, dtype=tf.int32)
            where = tf.not_equal(dense, pad_value)
            indices = tf.where(where)
            values = tf.gather_nd(dense, indices)
            dense_shape = tf.cast(tf.shape(dense), dtype=tf.int64)
            return tf.SparseTensor(indices, values, dense_shape)

#         input_shape = K.shape(y_pred)
#         input_length = tf.ones(shape=input_shape[0]) * K.cast(input_shape[1], 'float32')
        labels_dec, _ = K.ctc_decode(y_pred, input_length, greedy=True)
        hypothesis = dense_to_sparse(tf.cast(labels_dec[0], tf.int32))
        y_true = tf.argmax(y_true, axis=2)
        truth = dense_to_sparse(tf.cast(y_true, dtype=tf.int32))
        curcer = tf.edit_distance(hypothesis=hypothesis, truth=truth)
        cer = tf.reduce_mean(curcer)
        self.cer_accumulator.assign_add(cer)
        self.counter.assign_add(1)

    def result(self):
        return tf.math.divide_no_nan(self.cer_accumulator, self.counter)

    def reset_states(self):
        self.cer_accumulator.assign(0.0)
        self.counter.assign(0.0)

In [630]:
@tf.function
def train_step(x, y, input_length):
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    train_acc_metric.update_state(y, logits)

    y_pred = tf.keras.activations.softmax(logits)
    train_cer_metric.update_state(y, y_pred, input_length)
    # # input_shape = K.shape(y_pred)
    # # input_length = tf.ones(shape=input_shape[0]) * K.cast(input_shape[1], 'float32')
#     labels_dec, _ = K.ctc_decode(y_pred, input_length, greedy=True)
    # labels_dec, _ = tf.nn.ctc_greedy_decoder(y_pred, tf.cast(input_length, tf.int32))
#     hypothesis = dense_to_sparse(tf.cast(labels_dec[0], tf.int32))
#     y_true = tf.argmax(y, axis=2)
#     truth = dense_to_sparse(tf.cast(y_true, dtype=tf.int32))
#     curler = tf.edit_distance(hypothesis=hypothesis, truth=truth)
#     ler = tf.reduce_mean(curler)
    return loss_value

In [631]:
def dense_to_sparse(dense, pad_value=-1):
    pad_value = tf.constant(pad_value, dtype=tf.int32)
    where = tf.not_equal(dense, pad_value)
    indices = tf.where(where)
    values = tf.gather_nd(dense, indices)
    dense_shape = tf.cast(tf.shape(dense), dtype=tf.int64)
    return tf.SparseTensor(indices, values, dense_shape)

In [632]:
inputs = Input((8, 35))
rnn = tf.keras.layers.LSTM(128, activation='tanh', return_sequences=True, input_shape=(8, 35))
x = rnn(inputs)
dense = Dense(49, activation='softmax',
             kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01, seed=42),
             bias_initializer=tf.keras.initializers.Zeros())
outputs = dense(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [633]:
optimizer = Adam(learning_rate=0.002)
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
train_cer_metric = Cer()

In [634]:
epochs = 10

In [635]:
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch + 1,))
    start_time = time.time()
    count = 0
    sum_ler = 0
    for step, (x_batch_train, y_batch_train, z_batch_train) in enumerate(train_dataset):
        loss_value = train_step(x_batch_train, y_batch_train, z_batch_train)
        count += 1
        # sum_ler += ler
        if step % 1000 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * batch_size))
            # print("ler over step: %.4f" % (float(ler),))
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))
    train_acc_metric.reset_states()
    # train_ler = sum_ler / count
    train_cer = train_cer_metric.result()
    print("Training cer over epoch: %.4f" % (float(train_cer),))
    train_cer_metric.reset_states()
    # for x_batch_val, y_batch_val in val_dataset:
    #     test_step(x_batch_val, y_batch_val)
    # val_acc = val_acc_metric.result()
    # val_acc_metric.reset_states()
    # print("Validation acc: %.4f" % (float(val_acc),))
    print("Time taken: %.2fs" % (time.time() - start_time))


Start of epoch 1
Training loss (for one batch) at step 0: 3.8918
Seen so far: 128 samples
Training loss (for one batch) at step 1000: 3.6518
Seen so far: 128128 samples
Training acc over epoch: 0.3142
Training cer over epoch: 0.9031
Time taken: 22.18s

Start of epoch 2
Training loss (for one batch) at step 0: 3.3863
Seen so far: 128 samples
Training loss (for one batch) at step 1000: 3.2477
Seen so far: 128128 samples
Training acc over epoch: 0.6377
Training cer over epoch: 0.6386
Time taken: 20.69s

Start of epoch 3
Training loss (for one batch) at step 0: 3.2427
Seen so far: 128 samples
Training loss (for one batch) at step 1000: 3.1947
Seen so far: 128128 samples
Training acc over epoch: 0.7005
Training cer over epoch: 0.5768
Time taken: 20.65s

Start of epoch 4
Training loss (for one batch) at step 0: 3.2359
Seen so far: 128 samples
Training loss (for one batch) at step 1000: 3.1907
Seen so far: 128128 samples
Training acc over epoch: 0.7058
Training cer over epoch: 0.5752
Time ta

In [637]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [638]:
model.evaluate(x_test, y_test)



[4.5132341384887695, 0.7140158414840698]

In [639]:
from scipy.stats import zscore

In [655]:
a = [1, 2, 3]

In [651]:
zscore(a)

array([ 1.22474487,  0.        , -1.22474487])

In [656]:
a = np.array(a)

In [657]:
a.mean()

2.0

In [658]:
a.std()

0.816496580927726

In [659]:
(1 - a.mean())/ a.std()

-1.224744871391589

In [660]:
def gen():
    ragged_tensor = tf.ragged.constant([[1, 2], [3]])
    yield 42, ragged_tensor

In [685]:
def gen_1(s):
    k = 1
    for i in range(s):
        yield 42, k**2
        k += 1

In [686]:
x = gen_1(4)

In [687]:
for i  in x:
    print(i)

(42, 1)
(42, 4)
(42, 9)
(42, 16)


In [681]:
next(x)

StopIteration: 

In [700]:
def gen():
    ragged_tensor = tf.constant([[1, 2], [3, 4]])
    yield 42, ragged_tensor

dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32))

In [702]:
for i in dataset:
    print(i)

(<tf.Tensor: shape=(), dtype=int32, numpy=42>, <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 2],
       [3, 4]], dtype=int32)>)
