In [9]:
import sys
sys.path.insert(0,'./../src')

from utils.Other import labelBaseMap
from models.Attention.Transformer import Transformer
from models.Attention.CustomSchedule import CustomSchedule
from models.Attention.attention_utils import create_masks

from utils.DataPrepper import DataPrepper
from utils.DataGenerator import DataGenerator
from utils.Other import set_gpu_growth

In [18]:
import tensorflow as tf
import numpy as np
import time

In [134]:
class ConvolutionBlock(tf.keras.layers.Layer):
    def __init__(self, num_cnn_layers, filters, idx):
        super(ConvolutionBlock, self).__init__()
        
        cnn_dims = [1,3,1]
        assert (len(cnn_dims) == num_cnn_layers)
        
        self.num_cnn_layers = num_cnn_layers 
        
        self.residual = tf.keras.layers.Conv1D(filters, 1, padding="same", name=f"res{idx}-r")      
        self.cnn_layers = [tf.keras.layers.Conv1D(filters, cnn_dims[i], padding="same", activation="relu", use_bias="false", name=f"res{idx}-c{i}") for i in range(num_cnn_layers)]
        self.activation_layer = tf.keras.layers.Activation('relu', name=f"res{idx}-relu")
        
    def call(self, x):
        res = self.residual(x)
        
        for i in range(self.num_cnn_layers):
            x = self.cnn_layers[i](x)
        
        x += res
        x = self.activation_layer(x)
        return x

In [147]:
class FishAndChips(tf.keras.Model):
    def __init__(self, d_model, num_cnn_blocks, max_pool_layer_idx):
        super(FishAndChips, self).__init__()
           
        self.max_pool_layer_idx = max_pool_layer_idx
        self.max_pool = tf.keras.layers.MaxPooling1D(pool_size=2, name="max_pool_1D")
        
        self.cnn_blocks = [ConvolutionBlock(3, d_model, i) for i in range(num_cnn_blocks)]
        self.tranformer = Transformer(num_layers=2, d_model=d_model, output_dim=4, num_heads=8, dff=2048, pe_input=1000, pe_target=1000)
    
    def call(self, x, y_true, training, enc_padding_mask, look_ahead_mask, dec_padding_mask):
        x = self.call_cnn_blocks(x)
        x, attention_weights = self.tranformer(x, y_true, training, enc_padding_mask, look_ahead_mask, dec_padding_mask)
        return x, attention_weights
        
    def call_cnn_blocks(self, x):
        for i,cnn_block in enumerate(self.cnn_blocks):
            x = cnn_block(x)
            
            if(i == self.max_pool_layer_idx):
                x = self.max_pool(x)
        return x

In [89]:
def load_data(size, generator):
    
#     x = []
#     y = []
#     for i in range(size):
#         x.append(i)
#         y.append(i)
#     return np.array(x), np.array(y)
    
    train_features = []
    train_labels = []
    while(len(train_features) < size):
        x,_ = next(generator.get_batch())
        y = x['the_labels']
        x = x['the_input']
        train_features.extend(x)
        train_labels.extend(y)
    return np.array(train_features),  np.array(train_labels)

def get_batched_examples(examples, batch_size, generator):
    train_x, train_y = load_data(examples, generator)
    train_x = train_x.astype('float32')
    
    data_train_x = tf.data.Dataset.from_tensor_slices(train_x)
    data_train_y = tf.data.Dataset.from_tensor_slices(train_y)
    
    data_train_x = data_train_x.batch(batch_size)
    data_train_y = data_train_y.batch(batch_size)
    
#     data_train_x = data_train_x.shuffle(train_x.shape[0])
#     data_train_y = data_train_y.shuffle(train_y.shape[0])

    data_train_x = data_train_x.prefetch(batch_size * 4)
    data_train_y = data_train_y.prefetch(batch_size * 4)

    return list(zip(data_train_x, data_train_y))

In [90]:
input_length = 300
rnn_padding = 5
use_maxpool = True

data_preper = DataPrepper(validation_split=0.1, test_split=0.1)

read_ids = data_preper.get_train_read_ids()
train_generator = DataGenerator(read_ids, batch_size=10, input_length=input_length, stride=30, reads_count=5, rnn_pad_size=rnn_padding, use_maxpool=use_maxpool)

Loading data from file:c:/Users/mirop/OneDrive/Documents/Programming/Data/bdm/umi11to5.hdf5
Loading data from file:c:/Users/mirop/OneDrive/Documents/Programming/Data/bdm/umi11to5.hdf5


In [148]:
fish_and_chips = FishAndChips(256, 5, 3)

In [136]:
def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

In [149]:
EPOCHS = 20

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

for epoch in range(1): #EPOCHS):
    start = time.time()
    
    train_loss.reset_states()
    train_accuracy.reset_states()
    
    train = get_batched_examples(1000, 32, train_generator)
    
    for (batch_id, (train_x, train_y)) in enumerate(train):
        train_step(train_x, train_y)
        
        break
    break

InvalidArgumentError: Incompatible shapes: [32,8,150,150] vs. [32,1,1,300,1] [Op:AddV2] name: fish_and_chips_4/transformer_3/encoder_3/encoder_layer_6/multi_head_attention_18/add/

In [139]:
def train_step(x,y):
#     tar_inp = tar[:, :-1]
#     tar_real = tar[:, 1:]

    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(x, y)
    
    with tf.GradientTape() as tape:
        predictions, _ = fish_and_chips(x,y,True,enc_padding_mask, combined_mask, dec_padding_mask)
        loss = loss_function(y, predictions)
        
    gradients = tape.gradient(loss, fish_and_chips.trainable_variables)    
    optimizer.apply_gradients(zip(gradients, fish_and_chips.trainable_variables))
    
    train_loss(loss)
    train_accuracy(y, predictions)
