In [1]:
%load_ext autoreload
%autoreload 2

from model_awg import AdversarialWeightGenenerator, create_model
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K

In [2]:
# Model configuration. Inputs are laid out as
# (batch, num_layers, seq_len, seq_len).
num_layers, seq_len = 2, 100
model, optimizer = create_model(
    "test_model",
    load_checkpoint=False,
    num_layers=num_layers,
    seq_len=seq_len,
)

Input shape is: (2, 100, 100)
Output features: 100


In [3]:
# Random batch of 55 samples for a quick forward-pass check.
batch_shape = (55, num_layers, seq_len, seq_len)
x = tf.random.normal(batch_shape)
x.shape

TensorShape([55, 2, 100, 100])

In [4]:
# Run the model on the random batch and print the output shape so it can be
# compared with the input shape shown in the previous cell.
y = model(x)
print(y.shape)

(55, 2, 100, 100)


In [5]:
# Shape of the first trainable weight tensor of the model.
first_weight = model.trainable_weights[0]
first_weight.shape

TensorShape([100, 256])

In [6]:
# Count parameters by summing the element count of every weight tensor.
# Both totals are cast to int: np.sum over an empty list returns the float
# 0.0, which would otherwise be printed as "0.0" and turn the total into a
# float as well.
trainable_count = int(np.sum([K.count_params(w) for w in model.trainable_weights]))
non_trainable_count = int(np.sum([K.count_params(w) for w in model.non_trainable_weights]))

print(f'Total params: {trainable_count + non_trainable_count}')
print(f'Trainable params: {trainable_count}')
print(f'Non-trainable params: {non_trainable_count}')

Total params: 51556
Trainable params: 51556
Non-trainable params: 0


In [7]:
def create_dataset(N=100):
    """Build a synthetic (input, target) pair of tensors.

    Uses the notebook-level ``num_layers`` and ``seq_len`` for the tensor
    shape ``(N, num_layers, seq_len, seq_len)``.

    Args:
        N: number of samples to generate.

    Returns:
        Tuple ``(x, y)``: ``x`` is a softmax over uniform random values and
        ``y`` is a softmax over ``x * 2``, so the target is a deterministic
        function of the input. Both are normalized along the last axis.
    """
    shape = (N, num_layers, seq_len, seq_len)
    raw = tf.random.uniform(shape)
    x = tf.nn.softmax(raw, axis=-1)
    y = tf.nn.softmax(x * 2, axis=-1)
    return x, y

In [8]:
# Generate a dataset with the default size and keep both the packed tuple
# and its unpacked halves.
x, y = data = create_dataset()

In [9]:
# Inputs and targets should share one shape.
for tensor in (x, y):
    print(tensor.shape)

# Sum of a single innermost row of the targets (softmax output).
tf.reduce_sum(y[0][0][0])

(100, 2, 100, 100)
(100, 2, 100, 100)


<tf.Tensor: id=104, shape=(), dtype=float32, numpy=1.0>

In [10]:
def evaluate(model, xt, yt, loss_fn=None):
    """Compute the loss of ``model`` on ``(xt, yt)`` in inference mode.

    Args:
        model: callable invoked as ``model(xt, training=False)``.
        xt: inputs passed to the model.
        yt: targets the predictions are compared against.
        loss_fn: callable ``loss_fn(y_true, y_pred)``. When omitted, the
            notebook-level ``loss_obj`` is used, so existing three-argument
            calls behave as before.

    Returns:
        Whatever the loss callable returns for ``(yt, model(xt))``.

    Raises:
        NameError: if ``loss_fn`` is omitted and ``loss_obj`` has not been
            defined yet (it is created in a later cell).
    """
    if loss_fn is None:
        # Fall back to the global only when no loss is supplied explicitly.
        loss_fn = loss_obj

    ypred = model(xt, training=False)
    return loss_fn(yt, ypred)

In [11]:
# Start from a freshly created model/optimizer and a KL-divergence loss.
model, optimizer = create_model(
    "test_model",
    load_checkpoint=False,
    num_layers=num_layers,
    seq_len=seq_len,
)
loss_obj = tf.keras.losses.KLDivergence()

# "Out" is a separate test set; "In" is the fixed set the model is fit on.
xt, yt = create_dataset(N=1000)
x, y = create_dataset(N=100)

# Report the loss before any update has been applied.
epoch = 1
print(f"{epoch:3}. Out: {evaluate(model, xt, yt):.7e}  In: {evaluate(model, x, y):.7e}  ")

# 10,000 full-batch updates; the counter continues from the initial report.
for epoch in range(epoch + 1, epoch + 10_001):
    with tf.GradientTape() as tape:
        pred = model(x, training=True)
        loss = loss_obj(y, pred)

    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Only report every 100th step.
    if epoch % 100 != 0:
        continue
    print(f"{epoch:3}. Out: {evaluate(model, xt, yt):.7e}  In: {evaluate(model, x, y):.7e}  ")
    

Input shape is: (2, 100, 100)
Output features: 100
  1. Out: 6.4122818e-05  In: 6.4099499e-05  
100. Out: 1.7920587e-05  In: 1.7933527e-05  
200. Out: 1.5646167e-05  In: 1.5647212e-05  
300. Out: 1.3302302e-05  In: 1.3291939e-05  
400. Out: 1.1124473e-05  In: 1.1105893e-05  
500. Out: 9.1896654e-06  In: 9.1649072e-06  
600. Out: 7.5110220e-06  In: 7.4824156e-06  
700. Out: 6.0755419e-06  In: 6.0455232e-06  
800. Out: 4.8613242e-06  In: 4.8305697e-06  
900. Out: 3.8438452e-06  In: 3.8139538e-06  
1000. Out: 2.9994083e-06  In: 2.9713112e-06  
1100. Out: 2.3056700e-06  In: 2.2808106e-06  
1200. Out: 1.7432351e-06  In: 1.7213546e-06  
1300. Out: 1.2939282e-06  In: 1.2755753e-06  
1400. Out: 9.4138625e-07  In: 9.2613249e-07  
1500. Out: 6.7110881e-07  In: 6.5927821e-07  
1600. Out: 4.7033240e-07  In: 4.6105040e-07  
1700. Out: 3.1891494e-07  In: 3.1255405e-07  
1800. Out: 2.1391982e-07  In: 2.0914776e-07  
1900. Out: 1.4040945e-07  In: 1.3681583e-07  
2000. Out: 9.1588703e-08  In: 8.9436782