In [1]:
%load_ext autoreload
%autoreload 2

from model_awg import AdversarialWeightGenenerator, create_model
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K

In [2]:
# Tensors in this notebook use the layout (batch, num_layers, seq_len, seq_len).
num_layers, seq_len = 2, 100

# Build the model/optimizer pair through the project helper.
# load_checkpoint=False is passed explicitly (presumably so that no saved
# weights are restored -- confirm in model_awg).
model, optimizer = create_model(
    "test_model",
    load_checkpoint=False,
    num_layers=num_layers,
    seq_len=seq_len,
)

Input shape is: (2, 100, 100)
Output features: 100


In [3]:
# Random standard-normal batch of 55 samples in the
# (batch, num_layers, seq_len, seq_len) layout.
x = tf.random.normal((55, num_layers, seq_len, seq_len))
# Last expression of the cell: displays the shape of the batch.
x.shape

TensorShape([55, 2, 100, 100])

In [4]:
# Forward pass on the random batch, then print the output shape
# (the recorded output shows it equal to the input shape).
y = model(x)
print(y.shape)

(55, 2, 100, 100)


In [5]:
# Shape of the first tensor in the model's list of trainable weights.
model.trainable_weights[0].shape

TensorShape([100, 256])

In [6]:
# Count scalar parameters by summing K.count_params over each weight tensor.
# Both totals are cast to int: np.sum of an empty list is the float 0.0, so
# without the cast a model with no weights in a group would report "0.0"
# and turn the grand total into a float as well.
trainable_count = int(np.sum([K.count_params(w) for w in model.trainable_weights]))
non_trainable_count = int(np.sum([K.count_params(w) for w in model.non_trainable_weights]))

print(f'Total params: {trainable_count + non_trainable_count}')
print(f'Trainable params: {trainable_count}')
print(f'Non-trainable params: {non_trainable_count}')

Total params: 51556
Trainable params: 51556
Non-trainable params: 0


In [7]:
def create_dataset(N=100):
    """Generate N random (input, target) pairs of softmax-normalised tensors.

    Both tensors have shape (N, num_layers, seq_len, seq_len); num_layers and
    seq_len are read from the notebook globals when the function is called.
    Inputs and targets are drawn independently from tf.random.uniform and
    passed through tf.nn.softmax, so the target is not a function of the input.

    A deterministic alternative target,
    softmax(-2.0*x**3 + 5.0*x**2 - 2.0*x + 1.0), was tried and is disabled.

    Returns:
        Tuple (x, y) of the input tensor and the target tensor.
    """
    shape = (N, num_layers, seq_len, seq_len)
    # The generator is consumed in order: the input is drawn first, the target second.
    inputs, targets = (tf.nn.softmax(tf.random.uniform(shape)) for _ in range(2))
    return (inputs, targets)

In [8]:
# Draw a dataset with the default size (N=100) and unpack it.
# NOTE: this rebinds x and y, which earlier cells used for the random
# normal batch and the model output respectively.
data = create_dataset()

x, y = data

In [9]:
# Shapes of the inputs and the targets.
print(x.shape)
print(y.shape)
# Sanity check: sum one innermost row of y. create_dataset applies softmax,
# so this is expected to be 1 (the recorded output shows 1.0).
tf.reduce_sum(y[0][0][0])

(100, 2, 100, 100)
(100, 2, 100, 100)


<tf.Tensor: id=109, shape=(), dtype=float32, numpy=1.0>

In [10]:
def evaluate(model, xt, yt, loss_fn=None):
    """Compute the loss of `model` on (xt, yt) with training disabled.

    Args:
        model: callable invoked as model(xt, training=False).
        xt: inputs handed to the model.
        yt: targets compared against the model's predictions.
        loss_fn: optional callable invoked as loss_fn(yt, ypred). When omitted,
            the notebook-level `loss_obj` is looked up at call time, which is
            exactly what this function did before the parameter existed.

    Returns:
        Whatever the loss callable returns for (yt, ypred).

    Raises:
        NameError: if loss_fn is omitted and no global `loss_obj` is defined yet
            (it is created in a later cell than this definition).
    """
    if loss_fn is None:
        # Backward-compatible fallback to the global defined by the training cell.
        loss_fn = loss_obj

    ypred = model(xt, training=False)
    return loss_fn(yt, ypred)

In [11]:
# Re-create the model/optimizer pair (rebinding the names from the earlier
# cell) and define the loss that evaluate() reads from the global scope.
model, optimizer = create_model("test_model", load_checkpoint=False, num_layers=num_layers, seq_len=seq_len)
loss_obj = tf.keras.losses.KLDivergence()

# The held-out set (reported as "Out") is drawn first, then the training set ("In").
xt, yt = create_dataset(N=1000)
epoch = 1
x, y = create_dataset(N=100)

# Baseline line, printed before any optimisation step has run.
print(f"{epoch:3}. Out: {evaluate(model, xt, yt):.7e}  In: {evaluate(model, x, y):.7e}  ")

# 10_000 full-batch updates, numbered 2..10_001 so the counter continues from
# the baseline line above. The range bounds are evaluated once, before the
# loop variable is rebound.
for epoch in range(epoch + 1, epoch + 10_001):
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        pred = model(x, training=True)
        loss = loss_obj(y, pred)

    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Report both losses every 100th epoch number.
    if not epoch % 100:
        print(f"{epoch:3}. Out: {evaluate(model, xt, yt):.7e}  In: {evaluate(model, x, y):.7e}  ")
    

Input shape is: (2, 100, 100)
Output features: 100
  1. Out: 4.0291168e-02  In: 4.0298093e-02  
100. Out: 4.0260758e-02  In: 4.0248532e-02  
200. Out: 4.0262524e-02  In: 4.0224187e-02  
300. Out: 4.0268406e-02  In: 4.0193390e-02  
400. Out: 4.0282562e-02  In: 4.0156554e-02  
500. Out: 4.0309303e-02  In: 4.0119518e-02  
600. Out: 4.0345937e-02  In: 4.0090874e-02  
700. Out: 4.0382288e-02  In: 4.0073249e-02  
800. Out: 4.0410798e-02  In: 4.0063918e-02  
900. Out: 4.0430028e-02  In: 4.0058877e-02  
1000. Out: 4.0442467e-02  In: 4.0056389e-02  
1100. Out: 4.0450189e-02  In: 4.0055007e-02  
1200. Out: 4.0454999e-02  In: 4.0054280e-02  
1300. Out: 4.0458038e-02  In: 4.0053908e-02  
1400. Out: 4.0459964e-02  In: 4.0053636e-02  
1500. Out: 4.0461235e-02  In: 4.0053505e-02  
1600. Out: 4.0462155e-02  In: 4.0053453e-02  
1700. Out: 4.0462643e-02  In: 4.0053360e-02  
1800. Out: 4.0463023e-02  In: 4.0053334e-02  
1900. Out: 4.0463328e-02  In: 4.0053334e-02  
2000. Out: 4.0463507e-02  In: 4.0053315