### Torch vs. TF NOTMAD ###

Torch Version

In [1]:
import numpy as np
import torch
from pytorch_lightning import Trainer


from contextualized.dags.torch_notmad.callbacks import *
from contextualized.dags.torch_notmad.torch_notmad import NOTMAD_model
from contextualized.dags.torch_notmad.datamodules import CXW_DataModule
from contextualized.dags.torch_notmad.test_notmad.test_data import create_test_cxw_network
from contextualized.modules import Explainer


In [2]:
#Train notmad
from pytorch_lightning.utilities.model_summary import ModelSummary

# Seed both NumPy and Torch before any data is generated or weights are drawn.
np.random.seed(42)
torch.manual_seed(42)

# NOTE(review): the output saved with this cell ends in
# "TypeError: NGAM.__init__() got an unexpected keyword argument 'n_hidden_layers'",
# so as committed it does not run to completion. Which call raises is not
# visible here -- confirm the installed `contextualized` version.

C, X, W = create_test_cxw_network(n=500)
datamodule = CXW_DataModule(C, X, W)

trainer = Trainer(max_epochs = 5)

# Number of archetypes handed to the model.
k = 6

# One (n_nodes x n_nodes) starting matrix per archetype. The node count is read
# from the data instead of being hard-coded to 4; this mirrors how `fit_notmad`
# sizes its matrix from X_train.shape[-1].
# Assumes the last axis of X indexes the graph nodes -- TODO confirm.
n_nodes = X.shape[-1]
INIT_MAT = np.random.uniform(-0.01, 0.01,
                             size=(k, n_nodes, n_nodes))

model = NOTMAD_model(datamodule,
                     init_mat=INIT_MAT,
                     n_archetypes=k,
               )

# A bare expression in the middle of a cell is evaluated and thrown away (only
# the last expression of a cell is displayed), so print the summary explicitly.
print(ModelSummary(model))

trainer.tune(model)
trainer.fit(model, datamodule)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


TypeError: NGAM.__init__() got an unexpected keyword argument 'n_hidden_layers'

In [6]:
#Evaluate results
# Read the train/test splits back from the datamodule attached to the fitted model.
fitted_model = trainer.model
C_train, C_test = fitted_model.datamodule.C_train, fitted_model.datamodule.C_test
W_train, W_test = fitted_model.datamodule.W_train, fitted_model.datamodule.W_test
X_train, X_test = fitted_model.datamodule.X_train, fitted_model.datamodule.X_test

# Train predictions are taken exactly as returned by predict_w; test predictions
# are squeezed, detached and converted to a NumPy array.
torch_notmad_preds_train = fitted_model.predict_w(C_train, confirm_project_to_dag=True)
torch_notmad_preds = fitted_model.predict_w(C_test).squeeze().detach().numpy()


def mse(true, pred):
    """Mean of the element-wise squared difference between `true` and `pred`."""
    return ((true - pred)**2).mean()


def dag_pred(x, w):
    """Return np.matmul(x, w) with all size-1 axes squeezed out.

    NOTE(review): if x is (n, p) and w is (n, p, p), np.matmul broadcasts to
    (n, n, p) -- every sample multiplied by every graph -- rather than a
    per-sample product. Confirm the shapes before relying on the
    reconstruction numbers printed below.
    """
    return np.matmul(x, w).squeeze()


def dags_pred(xs, w):
    """Apply `dag_pred(x, w)` to each `x` in `xs` and collect the results in a list."""
    return [dag_pred(x, w) for x in xs]


example_preds = dags_pred(X_train, torch_notmad_preds_train)
actual_preds = dags_pred(X_train, W_train)
# print(example_preds[0])
# print(actual_preds[0])
# print(mse(example_preds[0],actual_preds[0]))

# "L2" lines compare predicted matrices against the true W; "mse" lines compare
# dag_pred(X, predicted W) against X itself.
print(f"train L2: {mse(torch_notmad_preds_train, W_train)}")
print(f"test L2:  {mse(torch_notmad_preds, W_test)}")
print(f"train mse: {mse(dag_pred(X_train, torch_notmad_preds_train), X_train)}")
print(f"test mse:  {mse(dag_pred(X_test, torch_notmad_preds), X_test)}")

train L2: 0.1314468311272936
test L2:  0.13175667931519097
train mse: 0.02104613928705072
test mse:  0.020614418241872864


In [7]:
from test_notmad.tf_notmad import NOTMAD as tf_notmad_model
import tensorflow as tf

In [9]:

#fit notmad

def fit_notmad(sample_specific_loss_params, archetype_loss_params,
               C_train, X_train, k, project, notears_pop):
    """Build a TensorFlow NOTMAD model with fixed hyperparameters and fit it.

    Args:
        sample_specific_loss_params: passed positionally to `tf_notmad_model`
            as the sample-specific loss settings.
        archetype_loss_params: passed positionally to `tf_notmad_model` as the
            archetype loss settings.
        C_train: training contexts; its `.shape` goes to the constructor and
            the array itself to `fit`.
        X_train: training observations; used the same way as `C_train`.
        k: third positional constructor argument, also the leading dimension
            of the (unused) initial-matrix draw.
        project: accepted but not referenced -- `project_archs_to_dag` is
            fixed to False below.
        notears_pop: accepted but not referenced -- `pop_model` is fixed to
            None below.

    Returns:
        The model object after `fit` has been called on it.
    """
    n_features = X_train.shape[-1]
    # Drawn but never handed to the model (the `init_mat=` argument is disabled).
    # The draw is kept so NumPy's global random state advances as it did before.
    init_mat = np.random.uniform(-0.01, 0.01, size=(k, n_features, n_features))

    notmad_ = tf_notmad_model(
        C_train.shape,
        X_train.shape,
        k,
        sample_specific_loss_params,
        archetype_loss_params,
        n_encoder_layers=2,
        encoder_width=32,
        activation='linear',  # init_mat=init_mat,
        project_archs_to_dag=False,
        learning_rate=1e-3,  # project_archs_to_dag=project,
        project_distance=1.0,
        context_activity_regularizer=tf.keras.regularizers.l1(0),
        use_compatibility=False,
        update_compat_by_grad=False,
        pop_model=None,
    )
    notmad_.fit(C_train, X_train,
                batch_size=1, epochs=100,
                es_patience=2, verbose=1)
    return notmad_

# NOTE(review): the torch training cell sets k = 6, whereas this cell uses 5 --
# confirm the two models are meant to be compared with different values.
k = 5

# Settings forwarded as the sample-specific loss parameters.
loss_params = {
    'l1': 0.,
    'init_alpha': 1e-1,
    'init_rho': 1e-2,
}
# loss_params = {'l1': 1e-2, 'alpha': 1e-1, 'rho': 1e-2}

# Settings forwarded as the archetype loss parameters.
archetype_loss_params = {
    'l1': 0.,
    'alpha': 1e-1,
    'rho': 1e-2,
}

tf_notmad = fit_notmad(
    sample_specific_loss_params=loss_params,
    archetype_loss_params=archetype_loss_params,
    C_train=C_train,
    X_train=X_train,
    k=k,
    project=True,
    notears_pop=None,
)


NOTMAD Training:   0%|           0/100 ETA: ?s,  ?epochs/s



In [11]:
#eval tf
def dag_pred(x, w):
    """Return np.matmul(x, w) with all size-1 axes squeezed out.

    NOTE(review): if x is (n, p) and w is (n, p, p), np.matmul broadcasts to
    (n, n, p) rather than forming a per-sample product -- confirm the shapes.
    """
    return np.matmul(x, w).squeeze()


def mse(true, pred):
    """Mean of the element-wise squared difference between `true` and `pred`."""
    return ((true - pred)**2).mean()


# Predict on the train contexts first, then the test contexts, both with
# project_to_dag=False, and squeeze each result.
tf_notmad_preds_train, tf_notmad_preds = (
    tf_notmad.predict_w(contexts, project_to_dag=False).squeeze()
    for contexts in (C_train, C_test)
)

# print(tf_notmad_preds_train[0])
# print(W_train[0])

# Same four metrics, with the same labels, as the torch evaluation cell.
print(f"train L2: {mse(tf_notmad_preds_train, W_train)}")
print(f"test L2:  {mse(tf_notmad_preds, W_test)}")
print(f"train mse: {mse(dag_pred(X_train, tf_notmad_preds_train), X_train)}")
print(f"test mse:  {mse(dag_pred(X_test, tf_notmad_preds), X_test)}")


train L2: 0.12782335274382325
test L2:  0.13142865585619545
train mse: 0.021978381122471947
test mse:  0.021363331720332283


In [12]:
#Compare predictions
# For the first ten training samples, show the true matrix next to the torch
# and TF predictions, followed by a separator line.
for sample_idx in range(10):
    print(
        f'True: {W_train[sample_idx]}',
        f'Torch: {torch_notmad_preds_train[sample_idx]}',
        f'TF: {tf_notmad_preds_train[sample_idx]}',
        '---------------------------',
        sep='\n',
    )

true: [[ 0.         -0.05811623  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          3.77091257  0.          0.        ]
 [ 0.          7.3226739   1.94188377  0.        ]]
torch: [[ 0.         -0.5559784   0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.04470294  4.164861    0.          0.        ]
 [-0.05763132  4.68571     1.8459836   0.        ]]
tf: [[ 0.0000000e+00 -2.8542557e-01  2.5538774e-02 -5.1620044e-03]
 [-1.2898748e-02  0.0000000e+00  1.5390925e-03  1.6665417e-03]
 [ 6.2200982e-02  3.8569226e+00  0.0000000e+00  4.6608485e-03]
 [-5.0657275e-03  4.3348575e+00  1.7843938e+00  0.0000000e+00]]
---------------------------
true: [[ 0.         -0.30260521  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          2.88114907  0.          0.        ]
 [ 0.          4.89044742  1.69739479  0.        ]]
torch: [[ 0.         -0.4239642   0.          0.        ]
 [ 0.          0.      