In [None]:
import os
import sys
# Make the directory two levels above the working directory importable, so that
# the `lmmnn` package imported further down can be resolved.
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
import time
import gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform

from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

from lmmnn.nn import reg_nn_ohe_or_ignore, reg_nn_embed, reg_nn_lmm, reg_nn_svdkl, reg_nn_cnn
from lmmnn.simulation import Count

import tensorflow.keras.backend as K

In [None]:
# Number of observations, and the scale of the per-row shift added to A below.
n = 5000
a = 2

# Build a random n x n correlation-like matrix G:
#   1. each row of A is n standard-normal draws plus ONE extra normal draw
#      (scaled by `a`) that is shared by the whole row;
#   2. A @ A.T is the Gram matrix of those rows (symmetric);
#   3. multiplying by D^{-1/2} on both sides rescales it to a unit diagonal.
# Plain ndarrays with `@` are used rather than `np.matrix`, which NumPy no
# longer recommends; the order of the random draws is unchanged.
# NOTE(review): no random seed is set, so every run simulates different data.
A = np.array([np.random.randn(n) + np.random.randn(1) * a for _ in range(n)])
A = A @ A.T
D_half = np.diag(np.diag(A) ** (-0.5))
G = D_half @ A @ D_half

# Variance components: b ~ N(0, sig2b * G), e ~ N(0, sig2e) i.i.d., y = b + e.
sig2b = 0.8
sig2e = 1.0
b = np.random.multivariate_normal(np.zeros(n), G * sig2b)
e = np.random.normal(0, np.sqrt(sig2e), n)
y = b + e

# Keep the true random effects next to the response for later evaluation.
y_df = pd.DataFrame({'y': y, 'b_true': b})

In [None]:
# Sanity check on G: print its dimensions, then its top-left 5x5 corner.
for preview in (G.shape, G[:5, :5]):
    print(preview)

In [None]:
# Give every row its own integer id 0, 1, ..., len(y_df) - 1 in column `z0`,
# then preview the first rows of the frame.
y_df['z0'] = np.arange(len(y_df))
y_df.head()

In [None]:
# Run configuration. Every name below except `time2measure_dict` and `resolution`
# is read as a global by reg_nn() and forwarded to reg_nn_lmm().
# NOTE(review): the grouping is by apparent role only; confirm the exact meaning
# of each option against reg_nn_lmm.

# -- training loop --
batch = 100
epochs = 500
patience = 10
verbose = False
log_params = False
shuffle = False
idx = None

# -- network --
n_neurons = [10]
dropout = []
activation = 'relu'

# -- random-effects specification --
mode = 'mme'
qs = []
n_sig2bs = 0
est_cors = []
n_sig2bs_spatial = 1
q_spatial = len(y_df['z0'].unique())  # number of distinct z0 values
Z_non_linear = False # will currently only work with g(Z) = Z
Z_embed_dim_pct = 10
spatial_embed_neurons = None
# spatial_embed_neurons = [100, 50, 20, 10, 20, 50, 100] # uncomment for LMMNN-E

# -- not referenced by the cells visible in this notebook --
time2measure_dict = None
resolution = 100

In [None]:
def reg_nn(X_train, X_test, y_train, y_test, reg_type, b_true):
    """Fit one model type on a train/test split, score it and plot the fit.

    Only ``reg_type == 'lmmnn'`` is supported. The fit is delegated to
    ``reg_nn_lmm`` using the notebook-level configuration globals (``qs``,
    ``q_spatial``, ``x_cols``, ``batch``, ``epochs``, ``patience``, ``G``, ...),
    so those must be defined before this function is called.

    Side effects: clears the Keras session, forces a garbage collection and
    shows a scatter plot of ``y_df['y']`` against the returned predictions.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        The split, passed through unchanged to ``reg_nn_lmm``.
    reg_type
        Name of the model to fit; must be ``'lmmnn'``.
    b_true
        Passed through to ``reg_nn_lmm`` as its ``b_true`` keyword.

    Returns
    -------
    tuple
        ``(mse, sigmas, n_epochs, elapsed)``: ``mse`` is the mean squared
        difference between the first value returned by ``reg_nn_lmm`` and the
        global ``y_df['y']``; ``sigmas`` and ``n_epochs`` are returned by
        ``reg_nn_lmm`` as-is; ``elapsed`` is the wall-clock time in seconds
        spent inside ``reg_nn_lmm``.

    Raises
    ------
    ValueError
        If ``reg_type`` is anything other than ``'lmmnn'``.
    """
    if reg_type != 'lmmnn':
        # str() keeps this a ValueError even for non-string values, and the
        # leading space stops the value and the message running together.
        raise ValueError(str(reg_type) + ' is an unknown reg_type')
    start = time.time()
    b_hat, sigmas, _, _, n_epochs = reg_nn_lmm(
        X_train, X_test, y_train, y_test, qs, q_spatial, x_cols, batch, epochs, patience,
        n_neurons, dropout, activation, mode,
        n_sig2bs, n_sig2bs_spatial, est_cors, G, spatial_embed_neurons,
        verbose, Z_non_linear, Z_embed_dim_pct, log_params, idx, shuffle, b_true = b_true)
    end = time.time()
    # Release the TensorFlow graph and Python garbage between folds.
    K.clear_session()
    gc.collect()
    # NOTE(review): the score is taken against the full global y_df['y'], not
    # against b_true or y_test -- confirm this is the intended target and that
    # b_hat has one entry per row of y_df.
    mse = np.mean((b_hat - y_df['y'])**2)
    plt.scatter(y_df['y'], b_hat, alpha = 0.5)
    plt.show()
    return mse, sigmas, n_epochs, end - start

In [None]:
# Results table; iterate_reg_types() appends one row per fitted model.
res = pd.DataFrame(columns=['experiment', 'exp_type', 'mse', 'sigma_e_est', 'sigma_b0_est', 'n_epoch', 'time'])

def iterate_reg_types(X_train, X_test, y_train, y_test, b_true, experiment=None):
    """Fit the 'lmmnn' model on one split and append its metrics to ``res``.

    Parameters
    ----------
    X_train, X_test, y_train, y_test, b_true
        Passed through unchanged to ``reg_nn``.
    experiment
        Label stored in the ``experiment`` column of the new row. When omitted,
        the notebook-level loop variable ``i`` is used, which is what earlier
        callers relied on.

    The row index comes from the module-level ``counter`` generator, so that
    generator must exist before this function is called.
    """
    if experiment is None:
        # Backward-compatible fallback to the global fold index.
        experiment = i
    mse_lmm, sigmas, n_epochs_lmm, time_lmm = reg_nn(X_train, X_test, y_train, y_test, reg_type='lmmnn', b_true=b_true)
    print(' finished lmmnn, mse: %.4f' % (mse_lmm))
    # sigmas[0] fills 'sigma_e_est'; sigmas[2][0] fills 'sigma_b0_est'.
    res.loc[next(counter)] = [experiment, 'lmmnn', mse_lmm, sigmas[0], sigmas[2][0], n_epochs_lmm, time_lmm]

kf = KFold(n_splits=5, shuffle=True, random_state=42)
# presumably yields consecutive integers used as row labels of `res` -- verify against lmmnn.simulation.Count
counter = Count().gen()
X, y = y_df.drop(['b_true', 'y'], axis=1), y_df['y']
x_cols = [col for col in X.columns if col not in ['z0']]
b_true = y_df['b_true'].values
for i, (train_index, test_index) in enumerate(kf.split(X, y)):
    print('iteration %d' % i)
    # KFold.split yields positional indices, so select rows with .iloc; this
    # stays correct even if the frame's index is not 0..n-1.
    X_train, X_test = X.iloc[train_index].copy(), X.iloc[test_index].copy()
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    iterate_reg_types(X_train, X_test, y_train, y_test, b_true, experiment=i)

In [None]:
# Display the results table accumulated by the cross-validation loop.
res

In [None]:
# Uncomment the next line to save the results table to disk:
# res.to_csv('../../results/res_mme.csv')