In [1]:
import os
import sys
import pickle
import numpy as np
from sklearn.metrics import mean_squared_error

sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))
from bayesmf.scripts.utils.model import run_kfold_xval
from bayesmf.scripts.utils.dataset import make_insilico_dataset, make_downsampled_dataset
from bayesmf.models.cmf import CMF, StochasticCMF

%reload_ext autoreload
%autoreload 2

In [2]:
DIR_PATH = '/home/sxchao/bayesmf/output'
handle = 'scmf_050720'

In [57]:
X, _ = make_insilico_dataset()

In [58]:
np.random.seed(22690)

idxs = np.arange(X.shape[1])
np.random.shuffle(idxs)

#X_train = X[:, idxs[:800]]
#X_test = X[:, idxs[800:]]

kfold = 5
splits = np.split(idxs, kfold)

In [None]:
K = 10
errs10 = []

for k in range(kfold):
    idxs_train = [i for j in np.setdiff1d(np.arange(kfold), k) for i in splits[j]]
    idxs_test = splits[k]
    X_train = X[:, idxs_train]
    X_test = X[:, idxs_test]
    
    factorizer = StochasticCMF(K=K, m=K-5, minibatch_size=200, step_size=1e-05, tolerance=0.001, max_iters=10, kwargs={'c0':0.05 * X.shape[0]})
    factorizer.fit(X_train)
    l = factorizer.transform(X_test, attr='l') 
    W = np.exp(factorizer.alpha[np.newaxis, :] + np.dot(l, factorizer.u.T)).T 
    H = factorizer.Eb.T
        
    err = mean_squared_error(X_test.T, np.matmul(W, H), squared=False)
    print(err)
    errs10.append(err)

In [None]:
K = 15
errs15 = []

for k in range(kfold):
    idxs_train = [i for j in np.setdiff1d(np.arange(kfold), k) for i in splits[j]]
    idxs_test = splits[k]
    X_train = X[:, idxs_train]
    X_test = X[:, idxs_test]

    factorizer = StochasticCMF(K=K, m=K-5, step_size=1e-05, tolerance=0.001, max_iters=10, kwargs={'c0':0.05 * X.shape[0]})
    factorizer.fit(X_train)
    l = factorizer.transform(X_test, attr='l') 
    W = np.exp(factorizer.alpha[np.newaxis, :] + np.dot(l, factorizer.u.T)).T 
    H = factorizer.Eb.T

    err = mean_squared_error(X_test.T, np.matmul(W, H), squared=False)
    print(err)
    errs15.append(err)

In [None]:
K = 20
errs20 = []

for k in range(kfold):
    idxs_train = [i for j in np.setdiff1d(np.arange(kfold), k) for i in splits[j]]
    idxs_test = splits[k]
    X_train = X[:, idxs_train]
    X_test = X[:, idxs_test]

    factorizer = StochasticCMF(K=K, m=K-5, step_size=1e-05, tolerance=0.001, max_iters=10, kwargs={'c0':0.05 * X.shape[0]})
    factorizer.fit(X_train)
    l = factorizer.transform(X_test, attr='l') 
    W = np.exp(factorizer.alpha[np.newaxis, :] + np.dot(l, factorizer.u.T)).T 
    H = factorizer.Eb.T

    err = mean_squared_error(X_test.T, np.matmul(W, H), squared=False)
    print(err)
    errs20.append(err)

In [41]:
X, _ = make_downsampled_dataset()

Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [42]:
np.random.seed(22690)

idxs = np.arange(X.shape[1])
np.random.shuffle(idxs)

#X_train = X[:, idxs[:800]]
#X_test = X[:, idxs[800:]]

kfold = 5
splits = np.split(idxs, kfold)

In [None]:
K = 10
derrs10 = []

for k in range(kfold):
    idxs_train = [i for j in np.setdiff1d(np.arange(kfold), k) for i in splits[j]]
    idxs_test = splits[k]
    X_train = X[:, idxs_train]
    X_test = X[:, idxs_test]
    
    factorizer = StochasticCMF(K=K, m=K-5, step_size=1e-05, tolerance=0.001, max_iters=10, init=None, kwargs={'c0':0.05 * X.shape[0]})
    factorizer.fit(X_train)
    l = factorizer.transform(X_test, attr='l') 
    W = np.exp(factorizer.alpha[np.newaxis, :] + np.dot(l, factorizer.u.T)).T 
    H = factorizer.Eb.T
        
    err = mean_squared_error(X_test.T, np.matmul(W, H), squared=False)
    print(err)
    derrs10.append(err)

In [None]:
K = 15
derrs15 = []

for k in range(kfold):
    idxs_train = [i for j in np.setdiff1d(np.arange(kfold), k) for i in splits[j]]
    idxs_test = splits[k]
    X_train = X[:, idxs_train]
    X_test = X[:, idxs_test]

    factorizer = StochasticCMF(K=K, m=K-5, step_size=1e-05, tolerance=0.001, max_iters=10, init=None, kwargs={'c0':0.05 * X.shape[0]})
    factorizer.fit(X_train)
    l = factorizer.transform(X_test, attr='l') 
    W = np.exp(factorizer.alpha[np.newaxis, :] + np.dot(l, factorizer.u.T)).T 
    H = factorizer.Eb.T

    err = mean_squared_error(X_test.T, np.matmul(W, H), squared=False)
    print(err)
    derrs15.append(err)

In [None]:
K = 20
derrs20 = []

for k in range(kfold):
    idxs_train = [i for j in np.setdiff1d(np.arange(kfold), k) for i in splits[j]]
    idxs_test = splits[k]
    X_train = X[:, idxs_train]
    X_test = X[:, idxs_test]

    factorizer = StochasticCMF(K=K, m=K-5, step_size=1e-05, tolerance=0.001, max_iters=10, init=None, kwargs={'c0':0.05 * X.shape[0]})
    factorizer.fit(X_train)
    l = factorizer.transform(X_test, attr='l') 
    W = np.exp(factorizer.alpha[np.newaxis, :] + np.dot(l, factorizer.u.T)).T 
    H = factorizer.Eb.T

    err = mean_squared_error(X_test.T, np.matmul(W, H), squared=False)
    print(err)
    derrs20.append(err)

In [48]:
# init=None
for errs in [errs10, errs15, errs20]:
    print(np.mean(errs))

1.5734142857619882
1.5727257711655738
1.5724411292628946


In [49]:
# init=None
for errs in [derrs10, derrs15, derrs20]:
    print(np.mean(errs))

4.300839159962477
4.2855199746437
4.286197600236283
