In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
# Add the parent directory to the import path (presumably where the
# `utils` and `exp_types` modules imported below live).
sys.path.append('..')
from utils import gen_covariance, gen_beta2, gen_data
from exp_types import GTV
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Synthetic benchmark: measure the effect of thresholding and of the
# minimum spanning tree (MST) skeleton on GTV.
# Idea to try later: replace the problematic fused-Lasso L1 term with a cosh
# approximation so the dimensionality of the problem need not be expanded.

sigma = gen_covariance(50, 0.5, 50, 10, 1)
beta = gen_beta2(n_features=50)
X, X_test, y, y_test = gen_data(300, 50, covariance=sigma, beta=beta)

In [3]:
# Parameter dict handed to GTV.run.
# Baseline configuration: no thresholding, no MST skeleton.
# Each regularization weight gets its own 10-point grid on [0, 1].
p = {
    'reg_params': {
        'lambda_S': np.linspace(0, 1, 10),
        'lambda_TV': np.linspace(0, 1, 10),
        'lambda_1': np.linspace(0, 1, 10),
    },
    'cov': sigma,
    'use_skeleton': False,
    'threshold': False,
}

In [4]:
# Fit the baseline model using whatever `p` is currently in the kernel
# (expected: the no-threshold / no-skeleton dict built in the cell above).
# %time reports CPU and wall-clock time for this single statement.
%time model1 = GTV.run(X, y, p)

started run
finished iterating
CPU times: user 1h 49min 1s, sys: 1h 30min 43s, total: 3h 19min 44s
Wall time: 33min 46s


In [5]:
# Parameter dict handed to GTV.run.
# Thresholding only: threshold on, MST skeleton off.
p = {
    'reg_params': {
        'lambda_S': np.linspace(0, 1, 10),
        'lambda_TV': np.linspace(0, 1, 10),
        'lambda_1': np.linspace(0, 1, 10),
    },
    'cov': sigma,
    'threshold': True,
    'use_skeleton': False,
}

In [6]:
# Fit the thresholding-only model using whatever `p` is currently in the
# kernel (expected: the threshold=True, use_skeleton=False dict above).
# %time reports CPU and wall-clock time for this single statement.
%time model2 = GTV.run(X, y, p)

started run
finished iterating
CPU times: user 1h 44min 39s, sys: 1h 27min 42s, total: 3h 12min 22s
Wall time: 32min 18s


In [7]:
# Parameter dict handed to GTV.run.
# MST skeleton only: use_skeleton on, thresholding off.
p = {
    'reg_params': {
        'lambda_S': np.linspace(0, 1, 10),
        'lambda_TV': np.linspace(0, 1, 10),
        'lambda_1': np.linspace(0, 1, 10),
    },
    'cov': sigma,
    'threshold': False,
    'use_skeleton': True,
}

In [8]:
# Fit the MST-only model using whatever `p` is currently in the kernel
# (expected: the threshold=False, use_skeleton=True dict above).
# %time reports CPU and wall-clock time for this single statement.
%time model3 = GTV.run(X, y, p)

started run
finished iterating
CPU times: user 3min 48s, sys: 6min 40s, total: 10min 29s
Wall time: 1min 45s


In [None]:
# Parameter dict handed to GTV.run.
# Thresholding and MST skeleton both switched on.
p = {
    'reg_params': {
        'lambda_S': np.linspace(0, 1, 10),
        'lambda_TV': np.linspace(0, 1, 10),
        'lambda_1': np.linspace(0, 1, 10),
    },
    'cov': sigma,
    'threshold': True,
    'use_skeleton': True,
}

In [9]:
# Fit the threshold + MST model using whatever `p` is currently in the
# kernel (expected: the threshold=True, use_skeleton=True dict above).
# %time reports CPU and wall-clock time for this single statement.
%time model4 = GTV.run(X, y, p)

started run
finished iterating
CPU times: user 3min 54s, sys: 6min 52s, total: 10min 47s
Wall time: 1min 49s


In [8]:
# Evaluate the differences in the models
from sklearn.metrics import r2_score
from utils import selection_accuracy, estimation_error

In [20]:
# Model 1: neither thresholding nor MST.
# Printed one per line, in order: test R^2 using the true `beta` (reference),
# test R^2 of the fitted model, selection accuracy, estimation error.
print(
    r2_score(y_test, X_test @ beta),
    r2_score(y_test, X_test @ model1.coef_ + model1.intercept_),
    selection_accuracy(beta.ravel(), model1.coef_.ravel()),
    estimation_error(beta.ravel(), model1.coef_.ravel()),
    sep='\n',
)

0.7334276114764147
0.6995571160481817
[0.8]
(array([0.58843757]), array([2.62018157]))


In [21]:
# Model 2: thresholding only.
# Printed one per line, in order: test R^2 using the true `beta` (reference),
# test R^2 of the fitted model, selection accuracy, estimation error.
print(
    r2_score(y_test, X_test @ beta),
    r2_score(y_test, X_test @ model2.coef_ + model2.intercept_),
    selection_accuracy(beta.ravel(), model2.coef_.ravel()),
    estimation_error(beta.ravel(), model2.coef_.ravel()),
    sep='\n',
)

0.7334276114764147
0.705279292917866
[0.8]
(array([0.58682296]), array([2.56728562]))


In [22]:
# Model 3: MST skeleton, no thresholding.
# Printed one per line, in order: test R^2 using the true `beta` (reference),
# test R^2 of the fitted model, selection accuracy, estimation error.
print(
    r2_score(y_test, X_test @ beta),
    r2_score(y_test, X_test @ model3.coef_ + model3.intercept_),
    selection_accuracy(beta.ravel(), model3.coef_.ravel()),
    estimation_error(beta.ravel(), model3.coef_.ravel()),
    sep='\n',
)

0.7334276114764147
0.7173489767847994
[0.8]
(array([0.55181849]), array([2.3757344]))


In [23]:
# Model 4: MST skeleton together with thresholding.
# Printed one per line, in order: test R^2 using the true `beta` (reference),
# test R^2 of the fitted model, selection accuracy, estimation error.
print(
    r2_score(y_test, X_test @ beta),
    r2_score(y_test, X_test @ model4.coef_ + model4.intercept_),
    selection_accuracy(beta.ravel(), model4.coef_.ravel()),
    estimation_error(beta.ravel(), model4.coef_.ravel()),
    sep='\n',
)

0.7334276114764147
0.7169777431922189
[0.8]
(array([0.56337906]), array([2.50106631]))


In [24]:
# Persist the four fitted models.
# The file holds four pickles written back to back (model1..model4, in that
# order), so reading it back takes four successive pickle.load calls on the
# same open file handle.
import pickle
with open('GTV_test_models', 'wb') as f:
    f.writelines(
        pickle.dumps(fitted) for fitted in (model1, model2, model3, model4)
    )

In [3]:
## Approximate models
# Configuration: no thresholding, no MST skeleton.
# NOTE(review): this dict matches the exact-model baseline configuration;
# nothing here selects an approximate solver. Presumably the switch lives in
# exp_types (reloaded via autoreload) -- confirm before comparing timings.
p = {
    'reg_params': {
        'lambda_S': np.linspace(0, 1, 10),
        'lambda_TV': np.linspace(0, 1, 10),
        'lambda_1': np.linspace(0, 1, 10),
    },
    'cov': sigma,
    'use_skeleton': False,
    'threshold': False,
}

In [4]:
# Fit the first "approximate" model using whatever `p` is currently in the
# kernel (expected: the use_skeleton=False, threshold=False dict above).
# %time reports CPU and wall-clock time for this single statement.
%time approx_model1 = GTV.run(X, y, p)

started run


  return opt.minimize(f, x0, progress=progress, args=args)


finished iterating
CPU times: user 1h 28min 23s, sys: 2h 33min 12s, total: 4h 1min 35s
Wall time: 40min 25s


In [5]:
## Approximate models
# Configuration: MST skeleton on, thresholding off.
p = {
    'reg_params': {
        'lambda_S': np.linspace(0, 1, 10),
        'lambda_TV': np.linspace(0, 1, 10),
        'lambda_1': np.linspace(0, 1, 10),
    },
    'cov': sigma,
    'use_skeleton': True,
    'threshold': False,
}

In [6]:
# Fit the second "approximate" model using whatever `p` is currently in the
# kernel (expected: the use_skeleton=True, threshold=False dict above).
# %time reports CPU and wall-clock time for this single statement.
%time approx_model2 = GTV.run(X, y, p)

started run
finished iterating
CPU times: user 3min 17s, sys: 3min 37s, total: 6min 54s
Wall time: 1min 10s


In [11]:
# Larger synthetic problem (1000 features) with a block covariance structure.
# This rebinds sigma, beta, X, X_test, y and y_test, replacing the
# 50-feature data generated earlier in the notebook.

sigma = gen_covariance(1000, 0.25, 50, 10, 0)
beta = gen_beta2(n_features=1000)
X, X_test, y, y_test = gen_data(3000, 1000, covariance=sigma, beta=beta)

In [12]:
from sklearn.covariance import oas

In [13]:
# Estimate the covariance matrix from the training data X.
# oas returns a tuple; only element 0 (the covariance estimate) is kept.
sigma_hat = oas(X)[0]

In [14]:
# MST skeleton only, driven by the estimated covariance `sigma_hat` rather
# than the true `sigma`.
p = {
    'reg_params': {
        'lambda_S': np.linspace(0, 1, 10),
        'lambda_TV': np.linspace(0, 1, 10),
        'lambda_1': np.linspace(0, 1, 10),
    },
    'cov': sigma_hat,
    'threshold': False,
    'use_skeleton': True,
}

In [None]:
# Fit on the 1000-feature data using whatever `p` is currently in the kernel
# (expected: the sigma_hat / use_skeleton=True dict above).
# NOTE(review): the saved output of this cell shows only "started run" and no
# execution count, so the stored run apparently did not finish -- confirm
# that `block_model` used further down comes from a completed fit.
%time block_model = GTV.run(X, y, p)

started run


In [9]:
# Block-covariance model (MST skeleton, estimated covariance).
# Printed one per line, in order: test R^2 using the true `beta` (reference),
# test R^2 of the fitted model, selection accuracy, estimation error.
print(
    r2_score(y_test, X_test @ beta),
    r2_score(y_test, X_test @ block_model.coef_ + block_model.intercept_),
    selection_accuracy(beta.ravel(), block_model.coef_.ravel()),
    estimation_error(beta.ravel(), block_model.coef_.ravel()),
    sep='\n',
)

0.7368708341262025
0.6828258740869844
[0.8]
(array([0.41612228]), array([1.82915653]))
