In [115]:
import pickle
from csv import reader, writer
from benlib.glm import *

In [128]:
def load_csv(filename):
    """Read a headerless, all-numeric CSV file into a float array.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        One array row per non-blank CSV line, with every field converted
        by ``float``.

    Raises
    ------
    ValueError
        If a field cannot be converted with ``float``.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires for file objects passed to reader().
    with open(filename, 'r', newline='') as csvfile:
        # csv.reader yields an empty list for a blank line; dropping those
        # keeps the row list rectangular so np.array builds a numeric array.
        rows = [[float(field) for field in row]
                for row in reader(csvfile) if row]
    return np.array(rows)

def save_csv(x, filename):
    """Write an iterable of rows to a CSV file, one CSV line per row.

    Parameters
    ----------
    x : iterable of iterables
        Rows to write; each row is passed to the csv writer unchanged.
    filename : str or path-like
        Destination path. An existing file is overwritten.
    """
    # newline='' stops text-mode newline translation from turning the csv
    # writer's '\r\n' terminator into '\r\r\n' on Windows, which would read
    # back as a blank row after every data row.
    with open(filename, 'w', newline='') as csvfile:
        writer(csvfile).writerows(x)

# Sizes of the two trailing axes used to unflatten each row of the design
# matrix below (n_f * n_h values per row).
n_f = 25
n_h = 8

# Flattened design matrix, one row per sample.
X_t_fh = load_csv('cvglmnet_test_X.csv')

# The same data viewed as an (n_t, n_f, n_h) tensor.
n_t = len(X_t_fh)
X_tfh = np.reshape(X_t_fh, (n_t, n_f, n_h))

# Target vector and the stored reference results; singleton axes are dropped.
y = load_csv('cvglmnet_test_y.csv').squeeze()
y_hat_check = load_csv('cvglmnet_test_y_hat.csv').squeeze()
train_scores_check = load_csv('cvglmnet_test_train_scores.csv').squeeze()
test_scores_check = load_csv('cvglmnet_test_test_scores.csv').squeeze()

In [129]:
# Set to True to (re)write the reference CSVs from this run instead of
# checking this run against the CSVs already on disk.
save_results_to_csv = False

folds = select_folds(n_t, 10)

print('Fitting with CVGLMnet...')
cvglm = CVGLMnet()

fits = []
y_hats = []
test_scores = []
train_scores = []
for train_idx, test_idx in folds:
    cvglm.fit(X_t_fh[train_idx, :], y[train_idx])
    fits.append(cvglm.dump())
    # Predictions are made for every sample, not only the held-out ones.
    y_hats.append(cvglm.predict(X_t_fh))
    train_scores.append(cvglm.score(X_t_fh[train_idx, :], y[train_idx]))
    test_scores.append(cvglm.score(X_t_fh[test_idx, :], y[test_idx]))
print(train_scores)
y_hat = np.vstack(y_hats)
print('Done')

if save_results_to_csv:
    print('Saving results to csv...')
    save_csv(y_hat, 'cvglmnet_test_y_hat.csv')
    save_csv([train_scores], 'cvglmnet_test_train_scores.csv')
    save_csv([test_scores], 'cvglmnet_test_test_scores.csv')

else:
    print('Checking results against csv...')
    # Compare absolute differences: a signed difference below the tolerance
    # would also accept arbitrarily large negative deviations. Working on
    # whole arrays (rather than zip) makes a length mismatch raise instead of
    # being silently truncated.
    assert np.all(np.abs(y_hat - y_hat_check) < 1e-12)
    assert np.all(np.abs(np.asarray(train_scores)
                         - np.asarray(train_scores_check)) < 1e-12)
    assert np.all(np.abs(np.asarray(test_scores)
                         - np.asarray(test_scores_check)) < 1e-12)

    print('All OK')

# Persist the per-fold fit dumps so that reloading can be verified later.
print('Saving fits to pickle...')
with open('results.pkl', 'wb') as pkl:
    pickle.dump(fits, pkl)
print('Done')

Fitting with CVGLMnet...
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
[0.4682102245059006, 0.47400384174639504, 0.4622965626925786, 0.469275157081933, 0.46096343991818023, 0.47112922428386994, 0.4672439336936833, 0.4727721256036528, 0.4737993923863963, 0.4714265485790933]
Done
Checking results against csv...
All OK
Saving fits to pickle...
Done


In [130]:
# Check that loading fits works and gives the same results.
print('Loading fits from pickle...')
# pickle.load can execute arbitrary code; acceptable here only because
# results.pkl is the file this notebook wrote itself.
with open('results.pkl', 'rb') as pkl:
    loaded_fits = pickle.load(pkl)
print('Done')

print('Checking loaded fits are the same as the originals...')
# NOTE(review): these two names were bound earlier to the reference values
# loaded from CSV; from here on they hold scores of the reloaded fits.
train_scores_check = []
test_scores_check = []
for (train_idx, test_idx), fit in zip(folds, loaded_fits):
    cvglm_check = CVGLMnet(fit)
    train_scores_check.append(cvglm_check.score(X_t_fh[train_idx, :], y[train_idx]))
    test_scores_check.append(cvglm_check.score(X_t_fh[test_idx, :], y[test_idx]))
print(train_scores_check)

# Absolute differences: a signed difference below the tolerance would also
# accept arbitrarily large negative deviations. Array arithmetic raises on a
# length mismatch instead of silently truncating like zip.
assert np.all(np.abs(np.asarray(train_scores)
                     - np.asarray(train_scores_check)) < 1e-12)
assert np.all(np.abs(np.asarray(test_scores)
                     - np.asarray(test_scores_check)) < 1e-12)
print('All OK')

# Check that CVGLMnet_tfh gives the same results as CVGLMnet.
print('Fitting with CVGLMnet_tfh...')

cvglm_tfh = CVGLMnet_tfh()

train_scores_tfh = []
test_scores_tfh = []
# The model is refit from scratch on each fold, so the loaded fits are not
# needed in this loop.
for train_idx, test_idx in folds:
    cvglm_tfh.fit(X_tfh[train_idx, :, :], y[train_idx])
    train_scores_tfh.append(cvglm_tfh.score(X_tfh[train_idx, :, :], y[train_idx]))
    test_scores_tfh.append(cvglm_tfh.score(X_tfh[test_idx, :, :], y[test_idx]))
# Show the scores of the tensor-input model that was just fitted.
print(train_scores_tfh)

print('Checking CVGLMnet_tfh fits are the same as the originals...')
assert np.all(np.abs(np.asarray(train_scores)
                     - np.asarray(train_scores_tfh)) < 1e-12)
assert np.all(np.abs(np.asarray(test_scores)
                     - np.asarray(test_scores_tfh)) < 1e-12)
print('All OK')

Loading fits from pickle...
Done
Checking loaded fits are the same as the originals...
[0.4682102245059006, 0.47400384174639504, 0.4622965626925786, 0.469275157081933, 0.46096343991818023, 0.47112922428386994, 0.4672439336936833, 0.4727721256036528, 0.4737993923863963, 0.4714265485790933]
All OK
Fitting with CVGLMnet_tfh...
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
Using 10 sequential folds for cross-validation


[status]	Parallel glmnet cv with 10 cores


Refitting with best lambda on whole training set
[0.4682102245059006, 0.47400384174639504, 0.4622965626925786, 0.469275157081933, 0.46096343991818023, 0.47112922428386994, 0.4672439336936833, 0.4727721256036528, 0.4737993923863963, 0.4714265485790933]
Checking CVGLMnet_tfh fits are the same as the originals...
All OK


In [None]:
# Disabled cell: regenerates the cvglmnet_test_X.csv / cvglmnet_test_y.csv
# fixtures from the project's cluster data. Change the guard to True to run.
if False:
    from csv import writer
    from hierarchy import *

    clusters = Clusters()

    n_h = 8

    segments = clusters.stimulus.X_tf
    y_t = clusters.clusters[0]['y_t']
    X_tfh = tensorize_segments(segments, n_h=n_h)
    n_t, n_f = X_tfh.shape[:2]
    # Flatten everything after the first axis so each sample is one CSV row.
    X_t_fh = X_tfh.reshape(X_tfh.shape[0], -1)

    # newline='' is required by the csv module; without it Windows writes
    # '\r\r\n' line endings that read back as blank rows.
    with open('cvglmnet_test_X.csv', 'w', newline='') as csvfile:
        wrtr = writer(csvfile)
        for row in X_t_fh:
            wrtr.writerow(row)

    with open('cvglmnet_test_y.csv', 'w', newline='') as csvfile:
        wrtr = writer(csvfile)
        # Written as a single CSV row; assumes y_t is 1-D - TODO confirm.
        wrtr.writerow(y_t.T)