In [1]:
import os
import six
import glob
import pickle

import numpy as np
import pandas as pd
import scipy.stats as spst

import p2pspatial
import pulse2percept.utils as p2pu

from sklearn.base import clone
import sklearn.metrics as sklm

# Plotting setup: IPython magic selecting the inline matplotlib backend,
# followed by the 'ggplot' style sheet for all subsequent figures.
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')

2018-04-08 04:44:21,646 [pulse2percept] [INFO] Welcome to pulse2percept


# Load data and update result files

In [2]:
def fix_data(pickle_files, verbose=True):
    """Backfill missing bookkeeping fields in pickled cross-validation results.

    Every file is expected to contain a 4-tuple
    ``(y_test, y_pred, best_params, specifics)``. If the ``specifics`` dict
    lacks any of 'idx_fold', 'best_train_score' or 'best_test_score', the
    missing entries are reconstructed and the file is rewritten in place:

    - 'idx_fold' is parsed from the file name (the number that follows
      "shape3cvLOO-" and precedes the next "-").
    - 'best_test_score' is taken from the legacy 'best_score' entry (which is
      then removed), or set to ``np.inf`` if that is missing, too.
    - 'best_train_score' is recomputed by refitting the stored regressor on
      all samples that are not part of the held-out fold.

    Parameters
    ----------
    pickle_files : iterable of str
        Paths of the pickle files to inspect. Only pass files you trust:
        unpickling can execute arbitrary code.
    verbose : bool, optional, default: True
        If True, additionally print the name of every file processed and a
        note for files that are already up-to-date. Messages about skipped
        or rewritten files are printed regardless of this flag.

    Returns
    -------
    None
        Results are written back to the pickle files.

    Raises
    ------
    AssertionError
        If 'idx_fold' is missing and the file name does not contain the
        "shape3cvLOO-" marker.
    KeyError
        If the training score must be recomputed and the environment variable
        SECOND_SIGHT_DATA or a required entry of ``specifics`` is missing.
    ValueError
        If the training score must be recomputed but no data was found.
    """
    required_keys = ('idx_fold', 'best_train_score', 'best_test_score')
    fold_marker = "shape3cvLOO-"

    for pickle_file in pickle_files:
        if verbose:
            print('- Processing %s' % pickle_file)
        # Context manager makes sure the handle is closed before the same
        # file is reopened for writing further down
        with open(pickle_file, 'rb') as f:
            y_test, y_pred, best_params, specifics = pickle.load(f)

        if all(key in specifics for key in required_keys):
            if verbose:
                print("  - File up-to-date, skip.")
            continue

        if 'idx_fold' in specifics:
            idx_fold = specifics['idx_fold']
        else:
            marker_pos = pickle_file.find(fold_marker)
            assert marker_pos >= 0, \
                'File name does not contain "%s"' % fold_marker
            idx_begin = marker_pos + len(fold_marker)
            idx_end = pickle_file[idx_begin:].find("-")
            # The fold number must be 1-2 characters long and terminated by a
            # dash; a missing dash (find returns -1) or an empty field cannot
            # be parsed and is treated like any other file without a fold
            fold_str = ''
            if 1 <= idx_end <= 2:
                fold_str = pickle_file[idx_begin:idx_begin + idx_end]
            try:
                idx_fold = int(fold_str)
            except ValueError:
                print('  - No CV fold found, skip.')
                continue

        if 'best_test_score' in specifics:
            best_test_score = specifics['best_test_score']
        elif 'best_score' in specifics:
            # Legacy key: keep its first element, then drop the old entry
            best_test_score = specifics['best_score'][0]
            del specifics['best_score']
        else:
            best_test_score = np.inf

        if 'best_train_score' in specifics:
            best_train_score = specifics['best_train_score']
        else:
            # Reload the data set this result was computed on, using the
            # settings stored alongside the result
            rootfolder = os.path.join(os.environ['SECOND_SIGHT_DATA'], 'shape')
            X, y = p2pspatial.load_data(rootfolder,
                                        subject=specifics['subject'],
                                        electrodes=None,
                                        amplitude=specifics['amplitude'],
                                        random_state=42,
                                        n_jobs=1, verbose=False)
            X, y = p2pspatial.exclude_bistables(X, y)
            if specifics['adjust_bias']:
                drawing = specifics['drawing']
                y = p2pspatial.adjust_drawing_bias(X, y,
                                                   scale_major=drawing['major'],
                                                   scale_minor=drawing['minor'],
                                                   rotate=drawing['orient'])
                print('  - Adjusted for drawing bias:', X.shape, y.shape)
            if len(X) == 0:
                raise ValueError('No data found. Abort.')
            if specifics['avg_img']:
                X, y = p2pspatial.calc_mean_images(X, y)

            # NOTE(review): `reg` is the very object stored in `specifics`,
            # so its updated parameters end up in the rewritten pickle.
            # Confirm this is intended.
            reg = specifics['regressor']
            reg.set_params(**best_params[0])
            # Training set: everything except the rows of the held-out fold
            held_out = y_test[0].index
            # NOTE(review): fit is called without targets; confirm that the
            # regressor's fit() does not need `y`.
            reg.fit(X.drop(held_out))
            reg.set_params(engine='serial')
            best_train_score = reg.score(X.drop(held_out), y.drop(held_out))

        print('  - idx_fold=%d, best_train_score=%f, best_test_score=%f' % (
            idx_fold, best_train_score, best_test_score))
        specifics['idx_fold'] = idx_fold
        specifics['best_train_score'] = best_train_score
        specifics['best_test_score'] = best_test_score

        # Overwrite the original file; the handle is closed (and flushed) as
        # soon as the dump has finished
        with open(pickle_file, 'wb') as f:
            pickle.dump((y_test, y_pred, best_params, specifics), f)
        print('  - Dumped new data to', pickle_file)

In [3]:
# Collect all pickled results of the shape3cv run, sorted by file name
results_dir = '../../results/shape3cv/'
_pickle_pattern = os.path.join(results_dir, '*.pickle')
pickle_files = np.sort(glob.glob(_pickle_pattern))
print('Found', len(pickle_files), 'files')

Found 1158 files


In [None]:
# Notebook-level settings: subject IDs, parameter values and the data folder.
# NOTE(review): `assert_params` presumably lists values the results are
# expected to have been computed with - confirm against the cells using it.
subjects = ['12-005', '51-009', '52-001', 'TB']
assert_params = dict(amplitude=2.0)
# Data lives in the 'shape' subfolder of $SECOND_SIGHT_DATA
rootfolder = os.path.join(os.environ['SECOND_SIGHT_DATA'], 'shape')

In [None]:
# Update every result file in place. fix_data has no return statement, so
# `data` is bound to None; the per-file messages below come from prints that
# are not gated by `verbose`.
data = fix_data(pickle_files=pickle_files, verbose=False)

  - idx_fold=12, best_train_score=11.831376, best_test_score=3708451533.234240
  - Dumped new data to ../../results/shape3cv/51-009_D__shape3cvLOO-12-swarm_2018-04-08_01-21-51.pickle
  - idx_fold=2, best_train_score=9.539236, best_test_score=464341778997.627747
  - Dumped new data to ../../results/shape3cv/51-009_D__shape3cvLOO-2-swarm_2018-04-08_00-54-21.pickle
  - idx_fold=3, best_train_score=7.423799, best_test_score=158341419598.408722
  - Dumped new data to ../../results/shape3cv/51-009_D__shape3cvLOO-3-swarm_2018-04-08_03-10-27.pickle
  - idx_fold=8, best_train_score=10.041312, best_test_score=462407965844.873047
  - Dumped new data to ../../results/shape3cv/51-009_D__shape3cvLOO-8-swarm_2018-04-08_00-41-41.pickle
  - Adjusted for drawing bias: (90, 10) (90, 8)
  - idx_fold=1, best_train_score=14.738177, best_test_score=2033766455131.810303
  - Dumped new data to ../../results/shape3cv/51-009_D_adjust_shape3cvLOO-1-swarm_2018-04-07_13-22-57.pickle
  - Adjusted for drawing bias: (

In [None]:
# Completion marker: reaching this cell means all cells above have finished
print('All Done!')