In [1]:
import timeit
import multiprocessing
import ray
import numpy as np
import unittest
import warnings 

from scipy.stats import pearsonr

from sklearn.datasets import make_classification, make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, accuracy_score, explained_variance_score

import sklearn.model_selection
import pipecaster.metaprediction
from pipecaster import synthetic_data
from pipecaster.pipeline import Pipeline
from pipecaster.metaprediction import MetaClassifier, MetaRegressor
from pipecaster.model_selection import cross_val_score

In [37]:
import sklearn.model_selection 

# Baseline: an unscaled k-NN regressor on one synthetic feature matrix,
# scored with scikit-learn's own cross_val_score.
seed = None  # None -> make_regression draws a different dataset on each run

X, y = make_regression(
    n_samples=500,
    n_features=1000,
    n_informative=500,
    random_state=seed,
)

# fit() hands back the estimator, so construction and fitting are chained.
rgr = KNeighborsRegressor(n_neighbors=5, weights='uniform').fit(X, y)
# In-sample predictions; nothing below in this cell reads them.
rgr_predictions = rgr.predict(X)

# 3-fold cross-validated explained variance (the cell's displayed value).
sklearn.model_selection.cross_val_score(
    estimator=rgr,
    X=X,
    y=y,
    scoring='explained_variance',
    cv=3,
)

array([-0.04430255, -0.16294141, -0.03633957])

In [13]:
# Single-input pipeline: standard scaling -> k-NN regression -> mean-voting
# meta-regressor, scored with pipecaster's cross_val_score.
#
# The pipeline is sized from the input list itself instead of the
# notebook-level `n_inputs`: that name is only assigned in a later cell (for
# the multi-input experiments), so relying on it here fails on a fresh kernel
# and would not match the single matrix passed below.
Xs_single = [X]  # one feature matrix, produced by the make_regression cell
mrgr = Pipeline(len(Xs_single))
layer0 = mrgr.get_next_layer()
layer0[:] = StandardScaler()
layer1 = mrgr.get_next_layer()
layer1[:] = KNeighborsRegressor(n_neighbors=5, weights='uniform')
layer2 = mrgr.get_next_layer()
layer2[:] = MetaRegressor('mean voting')

# 3-fold cross-validated explained variance (the cell's displayed value).
cross_val_score(mrgr, Xs_single, y, prediction_method='predict',
                scoring_metric=explained_variance_score, cv=3, n_jobs=1, verbose=0,
                fit_params=None, error_score=np.nan)

[0.3343266057838239, 0.3112338234966967, 0.3576686030218642]

In [39]:

# Multi-input experiment: six informative input matrices, each scaled and
# fed to its own k-NN regressor, combined by a mean-voting meta-regressor.
seed = None  # None -> data generation is not reproducible across runs

n_inputs = 6
mean_accuracies = []
median_accuracies = []

# Extra keyword arguments forwarded to make_multi_input_regression.
sklearn_params = dict(
    n_targets=1,
    n_samples=500,
    n_features=10,
    n_informative=5,
)

i = 6  # number of informative inputs; the remaining n_inputs - i are random

Xs, y, _ = synthetic_data.make_multi_input_regression(
    n_informative_Xs=i,
    n_weak_Xs=0,
    n_random_Xs=n_inputs - i,
    weak_noise_sd=None,
    seed=seed,
    **sklearn_params
)

# Build the pipeline one layer at a time; `[:]` assigns to every input slot.
mrgr = Pipeline(n_inputs)

layer0 = mrgr.get_next_layer()
layer0[:] = StandardScaler()

layer1 = mrgr.get_next_layer()
layer1[:] = KNeighborsRegressor(n_neighbors=5, weights='uniform')

layer2 = mrgr.get_next_layer()
layer2[:] = MetaRegressor('mean voting')

# 3-fold cross-validated explained variance (the cell's displayed value).
cross_val_score(
    mrgr,
    Xs,
    y,
    prediction_method='predict',
    scoring_metric=explained_variance_score,
    cv=3,
    n_jobs=1,
    verbose=0,
    fit_params=None,
    error_score=np.nan,
)

[0.16146119895625055, 0.15251922589133493, 0.18415469885178082]

In [54]:
from sklearn.linear_model import LinearRegression
from pipecaster.model_selection import split_Xs
from sklearn.metrics import explained_variance_score

# Multi-input experiment with LinearRegression base models, evaluated on a
# fixed hold-out split (first 400 samples train, last 100 test).
seed = None  # None -> data generation is not reproducible across runs

n_inputs = 6

# Extra keyword arguments forwarded to make_multi_input_regression.
sklearn_params = dict(
    n_targets=1,
    n_samples=500,
    n_features=10,
    n_informative=5,
)

i = 6  # number of informative inputs; the remaining n_inputs - i are random

Xs, y, _ = synthetic_data.make_multi_input_regression(
    n_informative_Xs=i,
    n_weak_Xs=0,
    n_random_Xs=n_inputs - i,
    weak_noise_sd=None,
    seed=seed,
    **sklearn_params
)

# Build the pipeline one layer at a time; `[:]` assigns to every input slot.
mrgr = Pipeline(n_inputs)

layer0 = mrgr.get_next_layer()
layer0[:] = StandardScaler()

layer1 = mrgr.get_next_layer()
layer1[:] = LinearRegression()

layer2 = mrgr.get_next_layer()
layer2[:] = MetaRegressor('mean voting')

# Hold-out split; the bounds match n_samples=500 in sklearn_params.
train_indices = range(400)
test_indices = range(400, 500)
y_train = y[train_indices]
y_test = y[test_indices]
Xs_train, Xs_test = split_Xs(Xs, train_indices, test_indices)

mrgr.fit(Xs_train, y_train)
# Explained variance on the held-out samples (the cell's displayed value).
explained_variance_score(y_true=y_test, y_pred=mrgr.predict(Xs_test))

0.33997925248192495

In [55]:
from joblib import dump, load
# Persist the fitted pipeline to 'save_test.joblib' (a path relative to the current working directory).
dump(mrgr, 'save_test.joblib') 

['save_test.joblib']

In [56]:
# Reload the pipeline written by the dump cell.
# NOTE(review): joblib files are pickle-based as far as I know -- only load files you trust.
loaded_mrgr = load('save_test.joblib') 

In [57]:
# Score the reloaded pipeline on the same hold-out split; a successful round trip should reproduce the in-memory pipeline's score.
explained_variance_score(y_test, loaded_mrgr.predict(Xs_test))

0.33997925248192495

In [58]:
# Integers 0..8, used by a later cell to try indexing with a set of positions.
x = np.arange(0, 9)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [60]:
# Positions to select from `x`.
# Note: this rebinds the notebook-level `i` that earlier cells used as an integer count.
i = {4, 6}

In [62]:
# A set cannot index an array directly, so convert it to a list first.
# sorted() is used instead of list() because set iteration order is not a
# defined ordering; sorting returns the selected elements in ascending
# position order every time.
x[sorted(i)]

array([4, 6])

In [63]:
import ray
# Start (or attach to) a local Ray runtime.
# ignore_reinit_error=True makes the cell safe to re-run in a live kernel:
# a second call would otherwise raise because Ray is already initialised.
ray.init(ignore_reinit_error=True)

2020-11-25 22:12:36,684	INFO resource_spec.py:212 -- Starting Ray with 4.44 GiB memory available for workers and up to 2.24 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-11-25 22:12:37,826	INFO services.py:551 -- Failed to connect to the redis server, retrying.
2020-11-25 22:12:39,005	INFO services.py:1148 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


{'node_ip_address': '192.168.1.251',
 'redis_address': '192.168.1.251:32541',
 'object_store_address': '/tmp/ray/session_2020-11-25_22-12-36_431580_6738/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-11-25_22-12-36_431580_6738/sockets/raylet',
 'webui_url': 'localhost:8265',
 'session_dir': '/tmp/ray/session_2020-11-25_22-12-36_431580_6738'}

In [73]:
class Foobar:
    """Namespace for a Ray remote task defined inside a class body."""

    @ray.remote
    def run(x):
        """Return ``2*x``; submitted via ``Foobar.run.remote(x)`` (no ``self``)."""
        doubled = 2*x
        return doubled

In [74]:
# Submit ten remote tasks (inputs 0..9); each call returns a handle that is resolved later with ray.get.
jobs = list(map(Foobar.run.remote, range(10)))

##### 25*60


In [79]:
# Scratch arithmetic: 1500 equals the 25*60 noted just above; what 250000 stands for is not recorded in this notebook.
250000/1500


166.66666666666666

In [75]:
# Wait for the submitted tasks and fetch their results as a list, one per entry in `jobs`.
ray.get(jobs)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [53]:
from inspect import signature, getfullargspec

class Foobar:
    """Toy class that records the names of its own constructor parameters."""

    def __init__(self, a, b, c):
        """Store the parameter names of ``__init__`` (without ``self``) in ``param_names``.

        The values of ``a``, ``b`` and ``c`` are not stored; only their names
        are discovered via introspection.
        """
        init_spec = getfullargspec(Foobar.__init__)
        # `.args` is the first field of the FullArgSpec tuple: the positional parameter names.
        self.param_names = set(init_spec.args) - {'self'}

In [54]:
# Instantiate with arbitrary values; the constructor only records parameter names.
fb = Foobar(1,2,3)

In [55]:
# Display the parameter names recorded by the constructor.
fb.param_names

{'a', 'b', 'c'}

In [43]:
# Parameter names of Foobar.__init__, excluding the implicit `self`.
# `fb` is a Foobar instance and has no `args` attribute, so the argspec is
# read from the class's constructor directly instead of from `fb`.
param_names = set(getfullargspec(Foobar.__init__).args)
param_names.remove('self')

In [44]:
# Display the resulting set of parameter names.
param_names

{'a', 'b', 'c'}