In [35]:
from __future__ import print_function

import sys

# import numpy as np
import hpsklearn
from hyperopt import tpe
from sklearn import datasets

try:
    # Preferred location, and the one the later cells of this notebook use.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location, kept as a fallback for old scikit-learn installs.
    from sklearn.cross_validation import train_test_split
 
def test_demo_boston():
 
    boston = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, test_size=.25, random_state=1)
 
    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=hpsklearn.components.any_preprocessing('pp'),
        regressor=hpsklearn.components.any_regressor('reg'),
        algo=tpe.suggest,
        trial_timeout=15.0,  # seconds
        max_evals=10,
        seed=1
    )
 
    # /BEGIN `Demo version of estimator.fit()`
    print('', file=sys.stderr)
    print('====Demo regression on Boston dataset====', file=sys.stderr)
 
 
    iterator = estimator.fit_iter(X_train, y_train)
    next(iterator)
 
    n_trial = 0
    while len(estimator.trials.trials) < estimator.max_evals:
        iterator.send(1)  # -- try one more model
        n_trial += 1
        print('Trial', n_trial, 'loss:', estimator.trials.losses()[-1], 
              file=sys.stderr)
        # hpsklearn.demo_support.scatter_error_vs_time(estimator)
        # hpsklearn.demo_support.bar_classifier_choice(estimator)
 
    estimator.retrain_best_model_on_full_data(X_train, y_train)
 
    # /END Demo version of `estimator.fit()`
 
    print('Test R2:', estimator.score(X_test, y_test), file=sys.stderr)
    print('Best model:',estimator.best_model(), file=sys.stderr)
    print('====End of demo====', file=sys.stderr)
# Run the Boston regression demo as soon as this cell executes.
test_demo_boston()


====Demo regression on Boston dataset====
Trial 1 loss: 0.19759424318745022
Trial 2 loss: 0.14366457166851354
Trial 3 loss: 0.4707631958232238
Trial 4 loss: 0.599141331680336
Trial 5 loss: 8.638719700452809
Trial 6 loss: 0.9934061336748272
Trial 7 loss: 0.16280310633246675
Trial 8 loss: 0.24592559042690443
Trial 9 loss: 0.866110870321711
Trial 10 loss: 6.6132039790501524
Test R2: 0.822591701166
Best model: {'learner': AdaBoostRegressor(base_estimator=None, learning_rate=0.17142473298939273,
         loss='linear', n_estimators=57, random_state=1), 'preprocs': (MinMaxScaler(copy=True, feature_range=(0.0, 1.0)),), 'ex_preprocs': ()}
====End of demo====


In [None]:
from __future__ import print_function
# import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from hyperopt import tpe
import hpsklearn
import sys
from sklearn import preprocessing
from sklearn import utils

# check the official repo for info: https://github.com/hyperopt/hyperopt-sklearn
def test_demo_iris():
 
    iris = datasets.load_iris()
    lab_enc = preprocessing.LabelEncoder()
    iris.target = lab_enc.fit_transform(iris.target.astype('int'))
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=.25, random_state=1)
 

    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=hpsklearn.components.any_preprocessing('pp'),
        classifier=hpsklearn.components.any_classifier('clf'),
        algo=tpe.suggest,
        trial_timeout=300,  # seconds
        max_evals=10,
        seed=1
    )

    # /BEGIN `Demo version of estimator.fit()`
    print('', file=sys.stderr)
    print('====Demo classification on Iris dataset====', file=sys.stderr)
 
    iterator = estimator.fit_iter(X_train, y_train)
    next(iterator)
 
    n_trial = 0
    while len(estimator.trials.trials) < estimator.max_evals:
        iterator.send(1)  # -- try one more model
        n_trial += 1
        print('Trial', n_trial, 'loss:', estimator.trials.losses()[-1], file=sys.stderr)
        #hpsklearn.scatter_error_vs_time(estimator)
        #hpsklearn.demo_support.bar_classifier_choice(estimator)
 
    estimator.retrain_best_model_on_full_data(X_train, y_train)
 
    # /END Demo version of `estimator.fit()`
 
    print('Test accuracy:', estimator.score(X_test, y_test), file=sys.stderr)
    print('Best model:',estimator.best_model(), file=sys.stderr)
    print('====End of demo====', file=sys.stderr)

# Run the Iris classification demo as soon as this cell executes.
test_demo_iris()
# -- flake8 eof

In [None]:
# LOAD FROM CSV FILE

import pandas as pd

# Path of the CSV to load. Deliberately NOT called `input`: that name would
# shadow the built-in input() for every later cell in the kernel.
# NOTE(review): absolute, machine-specific path -- adjust for your environment.
csv_path = "D:/machine_learning/datasets/iris.csv"

# Read the file in chunks of 10000 rows (read_csv returns an iterator of
# frames here) and stitch the chunks into one frame with a fresh 0..n-1 index.
txt = pd.read_csv(csv_path, sep=',', iterator=True, chunksize=10000)
data = pd.concat(txt, ignore_index=True)

In [None]:
# SIMPLE SPLITTING

# Imported here so the cell does not depend on an earlier demo cell having run.
from sklearn.model_selection import train_test_split

# Option 1: ordered split -- the first 70% of the rows train, the rest test.
train_perc = .7
# int() truncates exactly like floor for a non-negative row count and always
# yields an integer slice bound; the previous math.floor call relied on a
# `math` import that does not exist in this notebook.
train_size = int(data.shape[0] * train_perc)
train, test = data[:train_size], data[train_size:]

# or

# Option 2: shuffled 75/25 split of features and labels.
# NOTE(review): X and y were not defined anywhere in the notebook. This assumes
# the LAST column of the CSV is the label and every other column is a
# feature -- confirm against the actual file layout.
# `.values` gives plain arrays, which the positional indexing in the K-fold
# cell (X[train_index]) needs.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.25, random_state=1)

In [None]:
# K-FOLD CROSS VALIDATION

# Imported here because no other cell of the notebook brings these names in.
from sklearn import tree
from sklearn.model_selection import KFold

# 10 shuffled folds; seeded like the other splits in this notebook so the fold
# assignment is repeatable across runs.
kf = KFold(n_splits=10, shuffle=True, random_state=1)

# One accuracy per fold. Previously only the last fold's score survived the loop.
scores = []
# NOTE(review): X and y must support positional array indexing (e.g. numpy
# arrays) for X[train_index] to select rows -- confirm how they are built.
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(X_train, y_train)
    sc = clf.score(X_test, y_test)
    scores.append(sc)

# Average accuracy over all folds; shown as the cell's output.
mean_score = sum(scores) / len(scores)
mean_score

In [3]:
# HYPERPARAMETER & CLASSIFIER MODEL TUNING

from __future__ import print_function
from sklearn import datasets
from sklearn.model_selection import train_test_split
from hyperopt import tpe
import hpsklearn
import sys
from sklearn import preprocessing
from sklearn import utils

# https://github.com/hyperopt/hyperopt-sklearn

def split_and_tune(X, y):
    """Hold out 25% of ``X``/``y`` and run ``tune`` on the resulting split.

    The split uses ``random_state=1``. Nothing is returned.
    """
    partitions = train_test_split(X, y, test_size=.25, random_state=1)
    # The original unpacked the split result as X_train, X_test, y_train,
    # y_test; passing it through in the same order matches tune()'s
    # positional parameters.
    tune(*partitions)
    
def tune(X_train, X_test, y_train, y_test,
         max_evals=10, trial_timeout=300, seed=1, dataset_name='Iris'):
    """Search preprocessing + classifier choices with hyperopt-sklearn.

    The search is stepped one trial at a time through ``fit_iter`` so each
    trial's loss can be reported. The best model is then refit on the
    training data and scored on the test data. All messages are written to
    stderr; nothing is returned.

    Parameters
    ----------
    X_train, y_train : training features / labels handed to the search.
    X_test, y_test : held-out features / labels used for the final score.
    max_evals : number of trials; the loop stops once the estimator's
        trial list holds this many entries.
    trial_timeout : forwarded to ``HyperoptEstimator`` as ``trial_timeout``
        (seconds).
    seed : forwarded to ``HyperoptEstimator`` as ``seed``.
    dataset_name : label used in the banner line only. The default keeps
        the banner identical to the previously hard-coded "Iris" text.
    """
    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=hpsklearn.components.any_preprocessing('pp'),
        classifier=hpsklearn.components.any_classifier('clf'),
        algo=tpe.suggest,
        trial_timeout=trial_timeout,  # seconds
        max_evals=max_evals,
        seed=seed,
    )

    print('\n====Demo classification on {} dataset===='.format(dataset_name),
          file=sys.stderr)
    iterator = estimator.fit_iter(X_train, y_train)
    next(iterator)  # advance to the first yield so send() can be used
    n_trial = 0
    while len(estimator.trials.trials) < estimator.max_evals:
        iterator.send(1)  # -- try one more model
        n_trial += 1
        print('Trial', n_trial, 'loss:', estimator.trials.losses()[-1],
              file=sys.stderr)
    estimator.retrain_best_model_on_full_data(X_train, y_train)
    print('Test accuracy:', estimator.score(X_test, y_test), file=sys.stderr)
    print('Best model:', estimator.best_model(), file=sys.stderr)
    print('====End of demo====', file=sys.stderr)

# Load the bundled Iris data and run the split + hyperparameter search on it.
D = datasets.load_iris()
split_and_tune(D.data, D.target)


====Demo classification on Iris dataset====
Trial 1 loss: 0.13043478260869568
Trial 2 loss: 0.08695652173913049
Trial 3 loss: 0.13043478260869568
Trial 4 loss: 0.04347826086956519
Trial 5 loss: 0.13043478260869568
Trial 6 loss: 0.7391304347826086
Trial 7 loss: 0.7391304347826086
Trial 8 loss: 0.4347826086956522
Trial 9 loss: 0.13043478260869568
Trial 10 loss: 0.13043478260869568
Test accuracy: 0.947368421053
Best model: {'learner': KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',
           metric_params=None, n_jobs=1, n_neighbors=41, p=2,
           weights='distance'), 'preprocs': (MinMaxScaler(copy=True, feature_range=(0.0, 1.0)),), 'ex_preprocs': ()}
====End of demo====
