# Hyper-parameter Optimization (HPO) Exercise


In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old aliases, so prefer the new name when it
# is available and fall back to the legacy name on older installations.
_white_style = 'seaborn-v0_8-white'
plt.style.use(_white_style if _white_style in plt.style.available else 'seaborn-white')

In [3]:
# Carseats Sales Prediction dataset: read the CSV and discard the
# 'Unnamed: 0' column that comes along with it.
df3 = pd.read_csv('Data/Carseats.csv')
df3 = df3.drop(columns='Unnamed: 0')

# Binary target: 1 when Sales is greater than 8, otherwise 0.
df3['High'] = (df3['Sales'] > 8).astype('int64')

# Replace ShelveLoc by integer codes, numbered in order of first appearance.
shelve_codes, shelve_levels = pd.factorize(df3['ShelveLoc'])
df3['ShelveLoc'] = shelve_codes

# Encode the two No/Yes columns as 0/1.
yes_no_codes = {'No': 0, 'Yes': 1}
df3['Urban'] = df3['Urban'].map(yes_no_codes)
df3['US'] = df3['US'].map(yes_no_codes)

# Features are every column except the raw Sales figure and the derived label.
X = df3.drop(columns=['Sales', 'High'])
y = df3['High']

X.describe()

Unnamed: 0,CompPrice,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,124.975,68.6575,6.635,264.84,115.795,1.3075,53.3225,13.9,0.705,0.645
std,15.334512,27.986037,6.650364,147.376436,23.676664,0.833475,16.200297,2.620528,0.456614,0.479113
min,77.0,21.0,0.0,10.0,24.0,0.0,25.0,10.0,0.0,0.0
25%,115.0,42.75,0.0,139.0,100.0,1.0,39.75,12.0,0.0,0.0
50%,125.0,69.0,5.0,272.0,117.0,2.0,54.5,14.0,1.0,1.0
75%,135.0,91.0,12.0,398.5,131.0,2.0,66.0,16.0,1.0,1.0
max,175.0,120.0,29.0,509.0,191.0,2.0,80.0,18.0,1.0,1.0


In [None]:
# Print the label vector as a plain NumPy array.
print(np.asarray(y))

### Task1: Grid search CV

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report



In [None]:
from sklearn.model_selection import GridSearchCV


In [None]:
# show 3D wireframe figure
from mpl_toolkits.mplot3d import axes3d

# Create a figure holding a single 3D axes.
# x_values, y_values and z_values are not defined in this cell; they must
# already exist in the notebook namespace before it is run.
fig, ax = plt.subplots(subplot_kw={'projection': '3d'})

ax.plot_wireframe(x_values, y_values, z_values)

### Task2: Random search CV

In [None]:
from scipy import stats
from sklearn.model_selection import RandomizedSearchCV



In [None]:
from mpl_toolkits.mplot3d import axes3d

# Create a figure holding a single 3D axes.
# x_values, y_values and z_values are not defined in this cell; they must
# already exist in the notebook namespace before it is run.
fig, ax = plt.subplots(subplot_kw={'projection': '3d'})

ax.scatter(x_values, y_values, z_values)

### (Optional) Task3: CVs with SVM

In [None]:
from sklearn.svm import SVC
# Generate a small synthetic dataset to avoid heavy computation.
# NOTE: this rebinds X and y, replacing the Carseats features/labels that
# were assigned to the same names earlier in the notebook.
np.random.seed(8)
X = np.random.randn(200,2)
# Rows 0-99 are shifted by +2 and rows 100-149 by -2; together they form the
# 150 samples labelled -1 below. Python slices are half-open, so the second
# slice has to start at 100 (starting at 101 would leave row 100 unshifted).
X[:100] += 2
X[100:150] -= 2
y = np.concatenate([np.repeat(-1, 150), np.repeat(1,50)])

# Hold out half of the samples for testing, with a fixed split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=2)

# Scatter plot of the two features, coloured by class label.
plt.scatter(X[:,0], X[:,1], s=70, c=y, cmap=plt.cm.Paired)
plt.xlabel('X1')
plt.ylabel('X2');

In [None]:
# Grid search CV here

In [None]:
# Randomized search CV here


## Bayesian optimization (with TPE)

* First, install the *hyperopt* package with **pip** as follows (**only once!**):
```
(your env) prompt> pip install hyperopt
```

In [None]:
import hyperopt
from hyperopt import hp
from hyperopt import fmin, tpe, rand

In [None]:
# define an objective function
def objective(args):
    """Toy objective for checking the hyperopt installation.

    Parameters
    ----------
    args : sequence of length 2
        A ``(case, val)`` pair: a label and a numeric value.

    Returns
    -------
    ``val`` unchanged when the label equals ``'case 1'``; otherwise ``val``
    squared.
    """
    label, value = args
    return value if label == 'case 1' else value ** 2

In [None]:
# define a search space
# Each branch is a (label, value) pair in the form objective() unpacks.
# Branch 1: 1 plus a lognormal draw named 'c1'.
case_1_branch = ('case 1', 1 + hp.lognormal('c1', 0, 1))
# Branch 2: a uniform draw named 'c2' over [-10, 10].
case_2_branch = ('case 2', hp.uniform('c2', -10, 10))

# The top-level choice 'a' selects one of the two branches.
space = hp.choice('a', [case_1_branch, case_2_branch])

In [None]:
# minimize the objective over the space
# Search algorithm: random search here; swap in tpe.suggest to use TPE.
hpo_algo = rand.suggest #tpe.suggest

# Run 100 evaluations of the objective over the search space.
best = fmin(objective, space, algo=hpo_algo, max_evals=100)

# Translate the raw result back into the (label, value) form of the space.
best_point = hyperopt.space_eval(space, best)
print("best x, y:{}".format(best_point))

If you see a result (the values may differ on each run), the installation completed successfully.

Before you start on HPO, please refer to the [hyperopt manual](http://hyperopt.github.io/hyperopt/).