# Classification

## Part 1

In [None]:
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
# train_test_split is provided by sklearn.model_selection; the legacy
# sklearn.cross_validation module is no longer available in scikit-learn.
from sklearn.model_selection import train_test_split

iris = load_iris()

# Hold out a quarter of the samples for the final evaluation.
# NOTE(review): the split is not seeded, so the score below can vary from
# run to run even though the classifier itself uses random_state=10.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    train_size=0.75, test_size=0.25)

# Boosted ensemble with a fixed seed.
ada = AdaBoostClassifier(n_estimators=100,
                         learning_rate=.5,
                         random_state=10)

ada.fit(X_train, y_train)

# Bare expression: shown as the notebook cell's output.
ada.score(X_test, y_test)

## Part 2

In [None]:
from sklearn.model_selection import GridSearchCV
import numpy as np

# Search grid: ensemble sizes 50..500 in steps of 50, and learning rates
# generated by np.arange(.1, 1.1, .1).
param_grid = dict(n_estimators=range(50, 550, 50),
                  learning_rate=np.arange(.1, 1.1, .1))

model_a = GridSearchCV(AdaBoostClassifier(random_state=10), param_grid)
model_a.fit(X_train, y_train)

# Position of the parameter combination with the highest mean test score.
mean_test_scores = model_a.cv_results_["mean_test_score"]
best_index = np.argmax(mean_test_scores)

print("Best index:", model_a.cv_results_["params"][best_index])
print("Mean test score:", max(mean_test_scores))
print("Results:", model_a.score(X_test, y_test))

# Regression

In [None]:
def get_cv_score(model, k):
    """Print and return the mean cross-validation score of a model.

    The model is scored with ``cross_val_score`` on ``boston.data`` and
    ``boston.target``; both ``cross_val_score`` and ``boston`` are looked up
    at module level.

    Args:
        model: Estimator handed to ``cross_val_score``.
        k: Value forwarded as ``cv`` (the number of folds when an integer).

    Returns:
        The mean of the per-fold scores.
    """
    cv_scores = cross_val_score(model, boston.data, boston.target, cv=k)
    # Compute the mean once; it is both reported and returned.
    mean_score = np.mean(cv_scores)

    print(model)
    print("CV scores:", cv_scores)
    print("Mean CV score:", mean_score)
    print()

    return mean_score

In [None]:
# Candidate regressors, keyed by the display name used when reporting.
models = {
    "Ridge": ridge_reg,
    "Elastic": elastic_reg,
    "Support Vector": sv_reg,
    "K-nearest neighbors": knn_reg,
    "Random Forests": rf_reg,
    "AdaBoost": ab_reg,
    "Neural Net": nn_reg,
}

# Mean 5-fold CV score for every candidate, keyed by the same display name.
cv_scores = {
    name: get_cv_score(regressor, 5)
    for name, regressor in models.items()
}

In [None]:
# Display name of the model whose mean CV score is highest.
max_score = max(cv_scores, key=cv_scores.get)

print("Best CV Score:")
print(max_score, cv_scores[max_score], sep=": ")
print()

print("Test Results:")
# fit() is chained, so best_model holds whatever fit() returns.
best_model = models[max_score].fit(X_train, y_train)
print(best_model.score(X_test, y_test))

# Clustering

In [None]:
from sklearn.cluster import DBSCAN

dbscan = DBSCAN(eps=.2)
dbscan.fit(noisy_moons)

# Pick out each cluster's rows with a boolean mask rather than a Python-level
# zip/filter loop. An empty selection keeps its 2-D shape, so unpacking `.T`
# below still yields one coordinate array per axis.
# Rows with any label other than 0 or 1 are not plotted.
points = np.asarray(noisy_moons)
zero = points[dbscan.labels_ == 0]
one = points[dbscan.labels_ == 1]

fig = plt.figure()
ax1 = fig.add_subplot(111)

# `*cluster.T` unpacks the coordinate columns as scatter's positional args.
ax1.scatter(*zero.T, s=50, c='b', label='zero')
ax1.scatter(*one.T, s=50, c='r', label='one')
plt.legend(loc='upper left')
plt.show()