In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import Normalizer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV

In [3]:
# Load the cleaned data set; the first CSV column is used as the row index.
df = pd.read_csv('../data_processing/cleaned-data.csv', index_col=0)

# Target: the G3 column, kept as a 1-D array (the layout scikit-learn
# estimators expect for `y`).  The former bare `y.reshape(-1, 1)` statement
# discarded its result, so it never changed `y`; it has been removed.
y = df.G3.values

# Features: every remaining column.  G1 and G2 are dropped together with the
# target G3, so none of the three is available to the model as an input.
X = df.drop(['G1', 'G2', 'G3'], axis=1).values

In [9]:
# Set up the pipeline steps: L1-normalise the features, then fit AdaBoost.
# AdaBoostClassifier is left on its default base estimator and boosting
# algorithm.  Spelling those defaults out (`base_estimator=None`,
# `algorithm='SAMME.R'`) breaks on newer scikit-learn releases, where these
# arguments were deprecated and then removed.
steps = [('norm', Normalizer(copy=True, norm='l1')),
         ('adab', AdaBoostClassifier(random_state=0))]

# Create the pipeline: pipeline
pipeline = Pipeline(steps)

# Specify the hyperparameter space; the `adab__` prefix routes each
# parameter to the 'adab' step of the pipeline.
parameters = {'adab__learning_rate':(.11, .1, .099),
             'adab__n_estimators':(900, 1000, 1100)}

# Create train and test sets (70% / 30%, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Create the GridSearchCV object: grade_predict_cv
# Candidates are ranked by negated MSE (higher is better) over 3 folds.
grade_predict_cv = GridSearchCV(pipeline, param_grid=parameters, scoring='neg_mean_squared_error', cv=3)

# Fit to the training set
grade_predict_cv.fit(X_train, y_train)

# Compute and print the metrics.
# NOTE: with scoring='neg_mean_squared_error', `score` returns the *negated*
# mean squared error, so `mse` is <= 0 and is printed with its minus sign.
mse = grade_predict_cv.score(X_test, y_test)
print("Tuned adaBoost estimators: {}".format(grade_predict_cv.best_params_))
print("Tuned adaBoost Mean Squared Error: {}".format(mse))
# abs() undoes the sign flip; the square root of the MSE is the RMSE.
print('Mean Error:', np.sqrt(abs(mse)))



Tuned adaBoost estimators: {'adab__learning_rate': 0.1, 'adab__n_estimators': 1000}
Tuned adaBoost Mean Squared Error: -11.601851851851851
Mean Error: 3.40614912354874
