# AdaBoost regression

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostRegressor.html

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the concrete dataset from the project-relative data directory,
# then show the first five rows as a quick sanity check of the columns.
concrete = pd.read_csv(filepath_or_buffer='data/concrete_data.csv')
concrete.head(n=5)

Unnamed: 0,cement,slag,flyash,water,superplasticizer,coarseaggregate,fineaggregate,age,csMPa
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [2]:
from sklearn.model_selection import train_test_split

# Features are every column except the target column 'csMPa'.
X = concrete.drop(columns='csMPa')
Y = concrete['csMPa']

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split (and every score computed from it) reproducible on Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [4]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor

# Boost 100 regression trees of depth 4 with the full learning rate.
# random_state seeds the boosting procedure so the reported score is
# reproducible when the notebook is re-run.
ada_reg = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                            n_estimators=100,
                            learning_rate=1.0,
                            random_state=42)

ada_reg.fit(x_train, y_train)
y_pred = ada_reg.predict(x_test)

# Coefficient of determination on the held-out rows.
r2_score(y_test, y_pred)

0.798783261909414

In [5]:
from sklearn.metrics import r2_score

# Second configuration for comparison: shallower trees (depth 2), twice as
# many estimators, and a halved learning rate. Note that this rebinds
# `ada_reg` and `y_pred` from the previous experiment.
# random_state seeds the boosting procedure so the score is reproducible.
ada_reg = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2),
                            n_estimators=200,
                            learning_rate=0.5,
                            random_state=42)

ada_reg.fit(x_train, y_train)
y_pred = ada_reg.predict(x_test)

# Coefficient of determination on the held-out rows.
r2_score(y_test, y_pred)

0.6603899828239691

# AdaBoost classification

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html

In [6]:
import pandas as pd

# Read the pre-processed bank dataset from the project-relative data
# directory, then show the first five rows to confirm the columns.
bank_data = pd.read_csv(filepath_or_buffer='data/bank_data_processed.csv')
bank_data.head(n=5)

Unnamed: 0,Age,Income,Family,Education,Mortgage,Securities Account,CD Account,Online,CreditCard
0,34,180,1,3,0,0,0,0,0
1,38,130,4,3,134,0,0,0,0
2,46,193,2,3,0,0,0,0,0
3,38,119,1,2,0,0,1,1,1
4,42,141,3,3,0,1,1,1,0


In [7]:
from sklearn.model_selection import train_test_split

# Features are every column except the label column 'CreditCard'.
# Note: this rebinds X, Y and the train/test variables used by the
# regression section above.
X = bank_data.drop(columns='CreditCard')
Y = bank_data['CreditCard']

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split (and every accuracy computed from it) reproducible on re-run.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [8]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Boost 100 decision stumps (depth-1 trees) with the discrete SAMME algorithm.
# random_state seeds the boosting procedure so the accuracy is reproducible.
# NOTE(review): recent scikit-learn releases deprecate the `algorithm`
# argument (SAMME becomes the only behaviour); it is kept explicit here so
# older releases do not fall back to their SAMME.R default. Confirm against
# the installed scikit-learn version.
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                             n_estimators=100,
                             algorithm='SAMME',
                             learning_rate=1.0,
                             random_state=42)

ada_clf.fit(x_train, y_train)
y_pred = ada_clf.predict(x_test)

# Fraction of held-out rows classified correctly.
accuracy_score(y_test, y_pred)

0.7708333333333334

In [9]:
# Second configuration for comparison: same stumps and estimator count, but a
# halved learning rate and the library's default boosting algorithm.
#
# The original cell passed algorithm='SAMME.R'. That option was deprecated in
# scikit-learn 1.4 and removed in 1.6, where passing it raises an error.
# Leaving `algorithm` unset keeps SAMME.R on older releases (it was the
# default there) and still runs on newer releases, which use SAMME.
# random_state seeds the boosting procedure so the accuracy is reproducible.
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                             n_estimators=100,
                             learning_rate=0.5,
                             random_state=42)

ada_clf.fit(x_train, y_train)

y_pred = ada_clf.predict(x_test)

# Fraction of held-out rows classified correctly.
accuracy_score(y_test, y_pred)

0.75