# Boosting

In [18]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, r2_score, accuracy_score
from sklearn.svm import  SVC
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, ElasticNet
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
import warnings
warnings.simplefilter('ignore')
from sklearn.ensemble import VotingClassifier, BaggingClassifier, BaggingRegressor, RandomForestClassifier, AdaBoostClassifier, AdaBoostRegressor
import matplotlib.pyplot as plt

## Kyphosis dataset

In [3]:
# Read the Kyphosis case-study CSV into `kyp` and preview the first five rows.
kyp = pd.read_csv(filepath_or_buffer="./Cases/Kyphosis/Kyphosis.csv")
kyp.head(n=5)

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15


In [4]:
# Separate the 'Kyphosis' label from the remaining columns, then hold out
# 30% of the rows as a test set using a seeded split stratified on the label.
y = kyp['Kyphosis']
X = kyp.drop(columns='Kyphosis')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
    stratify=y,
)

In [5]:
# AdaBoost (at most 20 rounds) over an L2-penalised logistic regression;
# print the test-set F1 score with 'present' as the positive class.
lr = LogisticRegression(penalty='l2')
ada = AdaBoostClassifier(
    estimator=lr,
    n_estimators=20,
    random_state=24,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself
y_pred = ada.predict(X_test)
print(f1_score(y_true=y_test, y_pred=y_pred, pos_label='present'))


0.5


In [6]:
# Same experiment with AdaBoost's default base learner (no `estimator`
# argument is passed); print the test-set F1 score for the 'present' class.
ada = AdaBoostClassifier(
    n_estimators=20,
    random_state=24,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself
y_pred = ada.predict(X_test)
print(f1_score(y_true=y_test, y_pred=y_pred, pos_label='present'))


0.5714285714285714


## Sonar dataset

In [7]:
# Read the Sonar case-study CSV into `sonar` and preview the first five rows.
sonar = pd.read_csv(filepath_or_buffer="./Cases/Sonar/Sonar.csv")
sonar.head(n=5)

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V52,V53,V54,V55,V56,V57,V58,V59,V60,Class
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [8]:
# 'Class' is the label; every other column is used as a feature.
# Hold out 30% of the rows with a seeded split stratified on the label.
y = sonar['Class']
X = sonar.drop(columns='Class')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
    stratify=y,
)

In [None]:
# AdaBoost (at most 50 rounds) over an L2-penalised logistic regression;
# print the test-set F1 score with 'R' as the positive class.
lr = LogisticRegression(penalty='l2')
ada = AdaBoostClassifier(
    estimator=lr,
    n_estimators=50,
    random_state=24,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself
y_pred = ada.predict(X_test)
print(f1_score(y_true=y_test, y_pred=y_pred, pos_label='R'))

0.7692307692307693


## Concrete Strength dataset

In [10]:
# Read the Concrete Strength CSV into `concrete` and preview the first five rows.
concrete = pd.read_csv(filepath_or_buffer="./Cases/Concrete Strength/Concrete_Data.csv")
concrete.head(n=5)

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [11]:
# 'Strength' is the regression target; the remaining columns are features.
# Seeded 70/30 train/test split (no stratification is requested here).
y = concrete['Strength']
X = concrete.drop(columns='Strength')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

In [17]:
# Compare AdaBoostRegressor base learners on the concrete data: the library
# default (estimator=None), plain linear regression, and ElasticNet.
# Each model is fitted on the training split and its test-set R2 is printed.
lr, elastic = LinearRegression(), ElasticNet()
estimator = [None, lr, elastic]
for i in estimator:
    ada = AdaBoostRegressor(
        estimator=i,
        random_state=24,
    ).fit(X_train, y_train)  # fit() returns the fitted estimator itself
    y_pred = ada.predict(X_test)
    print(f'For estimator for {i}, R2_score : {r2_score(y_test, y_pred)}')

For estimator for None, R2_score : 0.747243739757216
For estimator for LinearRegression(), R2_score : 0.596872571772644
For estimator for ElasticNet(), R2_score : 0.5834490218112827
