<br/>

## Importing necessary modules

In [1]:
import os
import warnings

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')

<br/>

## Load the dataset
We will work with the **Iris** dataset.

In [2]:
# Location of the Iris CSV. Set the IRIS_CSV environment variable to point at
# your own copy of the file; when it is not set, the path used when this
# notebook was originally written is kept as the fallback.
IRIS_CSV_PATH = os.environ.get(
    'IRIS_CSV',
    'C:/Users/Mehedi Hassan Galib/Desktop/Python/datas/iris.csv',
)

df = pd.read_csv(IRIS_CSV_PATH)

<br/>

## Reconnaissance
<br/>

##### Head

In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,Id,sepal_length,sepal_width,petal_length,petal_width,species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


<br/>

##### Shape

In [4]:
# Dimensions of the loaded frame as (number of rows, number of columns).
df.shape

(150, 6)

<br/>

##### Basic statistics of the dataset

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Id,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


<br/>

##### Data types

In [6]:
# Data type pandas inferred for each column when the CSV was read.
df.dtypes

Id                int64
sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
dtype: object

<br/>

##### Checking the null values

In [7]:
# Count the missing entries in every column (isna is the same check as isnull).
missing_per_column = df.isna().sum()
missing_per_column

Id              0
sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

<br/>

## Splitting the dataset into explanatory (X) and response (y) variables
We will use **sepal_length**, **sepal_width**, **petal_length**, and **petal_width** as the explanatory variables and **species** as the response variable.

In [8]:
# Select columns by name rather than by position, so the selection keeps
# working (or fails loudly with a KeyError) if the CSV's column order changes.
# The `Id` column is deliberately left out: it is a row counter, not a feature.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
target_column = 'species'

X = df[feature_columns]
y = df[target_column]

<br/>

## Scaling

In [9]:
# StandardScaler is imported together with the other dependencies in the
# notebook's first cell, so this cell only does the scaling itself.

# Standardise every feature column (zero mean, unit variance by default).
# NOTE(review): the scaler is fit on every row, including rows that later land
# in the test split, so `scaled_X` carries test-set statistics. Features used
# to evaluate a model should be scaled with statistics learned from the
# training rows only.
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)

<br/>

## Splitting into train and test set

In [10]:
# Split the raw (unscaled) features first, so the held-out rows cannot
# influence any preprocessing statistics. The split is stratified on the label
# and seeded, which keeps the class balance and makes it reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y
)

# Learn the mean and standard deviation from the training rows only, then
# apply that same transformation to both splits. Fitting the scaler on the
# full dataset would leak information about the test rows into training.
split_scaler = StandardScaler().fit(X_train_raw)
X_train = split_scaler.transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)

<br/>

# AdaBoostClassifier

In [11]:
# Boosted ensemble of 100 weak learners, using scikit-learn's default weak
# learner. random_state pins the ensemble's internal randomness so that a
# fresh "Restart & Run All" reproduces the same scores.
ada1 = AdaBoostClassifier(n_estimators=100, learning_rate=1, random_state=0)

# fit() returns the fitted estimator itself, so model1 and ada1 are the same object.
model1 = ada1.fit(X_train, y_train)

# Predicted labels for the rows the model was trained on and for the held-out rows.
y1_pred_train = ada1.predict(X_train)
y1_pred_test = ada1.predict(X_test)

# Built-in round() is used instead of the NumPy-only .round() method, so the
# report works whether accuracy_score returns a NumPy scalar or a plain float.
print('Trainig Accuracy : {}'.format(round(accuracy_score(y_train, y1_pred_train), 4)))
print('\nTesting Accuracy : {}'.format(round(accuracy_score(y_test, y1_pred_test), 4)))

Trainig Accuracy : 0.9667

Testing Accuracy : 0.9333


<br/>

# AdaBoostClassifier with various n_estimators and learning rates

In [12]:
# Hyper-parameter grid: every ensemble size is paired with every learning rate.
n_est = [800, 900, 1000]
lr = [0.01, 0.05, 0.09, 0.1, 1]

# One accuracy per (n_estimators, learning_rate) combination, in loop order.
training_accuracy = []
testing_accuracy = []

for i in n_est:
    for j in lr:
        # random_state pins the ensemble's internal randomness so the grid
        # gives the same scores on every run.
        ada2 = AdaBoostClassifier(n_estimators=i, learning_rate=j, random_state=0)

        # fit() returns the fitted estimator itself (model2 is ada2).
        model2 = ada2.fit(X_train, y_train)

        # Predicted labels for the training rows and for the held-out rows.
        y2_pred_train = ada2.predict(X_train)
        y2_pred_test = ada2.predict(X_test)

        # Score this combination and record it.
        accuracy_train = accuracy_score(y_train, y2_pred_train)
        accuracy_test = accuracy_score(y_test, y2_pred_test)
        training_accuracy.append(accuracy_train)
        testing_accuracy.append(accuracy_test)

# Report the best score over the whole grid. The maximum is taken over the
# collected lists; `accuracy_train` / `accuracy_test` only hold the scores of
# the last combination tried.
# NOTE: the best test accuracy is picked using the test set itself, so treat it
# as an optimistic estimate.
print('Training accuracy : {}'.format(round(np.max(training_accuracy), 4)))
print('Testing accuracy : {}'.format(round(np.max(testing_accuracy), 4)))

Training accuracy : 0.9667
Testing accuracy : 0.9333


<br/>

# Changing the base estimator
**Note:**  
In AdaBoost, the default base estimator is a **Decision Tree** (a depth-1 tree, i.e. a decision stump). We can swap it for another classifier through the **base_estimator** parameter (renamed **estimator** in scikit-learn 1.2).

In [13]:
# Weak learner to boost in place of the default decision tree.
# NOTE: this rebinds `lr`, which the grid-search cell uses for its list of
# learning rates; re-run that cell before reusing the list.
lr = LogisticRegression()

# The weak learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (and the old name was
# later removed); the positional form works on both sides of the rename.
# random_state pins the ensemble's internal randomness for reproducible scores.
ada3 = AdaBoostClassifier(lr, n_estimators=100, learning_rate=1, random_state=0)

# fit() returns the fitted estimator itself, so model3 and ada3 are the same object.
model3 = ada3.fit(X_train, y_train)

# Predicted labels for the rows the model was trained on and for the held-out rows.
y3_pred_train = ada3.predict(X_train)
y3_pred_test = ada3.predict(X_test)

# Built-in round() is used instead of the NumPy-only .round() method, so the
# report works whether accuracy_score returns a NumPy scalar or a plain float.
print('Trainig Accuracy : {}'.format(round(accuracy_score(y_train, y3_pred_train), 4)))
print('\nTesting Accuracy : {}'.format(round(accuracy_score(y_test, y3_pred_test), 4)))

Trainig Accuracy : 0.8833

Testing Accuracy : 0.9333
