In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier

In [3]:
# Load the Iris measurements from a CSV file; the relative path is resolved
# against the kernel's working directory.
iris = pd.read_csv("Iris.csv")

In [5]:
# Work on a copy so the frame loaded into `iris` is left unmodified.
df = iris.copy()

In [6]:
# Preview the first five rows of the working frame.
df.head(5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
# (rows, columns) of the working frame.
df.shape

(150, 6)

In [8]:
# Count missing values in each column.
df.isna().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [9]:
# Distinct class labels present in the target column.
df["Species"].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [10]:
# Column labels of the working frame.
df.columns

Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

In [11]:
# Distinct sepal-length values, in order of first appearance.
df["SepalLengthCm"].unique()

array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.4, 4.8, 4.3, 5.8, 5.7, 5.2, 5.5,
       4.5, 5.3, 7. , 6.4, 6.9, 6.5, 6.3, 6.6, 5.9, 6. , 6.1, 5.6, 6.7,
       6.2, 6.8, 7.1, 7.6, 7.3, 7.2, 7.7, 7.4, 7.9])

In [12]:
# Distinct sepal-width values, in order of first appearance.
df["SepalWidthCm"].unique()

array([3.5, 3. , 3.2, 3.1, 3.6, 3.9, 3.4, 2.9, 3.7, 4. , 4.4, 3.8, 3.3,
       4.1, 4.2, 2.3, 2.8, 2.4, 2.7, 2. , 2.2, 2.5, 2.6])

In [13]:
# Distinct petal-length values, in order of first appearance.
df["PetalLengthCm"].unique()

array([1.4, 1.3, 1.5, 1.7, 1.6, 1.1, 1.2, 1. , 1.9, 4.7, 4.5, 4.9, 4. ,
       4.6, 3.3, 3.9, 3.5, 4.2, 3.6, 4.4, 4.1, 4.8, 4.3, 5. , 3.8, 3.7,
       5.1, 3. , 6. , 5.9, 5.6, 5.8, 6.6, 6.3, 6.1, 5.3, 5.5, 6.7, 6.9,
       5.7, 6.4, 5.4, 5.2])

In [14]:
# Distinct petal-width values, in order of first appearance.
df["PetalWidthCm"].unique()

array([0.2, 0.4, 0.3, 0.1, 0.5, 0.6, 1.4, 1.5, 1.3, 1.6, 1. , 1.1, 1.8,
       1.2, 1.7, 2.5, 1.9, 2.1, 2.2, 2. , 2.4, 2.3])

## Encode the target variable with LabelEncoder

In [15]:
from sklearn.preprocessing import LabelEncoder

In [16]:
# Encoder used to map the string class labels to integer codes.
LE = LabelEncoder()

In [17]:
# Replace the string class labels with integer codes.
# The encoder is fitted on the untouched `iris` frame rather than on `df` itself,
# so re-running this cell always fits on the original strings and `LE.classes_`
# keeps the class names (fitting on the already-encoded column would lose them).
df["Species"] = LE.fit_transform(iris["Species"])

In [19]:
# Check the target column after encoding: it should now hold integer codes.
df["Species"].unique()

array([0, 1, 2])

In [20]:
# Preview the first five rows after the target has been encoded.
df.head(5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,0
1,2,4.9,3.0,1.4,0.2,0
2,3,4.7,3.2,1.3,0.2,0
3,4,4.6,3.1,1.5,0.2,0
4,5,5.0,3.6,1.4,0.2,0


## Train and Test Split

In [21]:
from sklearn.model_selection import train_test_split

In [24]:
# Feature matrix: every column except the row identifier and the target.
# `columns=` already names the axis, so the redundant `axis=1` is dropped.
X = df.drop(columns=["Id", "Species"])
# Target vector: the encoded class labels.
y = df["Species"]

In [26]:
# Sanity check: features and target must have the same number of rows.
(X.shape, y.shape)

((150, 4), (150,))

In [27]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): no `stratify=y` is passed. The recorded reports show balanced
# classes for this seed, but that is not guaranteed for other seeds.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=300,
)

In [28]:
# Shapes of the four pieces produced by the split, in the order they were unpacked.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((105, 4), (45, 4), (105,), (45,))

## Model Building

In [29]:
# Baseline AdaBoost model with the library's default settings.
# Seeded so that Restart & Run All refits the same model; 32 matches the seed
# used for the SVC-based AdaBoost model later in this notebook.
ada = AdaBoostClassifier(random_state=32)

In [30]:
# Fit the baseline model on the training split.
ada.fit(X=X_train, y=y_train)

AdaBoostClassifier()

In [31]:
# Predictions of the baseline model on the data it was trained on.
y_train_pred = ada.predict(X=X_train)

In [32]:
# Display the predicted class codes for the training rows.
y_train_pred

array([2, 2, 1, 0, 1, 1, 2, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 2, 1, 0, 2, 0,
       0, 2, 0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 1, 0, 1, 0, 2, 0, 1, 2, 2, 2,
       2, 1, 0, 0, 1, 2, 2, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 2, 2, 2, 1,
       0, 2, 0, 1, 0, 2, 2, 1, 0, 1, 1, 0, 2, 0, 0, 1, 1, 0, 2, 1, 2, 1,
       1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 0, 2, 1, 0, 2])

In [35]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [36]:
# Confusion matrix of the baseline model on the training split.
confusion_matrix(y_true=y_train, y_pred=y_train_pred)

array([[35,  0,  0],
       [ 0, 34,  1],
       [ 0,  2, 33]], dtype=int64)

In [37]:
# Per-class precision/recall/F1 of the baseline model on the training split.
# `print` is kept so the report's line breaks render as a table.
ada_train_report = classification_report(y_true=y_train, y_pred=y_train_pred)
print(ada_train_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        35
           1       0.94      0.97      0.96        35
           2       0.97      0.94      0.96        35

    accuracy                           0.97       105
   macro avg       0.97      0.97      0.97       105
weighted avg       0.97      0.97      0.97       105



In [39]:
from sklearn.svm import SVC

In [41]:
# Linear-kernel support vector classifier with probability estimates enabled;
# it is used as the base learner of the boosted model defined next.
svc = SVC(kernel="linear", probability=True)

In [46]:
# AdaBoost over the linear SVC with a small learning rate and a fixed seed.
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so the positional form runs on both old and new releases.
abc = AdaBoostClassifier(svc, n_estimators=50, learning_rate=0.01, random_state=32)

In [47]:
# Fit the SVC-based boosted model on the training split.
abc.fit(X=X_train, y=y_train)

AdaBoostClassifier(base_estimator=SVC(kernel='linear', probability=True),
                   learning_rate=0.01, random_state=32)

In [48]:
# Predictions of the SVC-based model on the training split.
y_pred_train = abc.predict(X=X_train)

In [49]:
# Predictions of the SVC-based model on the held-out test split.
y_pred_test = abc.predict(X=X_test)

In [50]:
# Confusion matrix of the SVC-based model on the training split.
confusion_matrix(y_true=y_train, y_pred=y_pred_train)

array([[35,  0,  0],
       [ 0, 28,  7],
       [ 0,  0, 35]], dtype=int64)

In [51]:
# Confusion matrix of the SVC-based model on the held-out test split.
confusion_matrix(y_true=y_test, y_pred=y_pred_test)

array([[15,  0,  0],
       [ 0, 14,  1],
       [ 0,  1, 14]], dtype=int64)

In [53]:
# Per-class precision/recall/F1 of the SVC-based model on the training split.
abc_train_report = classification_report(y_true=y_train, y_pred=y_pred_train)
print(abc_train_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        35
           1       1.00      0.80      0.89        35
           2       0.83      1.00      0.91        35

    accuracy                           0.93       105
   macro avg       0.94      0.93      0.93       105
weighted avg       0.94      0.93      0.93       105



In [54]:
# Per-class precision/recall/F1 of the SVC-based model on the held-out test split.
abc_test_report = classification_report(y_true=y_test, y_pred=y_pred_test)
print(abc_test_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.93      0.93      0.93        15
           2       0.93      0.93      0.93        15

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

