In [1]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [2]:
# Load the iris dataset that ships with scikit-learn.
iris_bunch = load_iris()

# Build a DataFrame of the measurements (one column per feature name) and
# attach the integer target as a "Class" column.
iris_df = pd.DataFrame(
    data=iris_bunch.data,
    columns=iris_bunch.feature_names,
).assign(Class=iris_bunch.target)

# Preview the first five rows.
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),Class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
# Target vector: the "Class" column; feature matrix: every other column.
y = iris_df.loc[:, "Class"]
X = iris_df.drop(columns="Class")

In [11]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=21,
)

In [12]:
# Report the dimensions of each piece of the train/test split.
for split_name, split_data in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print("Shape of " + split_name + " is : ", split_data.shape)

Shape of X_train is :  (120, 4)
Shape of X_test is :  (30, 4)
Shape of y_train is :  (120,)
Shape of y_test is :  (30,)


In [13]:
from sklearn.ensemble import GradientBoostingClassifier

In [14]:
# Gradient-boosted ensemble of 20 trees with a small learning rate (0.02).
# random_state is fixed (same seed as the train/test split) because
# scikit-learn's trees permute the candidate features at every split, so
# without a seed tied splits can resolve differently between runs and the
# reported metrics would not be reproducible under Restart & Run All.
gb_model = GradientBoostingClassifier(
    n_estimators=20,
    learning_rate=0.02,
    random_state=21,
)

# Fit on the training split; as the last expression of the cell, the fitted
# estimator is displayed.
gb_model.fit(X_train, y_train)

GradientBoostingClassifier(learning_rate=0.02, n_estimators=20)

In [15]:
# Predict a class label for every row of the held-out test features.
y_test_pred_gb = gb_model.predict(X_test)

In [16]:
# Fraction of test rows whose predicted class equals the true class.
gb_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred_gb)

print(
    "The prediction accuracy of the GBC model on the testing dataset is :",
    gb_accuracy,
)

The prediction accuracy of the GBC model on the testing dataset is : 0.9333333333333333


In [17]:
# Score the model on the rows it was trained on, for comparison with the
# test-set accuracy.
y_pred_train_gb = gb_model.predict(X_train)
train_accuracy_gb = accuracy_score(y_true=y_train, y_pred=y_pred_train_gb)

print(
    "The prediction accuracy of the GBC model on the training dataset is:",
    train_accuracy_gb,
)

The prediction accuracy of the GBC model on the training dataset is: 1.0


In [18]:
# Per-class precision / recall / F1 for the GBC predictions on the test split.
print(
    "The classification report for the test dataset is: ",
    classification_report(y_true=y_test, y_pred=y_test_pred_gb),
    sep="\n",
)

The classification report for the test dataset is: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      0.83      0.91        12
           2       0.78      1.00      0.88         7

    accuracy                           0.93        30
   macro avg       0.93      0.94      0.93        30
weighted avg       0.95      0.93      0.93        30



In [22]:
# Per-class precision / recall / F1 for the GBC predictions on the training split.
print(
    "The classification report for the train dataset is: ",
    classification_report(y_true=y_train, y_pred=y_pred_train_gb),
    sep="\n",
)

The classification report for the train dataset is: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        39
           1       1.00      1.00      1.00        38
           2       1.00      1.00      1.00        43

    accuracy                           1.00       120
   macro avg       1.00      1.00      1.00       120
weighted avg       1.00      1.00      1.00       120



# XGBoost Classifier

!pip install xgboost

In [24]:
from xgboost.sklearn import XGBClassifier

In [25]:
# Configure a 20-tree XGBoost classifier for the iris problem.
#
# Choosing the objective:
#   * 'binary:logistic' -- when the XGB classifier is used for a two-class
#                          (binary) problem.
#   * 'multi:softmax'   -- when it is used for a problem with more than two
#                          classes; the num_class parameter is then defined
#                          alongside the objective.
# In this example num_class = 3 because the iris dataset has 3 classes.
#
# NOTE(review): the estimator repr printed after fitting reports
# objective='multi:softprob', so the scikit-learn wrapper appears to
# substitute its own multiclass objective here -- confirm against the
# installed xgboost version.
xgb_model = XGBClassifier(
    n_estimators=20,
    objective="multi:softmax",
    num_class=3,
)

In [26]:
# Train the XGBoost classifier on the training split; as the last expression
# of the cell, the fitted estimator's repr is displayed.
xgb_model.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=20, n_jobs=0, num_class=3, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [27]:
# Predict a class label for every row of the held-out test features.
y_test_pred_xgb = xgb_model.predict(X_test)

In [28]:
# Fraction of test rows whose XGBoost-predicted class equals the true class.
xgb_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred_xgb)

print(
    "The prediction accuracy of the XGB model on the testing dataset is :",
    xgb_accuracy,
)

The prediction accuracy of the XGB model on the testing dataset is : 0.9333333333333333


In [29]:
# Score the XGBoost model on the rows it was trained on, for comparison with
# the test-set accuracy.
y_train_pred_xgb = xgb_model.predict(X_train)

xgb_train_accuracy = accuracy_score(y_train, y_train_pred_xgb)

# The label says "training": this score is computed from y_train and the
# predictions on X_train, not from the test split.
print("The prediction accuracy of the XGB model on the training dataset is :",
      xgb_train_accuracy)

The prediction accuracy of the XGB model on the testing dataset is : 1.0


In [30]:
# Per-class precision / recall / F1 for the XGB predictions on the test split.
print(
    "The classification report for the test dataset is: ",
    classification_report(y_true=y_test, y_pred=y_test_pred_xgb),
    sep="\n",
)

The classification report for the test dataset is: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      0.83      0.91        12
           2       0.78      1.00      0.88         7

    accuracy                           0.93        30
   macro avg       0.93      0.94      0.93        30
weighted avg       0.95      0.93      0.93        30



In [31]:
# Per-class precision / recall / F1 for the XGB predictions on the training split.
print(
    "The classification report for the train dataset is: ",
    classification_report(y_true=y_train, y_pred=y_train_pred_xgb),
    sep="\n",
)

The classification report for the train dataset is: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        39
           1       1.00      1.00      1.00        38
           2       1.00      1.00      1.00        43

    accuracy                           1.00       120
   macro avg       1.00      1.00      1.00       120
weighted avg       1.00      1.00      1.00       120



# End of discussion