In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from keras.utils import to_categorical
from sklearn.svm import SVC
import xgboost as xgb
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
iris = load_iris()
x = iris.data
y = iris.target

In [3]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [4]:
x_train.shape

(120, 4)

#***Logistic Regression***

In [5]:
lr = LogisticRegression(max_iter = 300)
param_grid = {
    'C' : [0.001, 0.01, 0.1, 1, 10, 100],
    'penalty' : ['l1', 'l2']
}

grid_search = GridSearchCV(lr, param_grid, cv = 5, scoring = 'accuracy')
grid_search.fit(x, y)
print("Best Parameter for Logistic Regression: ", grid_search.best_params_)

Best Parameter for Logistic Regression:  {'C': 100, 'penalty': 'l2'}


In [6]:
lr = LogisticRegression(max_iter = 300, C = 100, penalty = 'l2')
lr.fit(x_train, y_train)
train_pred = lr.predict(x_train)
test_pred = lr.predict(x_test)

print("Training Accuracy: ", accuracy_score(y_train, train_pred))
print("Test Accuracy: ", accuracy_score(y_test, test_pred))
print("Classification Report: \n", classification_report(y_train, train_pred))

Training Accuracy:  0.9833333333333333
Test Accuracy:  1.0
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.98      0.98      0.98        41
           2       0.97      0.97      0.97        39

    accuracy                           0.98       120
   macro avg       0.98      0.98      0.98       120
weighted avg       0.98      0.98      0.98       120



#***K-Nearest Neighbors***

In [7]:
knn = KNeighborsClassifier()
param_grid = {
    'n_neighbors': np.arange(1, 50),
    'weights': ['uniform', 'distance'],
    'metric': ['cosine', 'infinity', 'sokalmichener', 'euclidean'],
    'p': [1, 2, 3]
}

grid_search = GridSearchCV(knn, param_grid, cv = 5)
grid_search.fit(x, y)
print("Best Parameter for K-Nearest Neighbors: ", grid_search.best_params_)

Best Parameter for K-Nearest Neighbors:  {'metric': 'infinity', 'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}


In [8]:
knn = KNeighborsClassifier(metric = 'infinity', n_neighbors = 5, p = 1, weights = 'uniform')
knn.fit(x_train, y_train)
train_pred = knn.predict(x_train)
test_pred = knn.predict(x_test)

print("Training Accuracy: ", accuracy_score(y_train, train_pred))
print("Test Accuracy: ", accuracy_score(y_test, test_pred))
print("Classification Report: \n", classification_report(y_train, train_pred))

Training Accuracy:  0.975
Test Accuracy:  0.9666666666666667
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.97      0.95      0.96        41
           2       0.95      0.97      0.96        39

    accuracy                           0.97       120
   macro avg       0.97      0.98      0.97       120
weighted avg       0.98      0.97      0.98       120



#***Support Vector Machines***

In [9]:
sup = SVC()
parameters = {
    'C' : [0.1, 1, 10],
    'kernel' : ['linear', 'rbf'],
    'gamma' : ['scale', 'auto']
}

grid_search = GridSearchCV(sup, parameters, cv = 5)
grid_search.fit(x, y)
print("Best Parameter for Support Vector Machines: ", grid_search.best_params_)

Best Parameter for Support Vector Machines:  {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}


In [10]:
knn = KNeighborsClassifier(metric = 'infinity', n_neighbors = 5, p = 1, weights = 'uniform')
knn.fit(x_train, y_train)
train_pred = knn.predict(x_train)
test_pred = knn.predict(x_test)

print("Training Accuracy: ", accuracy_score(y_train, train_pred))
print("Test Accuracy: ", accuracy_score(y_test, test_pred))
print("Classification Report: \n", classification_report(y_train, train_pred))

Training Accuracy:  0.975
Test Accuracy:  0.9666666666666667
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.97      0.95      0.96        41
           2       0.95      0.97      0.96        39

    accuracy                           0.97       120
   macro avg       0.97      0.98      0.97       120
weighted avg       0.98      0.97      0.98       120



#***Naive Bayes***

In [11]:
NB = GaussianNB()
param_grid = {
    'var_smoothing': np.logspace(0, -9, num = 10)
}

grid_search = GridSearchCV(NB, param_grid, cv = 5)
grid_search.fit(x, y)
print("Best Parameter for Naive Bayes: ", grid_search.best_params_)

Best Parameter for Naive Bayes:  {'var_smoothing': 0.01}


In [12]:
NB = GaussianNB(var_smoothing = 0.01)
NB.fit(x_train, y_train)
train_pred = NB.predict(x_train)
test_pred = NB.predict(x_test)

print("Training Accuracy: ", accuracy_score(y_train, train_pred))
print("Test Accuracy: ", accuracy_score(y_test, test_pred))
print("Classification Report: \n", classification_report(y_train, train_pred))

Training Accuracy:  0.95
Test Accuracy:  1.0
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.93      0.93      0.93        41
           2       0.92      0.92      0.92        39

    accuracy                           0.95       120
   macro avg       0.95      0.95      0.95       120
weighted avg       0.95      0.95      0.95       120



#***Decision Trees***

In [13]:
Dtree = DecisionTreeClassifier()
param_grid = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(Dtree, param_grid, cv = 5)
grid_search.fit(x, y)
print("Best Parameter for Decision Trees: ", grid_search.best_params_)

Best Parameter for Decision Trees:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5}


In [14]:
Dtree = DecisionTreeClassifier(max_depth = None, min_samples_leaf = 1, min_samples_split = 5)
Dtree.fit(x_train, y_train)
train_pred = Dtree.predict(x_train)
test_pred = Dtree.predict(x_test)

print("Training Accuracy: ", accuracy_score(y_train, train_pred))
print("Test Accuracy: ", accuracy_score(y_test, test_pred))
print("Classification Report: \n", classification_report(y_train, train_pred))

Training Accuracy:  0.9833333333333333
Test Accuracy:  1.0
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.98      0.98      0.98        41
           2       0.97      0.97      0.97        39

    accuracy                           0.98       120
   macro avg       0.98      0.98      0.98       120
weighted avg       0.98      0.98      0.98       120



#***Random Forest***

In [15]:
rt = RandomForestClassifier(random_state = 0)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(rt, param_grid, cv = 5)
grid_search.fit(x, y)
print("Best Parameter for Random Forest: ", grid_search.best_params_)

Best Parameter for Random Forest:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}


In [16]:
rt = RandomForestClassifier(max_depth = None, min_samples_leaf = 1, min_samples_split = 2, n_estimators = 100)
rt.fit(x_train, y_train)
train_pred = rt.predict(x_train)
test_pred = rt.predict(x_test)

print("Training Accuracy: ", accuracy_score(y_train, train_pred))
print("Test Accuracy: ", accuracy_score(y_test, test_pred))
print("Classification Report: \n", classification_report(y_train, train_pred))

Training Accuracy:  1.0
Test Accuracy:  1.0
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       1.00      1.00      1.00        41
           2       1.00      1.00      1.00        39

    accuracy                           1.00       120
   macro avg       1.00      1.00      1.00       120
weighted avg       1.00      1.00      1.00       120



#***Gradient Boosting***

In [17]:
gb = GradientBoostingClassifier(random_state = 0)
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.1, 0.01, 0.001],
    'max_depth': [3, 4, 5]
}

grid_search = GridSearchCV(gb, param_grid, cv = 5)
grid_search.fit(x, y)
print("Best Parameter for Gradient Boosting Classifier: ", grid_search.best_params_)

Best Parameter for Gradient Boosting Classifier:  {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 50}


In [19]:
gb = GradientBoostingClassifier(learning_rate = 0.1, max_depth = 4, n_estimators = 50)
gb.fit(x_train, y_train)
train_pred = gb.predict(x_train)
test_pred = gb.predict(x_test)

print("Training Accuracy: ", accuracy_score(y_train, train_pred))
print("Test Accuracy: ", accuracy_score(y_test, test_pred))
print("Classification Report: \n", classification_report(y_train, train_pred))

Training Accuracy:  1.0
Test Accuracy:  1.0
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       1.00      1.00      1.00        41
           2       1.00      1.00      1.00        39

    accuracy                           1.00       120
   macro avg       1.00      1.00      1.00       120
weighted avg       1.00      1.00      1.00       120



#***XGBoost***

In [20]:
xg = xgb.XGBClassifier(random_state = 0)
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.1, 0.01, 0.001],
    'max_depth': [3, 4, 5]
}

grid_search = GridSearchCV(xg, param_grid, cv = 5)
grid_search.fit(x, y)
print("Best Parameter for XGBoost: ", grid_search.best_params_)

Best Parameter for XGBoost:  {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50}


In [22]:
xg = xgb.XGBClassifier(random_state = 0, learning_rate = 0.01, max_depth = 3, n_estimators = 50)
xg.fit(x_train, y_train)
train_pred = xg.predict(x_train)
test_pred = xg.predict(x_test)

print("Training Accuracy: ", accuracy_score(y_train, train_pred))
print("Test Accuracy: ", accuracy_score(y_test, test_pred))
print("Classification Report: \n", classification_report(y_train, train_pred))

Training Accuracy:  0.9666666666666667
Test Accuracy:  1.0
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.95      0.95      0.95        41
           2       0.95      0.95      0.95        39

    accuracy                           0.97       120
   macro avg       0.97      0.97      0.97       120
weighted avg       0.97      0.97      0.97       120

