In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Load the Iris measurements and keep the four feature columns plus the label.
# NOTE(review): the path points at the filesystem root; confirm the file
# location or move it behind a configurable data directory.
iris_columns = ["Sepal Length", "Sepal Width", "Petal Length", "Petal Width", "Class"]
data = pd.read_csv(r'/IrisNew.csv')
# Select by label instead of re-wrapping with pd.DataFrame(data, columns=...):
# the constructor reindexes and silently fills a missing or misspelled column
# with NaN, whereas label selection raises KeyError right here.
df = data[iris_columns].copy()
print(df)

     Sepal Length  Sepal Width  Petal Length  Petal Width      Class
0             4.3          3.0           1.1          0.1     setosa
1             4.4          2.9           1.4          0.2     setosa
2             4.4          3.0           1.3          0.2     setosa
3             4.4          3.2           1.3          0.2     setosa
4             4.5          2.3           1.3          0.3     setosa
..            ...          ...           ...          ...        ...
145           7.7          2.6           6.9          2.3  virginica
146           7.7          2.8           6.7          2.0  virginica
147           7.7          3.0           6.1          2.3  virginica
148           7.7          3.8           6.7          2.2  virginica
149           7.9          3.8           6.4          2.0  virginica

[150 rows x 5 columns]


In [4]:
# Split the data into training and testing sets.
# Only the numeric columns are converted, so X is a float array; `df.values` on
# the whole frame (which also holds the string label column) would give an
# object-dtype array instead.
X = df.iloc[:, 0:4].to_numpy(dtype=float)  # first 4 columns are the independent variables
Y = df.iloc[:, 4].to_numpy()  # last column is the dependent variable (class label)
# test_size=0.3 holds out 30% of the rows for testing and trains on the other 70%;
# random_state pins the shuffle so the split is the same on every run.
# NOTE(review): the split is not stratified, so class counts in the test set can
# be uneven; consider stratify=Y if balanced folds matter.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=100)


In [5]:
# Standardise the features: the scaling statistics are learned from the
# training split only, then applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [15]:
# Number of neighbours that vote on each prediction; the value can be tuned
# with cross-validation.
k = 3

knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)

In [16]:
# Score the fitted KNN classifier on the held-out split.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy}')
# sep="\n" puts each heading on its own line directly above its table.
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

Accuracy: 0.9777777777777777
Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.94      1.00      0.97        15
   virginica       1.00      0.94      0.97        17

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

Confusion Matrix:
[[13  0  0]
 [ 0 15  0]
 [ 0  1 16]]


In [21]:
def _print_evaluation(model_name, y_true, y_hat):
    """Print accuracy, classification report and confusion matrix for one model.

    Parameters
    ----------
    model_name : str
        Label prefixed to every printed heading.
    y_true : array-like
        Ground-truth class labels.
    y_hat : array-like
        Labels predicted for the same samples, in the same order.
    """
    print(f'{model_name} Accuracy: {accuracy_score(y_true, y_hat)}')

    print(f"{model_name} Classification Report:")
    print(classification_report(y_true, y_hat))

    print(f"{model_name} Confusion Matrix:")
    print(confusion_matrix(y_true, y_hat))


# Candidate classifiers. The two decision trees differ only in split criterion.
dt_model = DecisionTreeClassifier(criterion="entropy", random_state=42)  # entropy
dt_model1 = DecisionTreeClassifier(criterion="gini", random_state=42)  # Gini index
lr_model = LogisticRegression(max_iter=1000, random_state=42)
k = 3
knn_model = KNeighborsClassifier(n_neighbors=k)

# Train each model on the training split and predict the held-out split.
# fit() returns the estimator itself, so the two calls can be chained.
y_pred_dt = dt_model.fit(X_train, y_train).predict(X_test)
y_pred_dt1 = dt_model1.fit(X_train, y_train).predict(X_test)
y_pred_lr = lr_model.fit(X_train, y_train).predict(X_test)
y_pred_knn = knn_model.fit(X_train, y_train).predict(X_test)

# Evaluate and compare the models
models = [
    ("Decision Tree with criterion as entropy", y_pred_dt),
    ("Decision Tree with criterion as gini", y_pred_dt1),
    ("Logistic Regression", y_pred_lr),
    ("K-Nearest Neighbors", y_pred_knn),
]

# The loop uses its own variable name so it does not rebind the `y_pred` and
# `accuracy` globals produced by the standalone KNN evaluation cell.
for model_name, model_pred in models:
    _print_evaluation(model_name, y_test, model_pred)

Decision Tree with criterion as entropy Accuracy: 0.9555555555555556
Decision Tree with criterion as entropy Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.93      0.93      0.93        15
   virginica       0.94      0.94      0.94        17

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

Decision Tree with criterion as entropy Confusion Matrix:
[[13  0  0]
 [ 0 14  1]
 [ 0  1 16]]
Decision Tree with criterion as gini Accuracy: 0.9555555555555556
Decision Tree with criterion as gini Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.93      0.93      0.93        15
   virginica       0.94      0.94      0.94        17

    accuracy                           0.96     