In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the Iris CSV from disk into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere until it is pointed at a local copy of the file.
iris_csv_path = "C:/Users/yashwanth_BADETI/Downloads/Iris.csv"
data = pd.read_csv(iris_csv_path)

In [3]:
# Inspect the frame: print the schema summary (columns, non-null counts, dtypes),
# then show the leading five rows as the cell's output.
data.info()
data.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


Unnamed: 0,id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Show pandas' descriptive statistics (count, mean, std, min, quartiles, max)
# for the frame; the result is rendered as this cell's output.
data.describe()

Unnamed: 0,id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [5]:
# Drop the row-identifier column ('id'): it is a running row number rather than
# a measurement, so it must not be used as a feature for prediction.
# The match is case-insensitive (the column may be spelled 'id' or 'Id'), and an
# empty match list makes drop() a no-op, so re-running this cell after the column
# has already been removed no longer raises KeyError.
id_columns = [column for column in data.columns if str(column).lower() == 'id']
data = data.drop(columns=id_columns)

In [6]:
# Display the remaining column labels to confirm the identifier column is gone.
data.columns

Index(['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

In [7]:
# Separate the label column (y) from the predictor columns (X).
target_column = 'Species'
y = data[target_column]
X = data.drop(columns=[target_column])

In [8]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state makes the shuffle, and therefore the split, repeatable.
# NOTE(review): the split is not stratified, so class counts in the test set can
# be uneven; consider stratify=y if balanced evaluation matters.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [9]:
# Initialize the three classifiers that will be compared.
# max_iter=200 raises the logistic-regression solver's iteration cap above the
# library default.
logistic_model = LogisticRegression(max_iter=200)
# The tree-based estimators use randomness while fitting; without a fixed
# random_state their results can differ between runs. Pin the seed (same value as
# the train/test split) so "Restart & Run All" reproduces the reported scores.
decision_tree_model = DecisionTreeClassifier(random_state=42)
random_forest_model = RandomForestClassifier(random_state=42)

In [10]:
# Fit every classifier on the training split: logistic regression first, then the
# decision tree, then the random forest.
for estimator in (logistic_model, decision_tree_model, random_forest_model):
    estimator.fit(X_train, y_train)

# fit() returns the estimator itself, so ending on the last-fitted model keeps
# the value this cell displays.
random_forest_model

In [11]:
# Predict labels for the held-out test rows with each fitted model.
# The generator yields results in the same order as the names being unpacked.
logistic_predictions, decision_tree_predictions, random_forest_predictions = (
    fitted_model.predict(X_test)
    for fitted_model in (logistic_model, decision_tree_model, random_forest_model)
)

In [12]:
# Score each model's predictions against the true test labels
# (accuracy = fraction of test rows predicted correctly).
logistic_accuracy, decision_tree_accuracy, random_forest_accuracy = (
    accuracy_score(y_test, predicted_labels)
    for predicted_labels in (
        logistic_predictions,
        decision_tree_predictions,
        random_forest_predictions,
    )
)

In [13]:
# Report each model's test accuracy as a percentage with two decimals.
print("Model Performance:")
for model_label, model_accuracy in (
    ("Logistic Regression", logistic_accuracy),
    ("Decision Tree", decision_tree_accuracy),
    ("Random Forest", random_forest_accuracy),
):
    print(f"{model_label} Accuracy: {model_accuracy * 100:.2f}%")

Model Performance:
Logistic Regression Accuracy: 100.00%
Decision Tree Accuracy: 100.00%
Random Forest Accuracy: 100.00%


In [15]:
# Classification reports: per-class precision, recall, F1 and support for every
# model, computed on the held-out test set.
# sep="" is deliberate: with print's default separator a space is inserted after
# the title's trailing newline, which pushes the report's header row one column
# to the right of the rows beneath it.
print("\nClassification Reports:")
for report_title, model_predictions in (
    ("Logistic Regression", logistic_predictions),
    ("Decision Tree", decision_tree_predictions),
    ("Random Forest", random_forest_predictions),
):
    print(f"{report_title}:\n", classification_report(y_test, model_predictions), sep="")



Classification Reports:
Logistic Regression:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

Decision Tree:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

Random Forest:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Ir

In [16]:
# Map each model's display name to its test accuracy.
# Insertion order is kept, which decides which model wins a tie later on.
accuracies = dict([
    ("Logistic Regression", logistic_accuracy),
    ("Decision Tree", decision_tree_accuracy),
    ("Random Forest", random_forest_accuracy),
])

In [17]:
# Pick the name of the highest-scoring model. max() returns the first maximal
# entry, so on a tie the model listed earliest in `accuracies` is chosen.
best_model = max(accuracies.items(), key=lambda name_and_score: name_and_score[1])[0]
print(f"\nThe best-performing model is: {best_model} with an accuracy of {accuracies[best_model] * 100:.2f}%")


The best-performing model is: Logistic Regression with an accuracy of 100.00%


In [21]:
# Look up the winning model's test-set predictions by its display name.
# A lookup table replaces the if/elif chain, which had no fallback branch: an
# unrecognised name would leave `best_model_predictions` undefined on a fresh
# kernel, or silently reuse a stale value from an earlier run of this cell.
predictions_by_model = {
    "Logistic Regression": logistic_predictions,
    "Decision Tree": decision_tree_predictions,
    "Random Forest": random_forest_predictions,
}
if best_model not in predictions_by_model:
    raise KeyError(f"No predictions available for model {best_model!r}")
best_model_predictions = predictions_by_model[best_model]

# Put the true labels next to the best model's predictions. The frame takes its
# row index from y_test, so each row is labelled with the original row number.
comparison_df = pd.DataFrame({
    "True Labels": y_test,
    "Predictions": best_model_predictions
})

print("\nComparison of True Labels and Predictions by the Best Model:")
print(comparison_df)


Comparison of True Labels and Predictions by the Best Model:
         True Labels      Predictions
73   Iris-versicolor  Iris-versicolor
18       Iris-setosa      Iris-setosa
118   Iris-virginica   Iris-virginica
78   Iris-versicolor  Iris-versicolor
76   Iris-versicolor  Iris-versicolor
31       Iris-setosa      Iris-setosa
64   Iris-versicolor  Iris-versicolor
141   Iris-virginica   Iris-virginica
68   Iris-versicolor  Iris-versicolor
82   Iris-versicolor  Iris-versicolor
110   Iris-virginica   Iris-virginica
12       Iris-setosa      Iris-setosa
36       Iris-setosa      Iris-setosa
9        Iris-setosa      Iris-setosa
19       Iris-setosa      Iris-setosa
56   Iris-versicolor  Iris-versicolor
104   Iris-virginica   Iris-virginica
69   Iris-versicolor  Iris-versicolor
55   Iris-versicolor  Iris-versicolor
132   Iris-virginica   Iris-virginica
29       Iris-setosa      Iris-setosa
127   Iris-virginica   Iris-virginica
26       Iris-setosa      Iris-setosa
128   Iris-virginica   Iri