In [4]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Step 1: Load the Dataset
# The CSV location defaults to the original hard-coded local download path,
# which only exists on one machine. Set the IRIS_CSV_PATH environment variable
# to point at another copy of the file instead of editing this cell.
IRIS_CSV_PATH = os.environ.get('IRIS_CSV_PATH', r'C:\Users\Vishnu\Downloads\Iris.csv')
dataset = pd.read_csv(IRIS_CSV_PATH)



In [8]:
# Step 2: Data Preprocessing
# Check the first few rows and the data types
print(dataset.head())
print(dataset.dtypes)

# Convert Species to category codes (if not already numeric).
# The conversion overwrites the column in place, so it is guarded: re-running
# this cell on an already-encoded frame leaves the column, and the
# code -> label lookup captured on the first run, untouched.
if not pd.api.types.is_numeric_dtype(dataset['Species']):
    species_categorical = dataset['Species'].astype('category')
    # Code -> original label, kept so integer predictions can be decoded later.
    species_labels = dict(enumerate(species_categorical.cat.categories))
    dataset['Species'] = species_categorical.cat.codes

# Select features and labels by column name rather than by position, so an
# extra or reordered column in the CSV cannot silently shift what is selected.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = dataset[feature_columns].to_numpy()  # Features (Sepal and Petal measurements)
Y = dataset['Species'].to_numpy()        # Labels (Species codes)

print(dataset.head())
print(dataset.dtypes)

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Species
0   1            5.1           3.5            1.4           0.2        0
1   2            4.9           3.0            1.4           0.2        0
2   3            4.7           3.2            1.3           0.2        0
3   4            4.6           3.1            1.5           0.2        0
4   5            5.0           3.6            1.4           0.2        0
Id                 int64
SepalLengthCm    float64
SepalWidthCm     float64
PetalLengthCm    float64
PetalWidthCm     float64
Species             int8
dtype: object
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Species
0   1            5.1           3.5            1.4           0.2        0
1   2            4.9           3.0            1.4           0.2        0
2   3            4.7           3.2            1.3           0.2        0
3   4            4.6           3.1            1.5           0.2        0
4   5            5.0           3.

In [22]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
dataset

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,0
1,2,4.9,3.0,1.4,0.2,0
2,3,4.7,3.2,1.3,0.2,0
3,4,4.6,3.1,1.5,0.2,0
4,5,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,2
146,147,6.3,2.5,5.0,1.9,2
147,148,6.5,3.0,5.2,2.0,2
148,149,6.2,3.4,5.4,2.3,2


In [10]:
# Step 3: Split the Dataset
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)


In [12]:
# Step 4: Feature Scaling
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits.
# NOTE: X_train and X_test are overwritten with their scaled versions, so
# re-running this cell without re-running the split first would refit the
# scaler on data that has already been scaled.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [14]:
# Step 5: Model Training
# Logistic regression with the lbfgs solver, a fixed seed, and up to 1000
# iterations allowed for the optimiser.
clf = LogisticRegression(
    solver='lbfgs',
    max_iter=1000,
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
clf.fit(X_train, Y_train)

In [16]:
# Step 6: Model Evaluation
# Predict on the held-out split, compute each metric once, then report them.
Y_pred = clf.predict(X_test)

test_confusion = confusion_matrix(Y_test, Y_pred)
test_accuracy = accuracy_score(Y_test, Y_pred)
test_report = classification_report(Y_test, Y_pred)

print("Confusion Matrix:\n", test_confusion)
print("Accuracy Score:", test_accuracy)
print("Classification Report:\n", test_report)

Confusion Matrix:
 [[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
Accuracy Score: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00         6

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [20]:
# Step 7: Prediction
# Classify one hand-written sample. The measurements are given in the same
# order as the training features and must go through the already-fitted
# scaler before being passed to the classifier.
sepal_length, sepal_width, petal_length, petal_width = 5.0, 3.5, 1.5, 0.2
new_sample = [[sepal_length, sepal_width, petal_length, petal_width]]

scaled_sample = scaler.transform(new_sample)
predicted_species = clf.predict(scaled_sample)

# The prediction is an array of integer species codes, one per input row.
print("Predicted Species for New Sample:", predicted_species)

Predicted Species for New Sample: [0]
