In [1]:
#Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Load the Iris dataset
df = pd.read_csv('iris.csv')

In [3]:
#print the first few row of the dataset 
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Find the shape of the dataset
df.shape

(150, 5)

In [5]:
#Show the information about the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [6]:
#Show the description about the dataset
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [7]:
X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
y = df['species']

In [8]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [9]:
#Model Selection and Training
model = LogisticRegression()
model.fit(X_train, y_train)

In [10]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = model.score(X_test, y_test)
conf_matrix = confusion_matrix(y_test, y_pred)
cls_rep = classification_report(y_test, y_pred)

In [11]:
print(f"Accuracy: {accuracy}")
print(f"\nConfusion Matrix: {conf_matrix}")
print(f"\nClassification Report: {cls_rep}")

Accuracy: 1.0

Confusion Matrix: [[19  0  0]
 [ 0 13  0]
 [ 0  0  6]]

Classification Report:                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        19
Iris-versicolor       1.00      1.00      1.00        13
 Iris-virginica       1.00      1.00      1.00         6

       accuracy                           1.00        38
      macro avg       1.00      1.00      1.00        38
   weighted avg       1.00      1.00      1.00        38



In [12]:
# Get the sepal and petal measurements of a new Iris flower
sepal_length = 7
sepal_width = 3.2
petal_length = 4.2
petal_width = 1.4

In [13]:
# Create a new DataFrame with the measurements of the new Iris flower
new_iris_df = pd.DataFrame({'sepal_length': [sepal_length], 'sepal_width': [sepal_width], 'petal_length': [petal_length], 'petal_width': [petal_width]})

In [14]:
# Predict the species of the new Iris flower
species_prediction = model.predict(new_iris_df)[0]

In [15]:
# Print the species prediction
print('Species prediction:', species_prediction)

Species prediction: Iris-versicolor
