## Read the dataset into the Python environment:

In [1]:
import pandas as pd
# Read the dataset
dataset = pd.read_excel('iris.xls')

## Do the necessary pre-processing steps:

In [2]:
# Check the basic details of the dataset
print(dataset.head()) # Print the first few rows of the dataset

    SL   SW   PL   PW Classification
0  5.1  3.5  1.4  0.2    Iris-setosa
1  4.9  3.0  1.4  0.2    Iris-setosa
2  NaN  3.2  1.3  0.2    Iris-setosa
3  4.6  3.1  1.5  0.2    Iris-setosa
4  5.0  3.6  1.4  0.2    Iris-setosa


In [3]:
print(dataset.info()) # Display information about the dataset (e.g., column␣↪names, data types)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   SL              143 non-null    float64
 1   SW              144 non-null    float64
 2   PL              144 non-null    float64
 3   PW              150 non-null    float64
 4   Classification  150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Check for missing values
print(dataset.isnull().sum())

SL                7
SW                6
PL                6
PW                0
Classification    0
dtype: int64


In [5]:
# Drop rows with missing values
dataset.dropna(inplace=True)

In [7]:
# Separate features (SL, SW, PL, PW) and the target variable (Classification)
X = dataset[['SL', 'SW', 'PL', 'PW']]
y = dataset['Classification']
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)
# Perform feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Evaluate multiple classification algorithms to find the best result:

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Initialize the classifiers
classifiers = {
'Logistic Regression': LogisticRegression(),
'Decision Tree': DecisionTreeClassifier(),
'Random Forest': RandomForestClassifier(),
'Support Vector Machine': SVC()
}
# Train and evaluate each classifier
results = {}
for clf_name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[clf_name] = accuracy
# Print the results
for clf_name, accuracy in results.items():
    print(f'{clf_name}: {accuracy}')
# Find the best performing model
best_model = max(results, key=results.get)
print(f'Best Model: {best_model}')

Logistic Regression: 1.0
Decision Tree: 1.0
Random Forest: 1.0
Support Vector Machine: 1.0
Best Model: Logistic Regression
