In [1]:
# -----------------------------
# Step 1: Import Required Libraries
# -----------------------------
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

# -----------------------------
# Step 2: Load the Iris Dataset
# -----------------------------
# Read the CSV (path is relative to the current working directory).
df = pd.read_csv("Iris.csv")

# Preview: heading and first five rows in one call; sep="\n" places the
# table on the line after the heading.
print("First 5 rows:", df.head(), sep="\n")

# -----------------------------
# Step 3: Drop Irrelevant Columns
# -----------------------------
# "Id" is not used for prediction, so remove it (axis=1 selects columns).
df = df.drop("Id", axis=1)

# -----------------------------
# Step 4: Check for Missing Values
# -----------------------------
print("\nMissing values per column:")
print(df.isnull().sum())

# Mean-impute the numeric columns as a safeguard against missing values.
# Columns are chosen by dtype rather than by position, so this step does not
# rely on the label being the last column or on every other column being
# numeric.
# NOTE: the imputer is fitted on the whole frame. If the data ever contains
# real gaps, fit it on the training split only to avoid leaking test-set
# statistics into training.
numeric_cols = df.select_dtypes(include="number").columns
imputer = SimpleImputer(strategy="mean")
df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

# -----------------------------
# Step 5: Encode the Target Labels (Species)
# -----------------------------
# Fit the encoder on the species names, then swap the names for their
# integer codes. The fitted encoder is kept so codes can be mapped back.
label_encoder = LabelEncoder().fit(df['Species'])
df['Species'] = label_encoder.transform(df['Species'])

# Show the first rows of the preprocessed frame
print("\nAfter preprocessing:", df.head(), sep="\n")


First 5 rows:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

Missing values per column:
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

After preprocessing:
   SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Species
0            5.1           3.5            1.4           0.2        0
1            4.9           3.0            1.4           0.2        0
2            4.7           3.2            1.3           0.2        0
3            4.6           3.1            1.5           0.2        0
4            5.0           3.6            1.4           0.2        0

In [2]:
# ----------------------------------------
# Step 1: Import Required Libraries
# ----------------------------------------
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

# ----------------------------------------
# Step 2: Split Features and Target
# ----------------------------------------
y = df["Species"]                  # Target: the encoded species column
X = df.drop(columns=["Species"])   # Features: every remaining column

# ----------------------------------------
# Step 3: Split into Training and Testing Sets
# ----------------------------------------
# NOTE: no `stratify` argument is passed, so class proportions in the two
# splits are not guaranteed to match.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,     # hold out 20% of the rows for evaluation
    random_state=42,   # fixed seed so the split is reproducible
)

# ----------------------------------------
# Step 4: Initialize and Train the Model
# ----------------------------------------
# fit() returns the estimator itself, so construction and training chain.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# ----------------------------------------
# Step 5: Make Predictions
# ----------------------------------------
# Predict class codes for the held-out rows.
y_pred = model.predict(X_test)

# ----------------------------------------
# Step 6: Evaluate the Model
# ----------------------------------------
# Macro averaging gives every class equal weight in the multi-class scores.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')  # for multi-class
recall = recall_score(y_test, y_pred, average='macro')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")

# Optional: Detailed performance report
# Pass the full set of encoded class ids explicitly. Without `labels`, the
# report infers classes from y_test/y_pred and raises a ValueError when a class
# is absent from the split, because `target_names` would no longer match in
# length.
class_ids = label_encoder.transform(label_encoder.classes_)
print("\nClassification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=label_encoder.classes_,
    )
)


Accuracy: 1.00
Precision: 1.00
Recall: 1.00

Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

