<a href="https://colab.research.google.com/github/anushk218/anushk18.github.io/blob/main/Iris.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from collections import Counter

import pandas as pd
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [3]:
# Read the Iris measurements from the CSV in the working directory.
raw_flower_data = pd.read_csv('Iris.csv')

# Tidy the headers: remove every "Cm" substring, then lowercase what is left
# (a header such as "SepalLengthCm" would become "sepallength").
flower_data = raw_flower_data.rename(
    columns=lambda header: header.replace('Cm', '').lower()
)

# Show five randomly chosen rows as a quick sanity check of the load.
preview_rows = flower_data.sample(5)
print(preview_rows)

      id  sepallength  sepalwidth  petallength  petalwidth          species
58    59          6.6         2.9          4.6         1.3  Iris-versicolor
134  135          6.1         2.6          5.6         1.4   Iris-virginica
75    76          6.6         3.0          4.4         1.4  Iris-versicolor
99   100          5.7         2.8          4.1         1.3  Iris-versicolor
81    82          5.5         2.4          3.7         1.0  Iris-versicolor


In [4]:
# Drop the "Iris-" text from each label so only the bare species name remains.
species_labels = flower_data['species'].str.replace('Iris-', '')
flower_data['species'] = species_labels

# Inputs are the four measurement columns; the target is the species label.
feature_columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
X = flower_data[feature_columns]
y = flower_data['species']

# Hold out a quarter of the rows for evaluation; the fixed random_state keeps
# the same rows in each partition on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=10,
)

In [5]:
# Initialize model.
# max_depth=3 keeps the tree shallow. random_state is pinned because
# scikit-learn permutes the features at every split, so ties between equally
# good splits could otherwise be resolved differently from one run to the
# next. The seed matches the one used for the train/test split.
flower_identifier = DecisionTreeClassifier(max_depth=3, random_state=10)

# Train the model on the training partition only; the test rows stay unseen.
flower_identifier.fit(X_train, y_train)

In [6]:
# Predict a species for every held-out flower.
predictions = flower_identifier.predict(X_test)

# Accuracy is the share of test rows whose predicted label matches the truth.
is_correct = y_test.eq(predictions)
correct_predictions = is_correct.sum()
total_samples = y_test.shape[0]
accuracy = correct_predictions / total_samples

print(f"Model correctly identified {accuracy:.1%} of flowers")

Model correctly identified 97.4% of flowers


In [7]:
# Find misclassified flowers: keep the true species of every test row whose
# prediction disagrees with it, then tally how often each true species was
# missed. Note the tally is keyed by the actual label, not by what the model
# predicted instead.
wrong_predictions = y_test[predictions != y_test]
print("Most common errors:")
print(Counter(wrong_predictions))

Most common errors:
Counter({'virginica': 1})


In [8]:
# Sample flower measurements (sepallength, sepalwidth, petallength, petalwidth)
unknown_flowers = [
    [5.2, 3.4, 1.3, 0.2],
    [6.7, 3.1, 4.7, 1.5],
    [7.7, 2.8, 6.7, 2.0]
]

# The model was fitted on a DataFrame, so give the new rows the same column
# labels. Passing a bare list makes recent scikit-learn versions warn that X
# has no valid feature names, and it hides any column-order mistake.
unknown_features = pd.DataFrame(unknown_flowers, columns=X_train.columns)

# Get predictions
identified_species = flower_identifier.predict(unknown_features)

# Report one line per flower, numbered from 1, with the species capitalised.
print("\nPredicted species:")
for i, species in enumerate(identified_species, 1):
    print(f"Flower {i}: {species.title()}")


Predicted species:
Flower 1: Setosa
Flower 2: Versicolor
Flower 3: Virginica


