In [None]:
# 🌸 Iris Flower Classification using Random Forest

## 🔍 Problem Statement:
The Iris dataset contains measurements of different types of Iris flowers:
- Sepal Length
- Sepal Width
- Petal Length
- Petal Width

Each flower belongs to one of the following 3 species:
- Setosa
- Versicolor
- Virginica

### 🎯 Goal:
We want to build a **Machine Learning model** that can:
- Learn from flower measurements
- Predict the correct species (Setosa / Versicolor / Virginica)
- Use **Random Forest Classifier** for prediction

### 📚 Dataset Info:
- This is a famous dataset used in machine learning.
- It is already available in sklearn or can be downloaded separately.
- We are using the `.csv` version here.

### 🛠️ Steps:
1. Import and explore the dataset
2. Visualize the data
3. Prepare features and target
4. Train a Random Forest model
5. Evaluate using accuracy and confusion matrix
6. Show the results

---


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  # ✅ Correct
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# ✅ Correct




In [4]:
# 📌 Step 2: Load CSV file (local path)
data = pd.read_csv(r"D:\Download\archive (1)\Iris.csv")

# 📌 Step 3: Data ka pehla look
print("Top 5 Rows:\n", data.head())


Top 5 Rows:
    Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [7]:
data.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [14]:
# Step 5: Split dataset into features (X) and target (y)
X = data.drop('Species', axis=1)  # All columns except 'Species'
y = data['Species']               # Target column

# Step 6: Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 7: Create a Random Forest Classifier model and train it
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Step 8: Make predictions on the test set
y_pred = model.predict(X_test)


In [15]:
# Step 9: Evaluate the model's performance
print("\n Accuracy Score:", accuracy_score(y_test, y_pred))
print("\n Classification Report:\n", classification_report(y_test, y_pred))


 Accuracy Score: 1.0

 Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

