In [1]:
from google.colab import files
import os

# Ask the user to pick a file. The rest of this cell treats `uploaded` as a
# mapping of file name -> raw file bytes.
uploaded = files.upload()

# If nothing was uploaded (e.g. the dialog was dismissed), indexing the first
# key below would fail with an opaque IndexError. Fail early with a message
# that tells the user what to do instead.
if not uploaded:
    raise ValueError('No file was uploaded. Re-run this cell and choose a CSV file.')

# Only the first uploaded file is used; any additional files are ignored here.
file_name = next(iter(uploaded))
print(f'User uploaded file "{file_name}" with length {len(uploaded[file_name])} bytes')

# Save the file to the current working directory
with open(file_name, 'wb') as f:
    f.write(uploaded[file_name])

print(f'File saved to {os.path.join(os.getcwd(), file_name)}')

Saving Iris.csv to Iris.csv
User uploaded file "Iris.csv" with length 5107 bytes
File saved to /content/Iris.csv


In [2]:
# =================== Iris Flower Classification ===================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
import joblib   # to save model

# 1️⃣ Load Dataset
# Iris.csv is read from the current working directory.
df = pd.read_csv("Iris.csv")
print("\nFirst 5 Rows:\n", df.head())

# Discard the 'Id' column when the file has one; errors='ignore' turns the
# drop into a no-op when the column is absent.
df = df.drop(columns=['Id'], errors='ignore')

# 2️⃣ Features & Target
# Every column except the 'Species' label is used as a feature.
X = df.drop(columns='Species')

# Encode Target
# `le` is kept so integer predictions can be mapped back to species names.
# For this dataset: Iris-setosa=0, Iris-versicolor=1, Iris-virginica=2.
le = LabelEncoder()
y = le.fit_transform(df['Species'])

# 3️⃣ Train-Test Split
# Hold out 20% of the rows for testing; stratify on the encoded labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# 4️⃣ Check Class Balance
# Number of training samples per encoded class.
class_counts = np.bincount(y_train)
print("\nClass distribution in training set:", class_counts)

# Oversample with SMOTE only when the per-class counts differ at all.
if class_counts.min() == class_counts.max():
    print("✅ Dataset is already balanced. SMOTE not applied.")
else:
    print("⚠️ Dataset is imbalanced. Applying SMOTE...")
    sm = SMOTE(random_state=42)
    X_train, y_train = sm.fit_resample(X_train, y_train)
    print("✅ After SMOTE, class counts:", np.bincount(y_train))

# 5️⃣ Feature Scaling
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics are learned from the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# 6️⃣ Train Random Forest Model
# class_weight="balanced" is kept as a second safeguard against class imbalance.
rf_params = {
    "n_estimators": 200,
    "max_depth": 8,
    "class_weight": "balanced",
    "random_state": 42,
}
model = RandomForestClassifier(**rf_params).fit(X_train, y_train)

# 7️⃣ Evaluation
# Score the held-out split: overall accuracy, per-class metrics labelled with
# the original species names, and the raw confusion matrix.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred, target_names=le.classes_)
test_confusion = confusion_matrix(y_test, y_pred)

print("\n🎯 Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)
print("Confusion Matrix:\n", test_confusion)

# 8️⃣ Save Model & Scaler for Future Use
# Persist everything needed to reproduce a prediction: the classifier, the
# fitted scaler and the label encoder.
artifacts = {
    "iris_model.pkl": model,
    "iris_scaler.pkl": scaler,
    "iris_label_encoder.pkl": le,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)
print("\n✅ Model, Scaler & Label Encoder Saved Successfully!")

# Example Prediction
# Build the sample as a DataFrame carrying the training feature names
# (X.columns). `scaler` was fitted on a DataFrame, so passing it a bare NumPy
# array makes scikit-learn warn that X lacks valid feature names and leaves
# the column order unchecked.
sample = pd.DataFrame([[5.1, 3.5, 1.4, 0.2]], columns=X.columns)   # Example measurement
sample_scaled = scaler.transform(sample)
# Map the predicted class code back to its species name.
pred_species = le.inverse_transform(model.predict(sample_scaled))
print("\n🌸 Predicted Species for sample:", pred_species[0])



First 5 Rows:
    Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

Class distribution in training set: [40 40 40]
✅ Dataset is already balanced. SMOTE not applied.

🎯 Accuracy: 0.9

Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.82      0.90      0.86        10
 Iris-virginica       0.89      0.80      0.84        10

       accuracy                           0.90        30
      macro avg       0.90      0.90      0.90        30
   weighted avg       0.90      0.90  



In [4]:
import pandas as pd

# Feature names, in the same order as the training columns
columns = ['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']

# Hand-picked measurements to classify, one row per flower
sample_rows = [
    [5.0, 3.4, 1.5, 0.2],
    [4.7, 3.2, 1.3, 0.2],
    [5.1, 3.8, 1.6, 0.3],
    [6.0, 2.9, 4.5, 1.5],
    [5.7, 2.8, 4.1, 1.3],
    [6.1, 3.0, 4.7, 1.4],
    [6.5, 3.0, 5.8, 2.2],
    [7.1, 3.1, 5.9, 2.1],
    [6.7, 3.3, 5.7, 2.5],
]
test_samples_df = pd.DataFrame(sample_rows, columns=columns)

# Scale with the already-fitted scaler, classify, then translate the class
# codes back into species names.
test_scaled = scaler.transform(test_samples_df)
predicted_codes = model.predict(test_scaled)
predictions = le.inverse_transform(predicted_codes)

for sample_number, species in enumerate(predictions, start=1):
    print(f"Sample {sample_number}: Predicted → {species}")


Sample 1: Predicted → Iris-setosa
Sample 2: Predicted → Iris-setosa
Sample 3: Predicted → Iris-setosa
Sample 4: Predicted → Iris-versicolor
Sample 5: Predicted → Iris-versicolor
Sample 6: Predicted → Iris-versicolor
Sample 7: Predicted → Iris-virginica
Sample 8: Predicted → Iris-virginica
Sample 9: Predicted → Iris-virginica


In [5]:
from google.colab import files

# Download the saved files
# One download is requested per saved artifact, in the order they were written.
for artifact_name in ("iris_model.pkl", "iris_scaler.pkl", "iris_label_encoder.pkl"):
    files.download(artifact_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>