In [1]:
# Import required libraries
import streamlit as st  # Web app framework for creating interactive data applications
import pandas as pd  # Data manipulation and analysis library
from sklearn.datasets import load_iris  # Load the classic iris dataset
from sklearn.ensemble import RandomForestClassifier  # Machine learning classifier


@st.cache_data  # Streamlit decorator: keep the returned value cached between calls
def load_data():
    """
    Build the iris table used for classification.

    The four measurement columns come straight from scikit-learn's bundled
    iris dataset; a trailing "species" column carries the numeric class label
    of each row.

    Returns:
        tuple: (DataFrame holding the feature columns plus "species",
        the dataset's class names indexed by numeric label)
    """
    dataset = load_iris()
    # Feature matrix first, named with the dataset's own column labels
    features = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
    # Append the numeric labels as the last column
    labelled = features.assign(species=dataset.target)
    return labelled, dataset.target_names




In [2]:
# Unpack the (DataFrame, class names) pair returned by load_data()
df, target_names = load_data()

2025-06-28 21:33:16.659 
  command:

    streamlit run /Users/apple/Desktop/scrubber/.venv/lib/python3.9/site-packages/ipykernel_launcher.py [ARGUMENTS]


In [3]:
# Display the whole table; the notebook elides the middle rows when rendering it
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [4]:
# Feature columns only: all rows, every column except the last one ("species")
df.iloc[:,:-1]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [5]:
# Label column: one integer class code per row
df['species']

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: species, Length: 150, dtype: int64

In [6]:
# Number of rows per class code, to check how balanced the classes are
df['species'].value_counts()

species
0    50
1    50
2    50
Name: count, dtype: int64

In [8]:
# Measurements for the single flower to classify (same values as row 0 of df)
sepal_length, sepal_width = 5.1, 3.5
petal_length, petal_width = 1.4, 0.2

# Two-dimensional layout: outer list = samples, inner list = the four features
input_data = [
    [sepal_length, sepal_width, petal_length, petal_width],
]


In [15]:
# Train a random forest on the measurement columns, then classify the sample.
feature_frame = df.iloc[:, :-1]  # every column except the trailing "species" label
model = RandomForestClassifier(random_state=42)  # fixed seed: reruns build the same forest
model.fit(feature_frame, df['species'])

# Give the sample the training column names. Predicting from a bare list with a
# model fitted on a DataFrame makes scikit-learn warn about missing feature names.
sample = pd.DataFrame(input_data, columns=feature_frame.columns)
prediction = model.predict(sample)

# Map the numeric class code back to its readable name and show it
predicted_species = target_names[prediction[0]]
print(predicted_species)

setosa


