In [None]:
# Import required libraries
import numpy as np
import pandas as pd

# ML datasets and models
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Fetch the Iris dataset bundled with scikit-learn
iris = load_iris()

# Class labels stay a plain array; the measurements become a DataFrame
# whose columns carry the dataset's own feature names
y = iris.target
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Standardize features (important for PCA & K-Means):
# learn the scaling parameters from X, then apply them to X
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)


In [None]:
from sklearn.decomposition import PCA

# Project the standardized features onto their first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Report the fraction of total variance captured by each retained component
explained_ratio = pca.explained_variance_ratio_
print("Explained Variance Ratio:", explained_ratio)


Explained Variance Ratio: [0.72962445 0.22850762]


In [None]:
# Segmentation Technique
from sklearn.cluster import KMeans

# Create K-Means model with 3 clusters (known from Iris dataset).
# n_init is pinned explicitly: the library default differs between
# scikit-learn releases, and a single k-means++ start can converge to a poor
# local optimum. Ten restarts (the lowest-inertia run is kept) make the
# clustering stable across versions; random_state keeps it repeatable.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# Fit on the PCA-reduced data and get one cluster label per sample.
# fit_predict fits the model and returns the same values as kmeans.labels_.
clusters = kmeans.fit_predict(X_pca)

# Collect the 2-D coordinates and their cluster assignment in one DataFrame
X_clustered = pd.DataFrame(X_pca, columns=["PC1", "PC2"]).assign(Cluster=clusters)

print(X_clustered.head())


        PC1       PC2  Cluster
0 -2.264703  0.480027        1
1 -2.080961 -0.674134        2
2 -2.364229 -0.341908        2
3 -2.299384 -0.597395        2
4 -2.389842  0.646835        1


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
# Split data into train (80%) and test (20%).
# stratify=y keeps the class proportions equal in both partitions; with only
# 150 samples an unstratified split gives an uneven per-class test support,
# which makes the per-class metrics below harder to compare.
# Note: the classifier is trained on the unscaled features X, not X_scaled.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Create Random Forest model
rf = RandomForestClassifier(
    n_estimators=100,      # number of trees
    random_state=42        # fixed seed so the fitted forest is repeatable
)

# Train the model on the training partition only
rf.fit(X_train, y_train)

# Make predictions on the held-out test partition
y_pred = rf.predict(X_test)

# Evaluate model: overall accuracy, then per-class precision/recall/F1
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

