# ML Mini Projects – Decision Tree, k-NN, Clustering, Regression

## 1. Setup & Data Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris, load_diabetes, make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, r2_score, mean_squared_error

# Apply seaborn's "whitegrid" theme to every figure drawn later in the notebook.
# set_theme() is the preferred spelling; sns.set() is only an alias for it.
sns.set_theme(style="whitegrid")


## 2. Decision Tree Classifier

In [None]:
# Supervised classification: decision tree on the Iris dataset
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Load Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# iris.target_names is a NumPy array; convert it to a plain list because some
# scikit-learn releases validate plot_tree's class_names strictly as a list.
iris_class_names = list(iris.target_names)

# Train/test split: hold out 20% for evaluation, seeded for reproducibility.
# NOTE: X_train / X_test / y_train / y_test are reused by the k-NN section.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model (seeded so repeated runs build the same tree)
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Predict
y_pred = dt_model.predict(X_test)

# Metrics
# sep="\n" starts each multi-line value on its own line. Printing it as a second
# argument after "...:\n" would insert a separator space before its first row
# and shift that row out of alignment with the rest.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print(
    "Classification Report:",
    classification_report(y_test, y_pred, target_names=iris_class_names),
    sep="\n",
)

# Plot tree on an explicit Axes so the figure can be titled
fig, ax = plt.subplots(figsize=(12, 6))
plot_tree(
    dt_model,
    feature_names=iris.feature_names,
    class_names=iris_class_names,
    filled=True,
    ax=ax,
)
ax.set_title("Decision Tree - Iris Data")
plt.show()


## 3. k-Nearest Neighbors Classifier

In [None]:
# Supervised classification: k-NN on the Iris dataset
from sklearn.neighbors import KNeighborsClassifier

# NOTE: this cell reuses `iris` and the X_train / X_test / y_train / y_test split
# created in the Decision Tree section, so that cell must be run first.

# Train model (each prediction is the majority vote of the 3 nearest training points)
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X_train, y_train)

# Predict
y_pred_knn = knn_model.predict(X_test)

# Metrics
# sep="\n" starts each multi-line value on its own line. Printing it as a second
# argument after "...:\n" would insert a separator space before its first row
# and shift that row out of alignment with the rest.
print("k-NN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("Confusion Matrix:", confusion_matrix(y_test, y_pred_knn), sep="\n")
print(
    "Classification Report:",
    classification_report(y_test, y_pred_knn, target_names=list(iris.target_names)),
    sep="\n",
)


## 4. Clustering (KMeans)

In [None]:
# Unsupervised clustering: KMeans on a synthetic blobs dataset
from sklearn.cluster import KMeans

# Create synthetic data. The generated labels are not used for clustering; they
# get a named throwaway variable because assigning to `_` would clobber
# IPython's "last output" variable.
X_blob, _blob_labels_unused = make_blobs(n_samples=300, centers=3, random_state=42)

# Apply KMeans
# n_init is set explicitly: its default differs between scikit-learn releases,
# so leaving it out makes the number of restarts version-dependent and triggers
# a FutureWarning on some versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
y_kmeans = kmeans.fit_predict(X_blob)

# Plot clusters (points coloured by assigned cluster) and the fitted centroids
centroids = kmeans.cluster_centers_
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X_blob[:, 0], X_blob[:, 1], c=y_kmeans, cmap='viridis', s=50)
ax.scatter(centroids[:, 0], centroids[:, 1], color='red', marker='x', s=200, label='Centroids')
ax.set_title('KMeans Clustering')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.legend()
plt.show()


## 5. Linear Regression

In [None]:
# Supervised regression: single-feature linear regression on the Diabetes dataset
from sklearn.linear_model import LinearRegression

# Load diabetes dataset
diabetes = load_diabetes()

# Regress on one column only so the fit can be drawn as a line in 2-D.
# Indexing with a list keeps X_reg two-dimensional (n_samples, 1), as fit() requires.
REG_FEATURE_IDX = 2
reg_feature_name = diabetes.feature_names[REG_FEATURE_IDX]
X_reg, y_reg = diabetes.data[:, [REG_FEATURE_IDX]], diabetes.target

# Train/test split: hold out 20% for evaluation, seeded for reproducibility
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

# Train model
lr_model = LinearRegression()
lr_model.fit(X_train_r, y_train_r)

# Predict
y_pred_lr = lr_model.predict(X_test_r)

# Metrics
print("R² Score:", r2_score(y_test_r, y_pred_lr))
print("MSE:", mean_squared_error(y_test_r, y_pred_lr))

# Plot held-out points against the fitted line, with labelled axes so the
# figure can be read on its own
fig, ax = plt.subplots()
ax.scatter(X_test_r, y_test_r, color='black', label='Actual')
ax.plot(X_test_r, y_pred_lr, color='blue', linewidth=2, label='Predicted')
ax.set_title('Linear Regression - Diabetes Data')
ax.set_xlabel(f"Feature: {reg_feature_name}")
ax.set_ylabel('Disease progression (target)')
ax.legend()
plt.show()


## 6. Final Summary – ML Mini Projects

This notebook demonstrates four fundamental machine learning algorithms across three major ML categories:

---

### 1. Supervised Learning – Classification
#### Decision Tree Classifier (using Iris Dataset)
- Learned patterns from labeled flower species
- Achieved high accuracy and visualized the decision tree
- Clearly separated classes based on petal length and width

#### k-Nearest Neighbors (k-NN Classifier, k=3)
- Classified flower species based on the majority of nearby data points
- Achieved perfect accuracy (100%) on the Iris test set

---

### 2. Unsupervised Learning – Clustering
#### KMeans Clustering (using Synthetic Blob Data)
- Automatically grouped unlabeled data into 3 clusters
- Successfully visualized clusters and centroids
- Demonstrated the concept of grouping based on proximity in feature space

---

### 3. Supervised Learning – Regression
#### Linear Regression (using Diabetes Dataset)
- Predicted disease progression using medical data
- Used a single feature (BMI, column 2 of the dataset) for univariate regression; extending the model to all features (multivariate) is a natural next step
- Evaluated model performance using R² score and Mean Squared Error (MSE)

---

### Key Takeaways:
- Explored **three core ML types**: classification, clustering, regression
- Applied **real-world datasets** (Iris, Diabetes) alongside synthetic data, with clean visualizations
- Understood the relationship between **data structure** and **model choice**
- Gained hands-on experience using **scikit-learn, matplotlib, seaborn, pandas**