# Exploratory Data Analysis (EDA) and Model Demo

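This notebook demonstrates the capabilities of our custom ML library (`ml_library`) on synthetic data, including linear and polynomial regression and K-Means clustering.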

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Add the parent directory to the module search path so the `ml_library`
# imports below can be resolved.
# NOTE(review): '..' is a relative entry, so this presumably assumes the kernel's
# working directory sits one level below the directory containing ml_library — confirm.
sys.path.append('..')

from ml_library.linear_model import LinearRegression, PolynomialRegression
from ml_library.neighbors import KNeighborsClassifier
from ml_library.cluster import KMeans
from ml_library.utils import StandardScaler, accuracy_score

%matplotlib inline

## 1. Linear & Polynomial Regression Demo

In [None]:
# Generate synthetic non-linear data: y = 0.5*x^2 + x + 2 plus standard-normal noise.
# Seeding the legacy global NumPy RNG makes the sample identical on every
# Restart & Run All; later np.random draws in the notebook continue from this state.
np.random.seed(42)
X = 6 * np.random.rand(100, 1) - 3  # 100 points, uniform on [-3, 3), as a column vector
y = 0.5 * X**2 + X + 2 + np.random.randn(100, 1)  # same (100, 1) shape as X

# Explicit figure/axes interface; axis labels and a legend let the figure stand alone.
fig, ax = plt.subplots()
ax.scatter(X, y, color='blue', label='Data')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('Synthetic Non-Linear Data')
ax.legend()  # without this call the 'Data' label is never rendered
plt.show()

In [None]:
# Fit both models on the same training data. y was generated as a (100, 1)
# column, so ravel() flattens it to a 1-D target before fitting.

# Linear Regression (underfitting expected: the data follows a quadratic trend)
lin_reg = LinearRegression()
lin_reg.fit(X, y.ravel())

# Polynomial Regression (degree 2, matching the quadratic generating function)
poly_reg = PolynomialRegression(degree=2)
poly_reg.fit(X, y.ravel())

# Predict on an evenly spaced grid over [-3, 3] (the range X was sampled from)
# so each fitted curve is drawn as a smooth line.
X_test = np.linspace(-3, 3, 100).reshape(-1, 1)
y_lin_pred = lin_reg.predict(X_test)
y_poly_pred = poly_reg.predict(X_test)

# Visualization: raw data with both fitted curves overlaid.
fig, ax = plt.subplots()
ax.scatter(X, y, color='blue', alpha=0.5, label='Data')
ax.plot(X_test, y_lin_pred, color='red', label='Linear Regression')
ax.plot(X_test, y_poly_pred, color='green', label='Polynomial Regression (d=2)')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('Linear vs Polynomial Regression')
ax.legend()
plt.show()

## 2. K-Means Clustering Demo

In [None]:
# Generate three synthetic 2-D clusters by hand with NumPy only (no scikit-learn
# dependency): 50 standard-normal points shifted to each of (0, 5), (5, 0), (0, 0).
# There is no reseed here; when cells run in order the draws continue from the
# np.random state seeded earlier in the notebook.
X_blob = np.vstack([
    np.random.randn(50, 2) + [0, 5],
    np.random.randn(50, 2) + [5, 0],
    np.random.randn(50, 2) + [0, 0]
])

# Fit the custom K-Means with one cluster per generated blob.
kmeans = KMeans(n_clusters=3)
kmeans.fit(X_blob)
labels = kmeans.predict(X_blob)  # used below to color each training point

# Points colored by predicted label, with the fitted centroids overlaid.
fig, ax = plt.subplots()
ax.scatter(X_blob[:, 0], X_blob[:, 1], c=labels, cmap='viridis')
ax.scatter(kmeans.centroids[:, 0], kmeans.centroids[:, 1], s=300, c='red', marker='X', label='Centroids')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title('Custom K-Means Clustering')
ax.legend()
plt.show()