======================================================
🔵 PART 1 — LINEAR REGRESSION (FROM PDF THEORY)
======================================================

In [None]:
# Seed NumPy's global generator so the synthetic data set is reproducible.
np.random.seed(0)

# 100 inputs drawn uniformly from [0, 2), stored as a column vector;
# targets follow the line y = 4 + 3x with standard-normal noise added.
X = np.random.rand(100, 1) * 2
y = (4 + 3 * X) + np.random.randn(100, 1)

# Design matrix: a leading column of ones (the bias term) followed by X.
X_b = np.hstack((np.ones((100, 1)), X))

## Cost Function (From PDF)

$$J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$$

In [None]:
def compute_cost(X, y, theta):
    """Return the least-squares cost J(theta) = (1 / (2m)) * sum((X.theta - y)^2).

    Parameters
    ----------
    X : array of shape (m, n)
        Design matrix; include a column of ones if a bias term is wanted.
    y : array of shape (m, 1) or (m,)
        Target values.
    theta : array of shape (n, 1) or (n,)
        Parameter vector.

    Returns
    -------
    float
        The scalar cost.
    """
    m = len(y)
    predictions = X.dot(theta)
    targets = np.asarray(y)
    # A column-vector prediction minus a flat target vector (or the reverse)
    # would broadcast to an (m, m) matrix and silently inflate the cost, so
    # bring the targets to the prediction layout when only the shape differs.
    if targets.shape != np.shape(predictions) and targets.size == np.size(predictions):
        targets = targets.reshape(np.shape(predictions))
    return (1/(2*m)) * np.sum((predictions - targets)**2)

## Batch Gradient Descent (Vectorized)

From PDF update rule:

$$\theta := \theta - \alpha \frac{\partial J}{\partial \theta}$$

In [None]:
def gradient_descent(X, y, theta, learning_rate, iterations):
    """Minimise the least-squares cost with batch gradient descent.

    Each iteration applies
    ``theta := theta - learning_rate * (1/m) * X^T (X.theta - y)``
    using the full data set.

    Parameters
    ----------
    X : array of shape (m, n)
        Design matrix; include a column of ones if a bias term is wanted.
    y : array of shape (m, 1) or (m,)
        Target values.
    theta : array of shape (n, 1) or (n,)
        Starting parameters. The caller's array is not modified.
    learning_rate : float
        Step size applied to the gradient.
    iterations : int
        Number of full-batch updates to perform.

    Returns
    -------
    ndarray
        The parameters after the final update, in the same layout as ``theta``.
    """
    m = len(y)

    # Align the targets with the prediction layout once, up front. Mixing a
    # column-vector prediction with a flat target vector would otherwise
    # broadcast to an (m, m) residual and turn theta into a matrix.
    targets = np.asarray(y)
    initial = X.dot(theta)
    if targets.shape != np.shape(initial) and targets.size == np.size(initial):
        targets = targets.reshape(np.shape(initial))

    for _ in range(iterations):
        gradients = (1/m) * X.T.dot(X.dot(theta) - targets)
        theta = theta - learning_rate * gradients

    return theta

# Start from a random (2, 1) parameter vector and refine it with
# 1000 batch updates at a learning rate of 0.1.
theta = gradient_descent(
    X_b,
    y,
    np.random.randn(2, 1),
    learning_rate=0.1,
    iterations=1000,
)

print("Theta using Gradient Descent:\n", theta)

## Normal Equation (Closed Form)

$$\theta = (X^T X)^{-1} X^T y$$

In [None]:
# Normal equation: theta is the solution of (X^T X) theta = X^T y.
# Solve that linear system directly instead of forming the explicit inverse
# and multiplying by it; the result is the same closed-form estimate.
theta_normal = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y))
print("Theta using Normal Equation:\n", theta_normal)

In [None]:
# Fitted values for the training inputs under the current `theta`.
y_pred = X_b @ theta

# Draw on the current axes: raw data as points, the fitted line in red.
axes = plt.gca()
axes.scatter(X, y)
axes.plot(X, y_pred, color='red')
axes.set_title("Linear Regression - CS229 Implementation")
axes.set_xlabel("X")
axes.set_ylabel("y")
plt.show()

======================================================
🔵 PART 2 — STOCHASTIC GRADIENT DESCENT (FROM PDF)
======================================================

In [None]:
def stochastic_gradient_descent(X, y, theta, learning_rate, epochs):
    """Fit least-squares parameters by updating on one sample at a time.

    Every epoch sweeps the samples in index order (no shuffling) and, for
    each one, steps theta against that single sample's gradient
    ``x_i^T (x_i.theta - y_i)``.

    Parameters
    ----------
    X : array of shape (m, n)
        Design matrix.
    y : array of shape (m, 1)
        Target values.
    theta : array of shape (n, 1)
        Starting parameters. The caller's array is not modified.
    learning_rate : float
        Step size for each per-sample update.
    epochs : int
        Number of full passes over the data.

    Returns
    -------
    ndarray
        The parameters after the last update.
    """
    n_samples = len(y)

    for _ in range(epochs):
        for idx in range(n_samples):
            # Slice (rather than index) so the row stays two-dimensional.
            sample = X[idx:idx + 1]
            target = y[idx:idx + 1]

            residual = sample.dot(theta) - target
            theta = theta - learning_rate * sample.T.dot(residual)

    return theta

# Random (2, 1) starting point, then 50 in-order passes over the data
# with a per-sample learning rate of 0.01.
theta_sgd = stochastic_gradient_descent(
    X_b,
    y,
    np.random.randn(2, 1),
    learning_rate=0.01,
    epochs=50,
)

print("Theta using SGD:\n", theta_sgd)

======================================================
🔵 PART 3 — LOGISTIC REGRESSION (FROM PDF)
======================================================

In [None]:
# Synthetic binary classification problem: 500 samples, two features, one
# cluster per class. `random_state` is pinned so this cell produces the same
# data every time it runs, independent of how much of the global NumPy
# random stream earlier cells have consumed.
# NOTE: this rebinds X and y, replacing the regression data from Part 1.
X, y = make_classification(n_samples=500, n_features=2,
                           n_redundant=0, n_clusters_per_class=1,
                           random_state=0)

# Prepend a column of ones (bias term) and make the labels a column vector.
X = np.c_[np.ones((X.shape[0],1)), X]
y = y.reshape(-1,1)

## Sigmoid Function

$$g(z) = \frac{1}{1 + e^{-z}}$$

In [None]:
def sigmoid(z):
    """Return the logistic function g(z) = 1 / (1 + e^(-z)), element-wise.

    The direct formula overflows in ``exp`` for large negative ``z``. This
    version only ever exponentiates a non-positive number, so it produces
    the same values without overflow.

    Parameters
    ----------
    z : scalar or array
        Input value(s).

    Returns
    -------
    scalar or ndarray
        g(z), with the same shape as ``z``.
    """
    # exp(-|z|) always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + e^-z)
    # z <  0:  e^z / (1 + e^z)   (the same function, rearranged)
    numerator = np.where(z >= 0, 1.0, decay)
    return numerator / (1 + decay)

## Logistic Cost Function (Cross Entropy)

In [None]:
def logistic_cost(X, y, theta):
    """Return the mean cross-entropy cost of a logistic-regression model.

    With h = sigmoid(X.theta), the per-sample loss
    ``-[y*log(h) + (1 - y)*log(1 - h)]`` simplifies algebraically to
    ``log(1 + e^z) - y*z`` where ``z = X.theta``. Evaluating it in that form
    avoids taking ``log(0)`` when the sigmoid saturates, which would turn
    the cost into NaN or infinity.

    Parameters
    ----------
    X : array of shape (m, n)
        Design matrix; include a column of ones if a bias term is wanted.
    y : array of shape (m, 1) or (m,)
        Binary labels (0 or 1).
    theta : array of shape (n, 1) or (n,)
        Parameter vector.

    Returns
    -------
    float
        The average cross-entropy over the m samples.
    """
    m = len(y)
    z = X.dot(theta)
    labels = np.asarray(y)
    # Keep labels and scores in the same layout so `labels * z` is an
    # element-wise product and never an (m, m) broadcast.
    if labels.shape != np.shape(z) and labels.size == np.size(z):
        labels = labels.reshape(np.shape(z))
    # logaddexp(0, z) computes log(e^0 + e^z) = log(1 + e^z) without overflow.
    return (1/m) * np.sum(np.logaddexp(0, z) - labels * z)

## Logistic Gradient Descent

In [None]:
def logistic_gradient_descent(X, y, theta, lr, iterations):
    """Fit logistic-regression parameters with batch gradient descent.

    Each iteration applies
    ``theta := theta - lr * (1/m) * X^T (sigmoid(X.theta) - y)``
    using the full data set.

    Parameters
    ----------
    X : array of shape (m, n)
        Design matrix.
    y : array of shape (m, 1)
        Binary labels.
    theta : array of shape (n, 1)
        Starting parameters. The caller's array is not modified.
    lr : float
        Learning rate.
    iterations : int
        Number of full-batch updates.

    Returns
    -------
    ndarray
        The parameters after the final update.
    """
    n_samples = len(y)

    for _ in range(iterations):
        # Difference between predicted probabilities and the labels.
        residual = sigmoid(X.dot(theta)) - y
        theta = theta - lr * ((1/n_samples) * X.T.dot(residual))

    return theta

# One parameter per column of X (bias column included), all starting at
# zero; then 1000 batch updates at a learning rate of 0.1.
theta_log = logistic_gradient_descent(
    X,
    y,
    np.zeros((X.shape[1], 1)),
    lr=0.1,
    iterations=1000,
)

print("Logistic Theta:\n", theta_log)

## Accuracy & Report

In [None]:
# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels.
probabilities = sigmoid(X.dot(theta_log))
predictions = (probabilities >= 0.5).astype(int)

# Scored against the same X and y the model was fitted on, so these are
# training-set metrics.
print("Accuracy:", accuracy_score(y, predictions))
print(classification_report(y, predictions))

======================================================
🔵 PART 4 — GENERATIVE MODEL (GDA) FROM PDF
======================================================

Gaussian Discriminant Analysis assumption:

Class conditional distribution is Gaussian.

Estimate:
- φ (class prior)
- μ₀, μ₁ (means)
- Σ (shared covariance)

## Implement GDA

In [None]:
def gda_train(X, y):
    """Estimate Gaussian Discriminant Analysis parameters with a shared covariance.

    Parameters
    ----------
    X : array of shape (m, n)
        Feature matrix (without a bias column).
    y : array of shape (m, 1) or (m,)
        Binary labels (0 or 1).

    Returns
    -------
    phi : float
        Mean of the labels, i.e. the fraction of samples in class 1.
    mu0 : ndarray of shape (n,)
        Mean feature vector of the samples labelled 0.
    mu1 : ndarray of shape (n,)
        Mean feature vector of the samples labelled 1.
    sigma : ndarray of shape (n, n)
        Shared covariance: the average outer product of each sample's
        deviation from its own class mean.
    """
    m, _ = X.shape
    labels = np.asarray(y).reshape(-1)

    phi = np.mean(y)

    mu0 = np.mean(X[labels == 0], axis=0)
    mu1 = np.mean(X[labels == 1], axis=0)

    # Subtract each sample's class mean in one vectorized step. Any sample
    # not labelled 0 is centred on mu1.
    centered = X - np.where((labels == 0)[:, None], mu0, mu1)

    # The sum over samples of outer(d_i, d_i) equals D^T D for the matrix D
    # of centred rows, so no per-sample Python loop is needed.
    sigma = centered.T.dot(centered) / m

    return phi, mu0, mu1, sigma

# Fit on the two real features only: column 0 of X is the constant bias term.
features = X[:, 1:]
phi, mu0, mu1, sigma = gda_train(features, y)

for label, value in (("Phi:", phi), ("Mu0:", mu0), ("Mu1:", mu1), ("Sigma:\n", sigma)):
    print(label, value)

## Final Conclusion

This notebook implemented key CS229 concepts:

- Linear Regression
- Gradient Descent (Batch & Stochastic)
- Normal Equation
- Logistic Regression
- Generative Learning Algorithm (GDA)

All implementations were derived directly from mathematical formulations 
without using built-in regression models.

This demonstrates deep conceptual and mathematical understanding of 
Machine Learning foundations.