In [1]:
import numpy as np
import pandas as pd
# Seed NumPy's global random state so the randomly drawn initial weights
# (np.random.randn) are the same on every Restart & Run All.
np.random.seed(2)

In [2]:
# Read the dataset, then separate the 'Writing' column (regression target)
# from the remaining columns (input features) as NumPy arrays.
data = pd.read_csv('student.csv')
Y = data['Writing'].values
X = data.drop(columns='Writing').values
X.shape, Y.shape

((1000, 2), (1000,))

In [3]:
def initialize_weights_and_bais(n):
    """Create the starting parameters for a linear model with ``n`` features.

    Parameters
    ----------
    n : int
        Number of input features (one weight is created per feature).

    Returns
    -------
    tuple
        ``(W, b)`` where ``W`` is a 1-D array of ``n`` values drawn from the
        standard normal distribution using NumPy's global random state, and
        ``b`` is the integer ``0``.
    """
    initial_bias = 0
    initial_weights = np.random.randn(n)  # 1-D, shape (n,)
    return initial_weights, initial_bias

In [4]:
def cost_function(h,Y):
    """Mean squared error between predictions ``h`` and targets ``Y``.

    Parameters
    ----------
    h : array-like
        Model predictions.
    Y : array-like
        Ground-truth values; ``len(Y)`` is used as the number of samples.

    Returns
    -------
    float
        Sum of the element-wise squared differences divided by ``len(Y)``.
    """
    squared_residuals = np.square(h - Y)
    return np.sum(squared_residuals) / len(Y)

#### Output of Linear Regression is

$$  \text{output}\;(h^j) = \sum_{i=1}^n X_i^j \, W_i + b  $$


$$  \text{cost}\;(J) = \frac{1}{m} \sum_{j=1}^m (Y^j - h^j)^2 = \frac{1}{m} \sum_{j=1}^m \Big( Y^j - \sum_{i=1}^n X_i^j \, W_i - b \Big)^2 $$


$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{j=1}^m -2 \Big( Y^j - \sum_{i=1}^n X_i^j \, W_i - b \Big) = - \frac{2}{m} \sum_{j=1}^m ( Y^j - h^j) $$


$$ \frac{\partial J }{\partial W_i} = \frac{1}{m} \sum_{j=1}^m -2 X_i^j \Big( Y^j - \sum_{k=1}^n X_k^j \, W_k - b \Big) = -\frac{2}{m} \sum_{j=1}^m X_i^j (Y^j - h^j) $$

### Example

Let $ h(x) = x \cdot w + b $ (a model with a single feature).

$$ J = \frac{1}{m} \sum_{j=1}^m \big( y^j - h(x)^j \big)^2 $$

$$ \frac{\partial J}{\partial w} = \frac{1}{m} \sum_{j=1}^m 2 \big( y^j - h(x)^j \big) \frac{\partial}{\partial w} \big(  y^j - x^j.w - b \big) = - \frac{2}{m} \sum_{j=1}^m x^j \big( y^j -  h(x)^j \big) $$

$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{j=1}^m 2 \big( y^j - h(x)^j \big) \frac{\partial}{\partial b} \big(  y^j - x^j.w - b \big) = - \frac{2}{m} \sum_{j=1}^m \big( y^j - h(x)^j \big) $$


In [5]:
def get_output(X,W,b):
    """Evaluate the linear model: ``np.dot(X, W)`` shifted by the bias ``b``.

    Parameters
    ----------
    X : array-like
        Input features.
    W : array-like
        Model weights.
    b : scalar or array-like
        Bias added to the dot product.

    Returns
    -------
    The model output ``np.dot(X, W) + b``.
    """
    weighted_sum = np.dot(X, W)
    return weighted_sum + b
    
def optimize(W,b,X,Y,lr=0.01,lmda=0.1):
    """Perform one gradient-descent step on the mean-squared-error cost.

    Parameters
    ----------
    W : weights
    b : bias
    X : input features (must expose ``.T``, e.g. a NumPy array)
        # assumes shape (m, n) with W of shape (n,), as in this notebook's call
    Y : target values; ``len(Y)`` is used as the sample count ``m``
    lr : learning rate (step size applied to both gradients)
    lmda : regularization parameter.
        NOTE: accepted but not used anywhere in this function, so no
        regularization term is applied to the update.

    Returns
    -------
    tuple
        ``(W, b)`` after the step ``W - lr*dJ/dW`` and ``b - lr*dJ/db``.
    """
    n_samples = len(Y)
    residual = Y - get_output(X,W,b)

    # Gradients of J = (1/m) * sum((Y - h)^2) with respect to W and b.
    grad_W = -2*np.dot(X.T, residual) / n_samples
    grad_b = -2*np.sum(residual) / n_samples

    return W - lr*grad_W, b - lr*grad_b

In [6]:
# Train by batch gradient descent until the cost improves by less than `tol`
# between two consecutive iterations.
W,b = initialize_weights_and_bais(X.shape[1])
last_cost,tol,i = float('inf'),0.01,0
max_iter = 10_000  # hard cap so the cell always terminates
while i < max_iter:
    h = get_output(X,W,b)
    cost = cost_function(h,Y)
    print(f"Iteration: {i}, Cost: {cost:.3f}")
    if not np.isfinite(cost):
        # With a NaN/inf cost, `last_cost-cost` is NaN and `NaN < tol` is always
        # False, so the tolerance test below would never stop the loop.
        raise FloatingPointError(
            f"Cost became non-finite at iteration {i}; try a smaller learning rate."
        )
    W,b = optimize(W,b,X,Y,lr=1e-5,lmda=0.5)
    # Stop once the improvement over the previous iteration drops below `tol`
    # (this also stops the loop if the cost goes up).
    if last_cost-cost < tol: break
    last_cost, i = cost, i+1
else:
    # Reached only when the loop ends without `break`, i.e. the cap was hit.
    print(f"Stopped after {max_iter} iterations without meeting tol={tol}.")

Iteration: 0, Cost: 10576.447
Iteration: 1, Cost: 6842.251
Iteration: 2, Cost: 4429.576
Iteration: 3, Cost: 2870.739
Iteration: 4, Cost: 1863.568
Iteration: 5, Cost: 1212.831
Iteration: 6, Cost: 792.385
Iteration: 7, Cost: 520.733
Iteration: 8, Cost: 345.215
Iteration: 9, Cost: 231.810
Iteration: 10, Cost: 158.536
Iteration: 11, Cost: 111.192
Iteration: 12, Cost: 80.600
Iteration: 13, Cost: 60.833
Iteration: 14, Cost: 48.058
Iteration: 15, Cost: 39.803
Iteration: 16, Cost: 34.466
Iteration: 17, Cost: 31.016
Iteration: 18, Cost: 28.785
Iteration: 19, Cost: 27.341
Iteration: 20, Cost: 26.406
Iteration: 21, Cost: 25.799
Iteration: 22, Cost: 25.405
Iteration: 23, Cost: 25.148
Iteration: 24, Cost: 24.980
Iteration: 25, Cost: 24.869
Iteration: 26, Cost: 24.795
Iteration: 27, Cost: 24.745
Iteration: 28, Cost: 24.711
Iteration: 29, Cost: 24.686
Iteration: 30, Cost: 24.668
Iteration: 31, Cost: 24.654
Iteration: 32, Cost: 24.643
Iteration: 33, Cost: 24.633


#### R squared - Coefficient of Determination
R-squared ($R^2$) is a statistical measure that represents the proportion of the variance of the dependent variable that is explained by the independent variable(s) in a regression model.

$$ R^2 = \frac{ \text{Explained Sum of Squares (ESS)} }{\text{Total Sum of Squares (TSS)}} $$

$$  R^2 = 1 - \frac{ \text{Residual Sum of Squares (RSS)} }{\text{Total Sum of Squares (TSS)}}  $$

$$ R^2 = 1 - \frac{ \sum_i{ (y_i - h_i)^2 } }{  \sum_i{ (y_i-\bar y)^2} } $$

In [8]:
def rsquare( y_true, y_pred ):
    """Coefficient of determination, ``R^2 = 1 - RSS / TSS``.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_true``.

    Returns
    -------
    float
        ``1 - sum((y_true - y_pred)**2) / sum((y_true - mean(y_true))**2)``.
        Special cases:
        * empty ``y_true`` -> ``nan`` (the score is undefined);
        * constant ``y_true`` (total sum of squares is 0) -> ``1.0`` when the
          predictions are exact, otherwise ``0.0``, instead of dividing by zero.
    """
    # Accept lists/tuples as well as arrays; work in floating point throughout.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if y_true.size == 0:
        return float('nan')

    rss = np.sum((y_true - y_pred) ** 2)         # residual sum of squares
    tss = np.sum((y_true - y_true.mean()) ** 2)  # total sum of squares

    if tss == 0:
        # No variance to explain: avoid 0/0 or x/0 and return a finite score.
        return 1.0 if rss == 0 else 0.0
    return 1 - rss / tss
# Score the fitted model: R^2 of its predictions against the observed targets.
rsquare(y_true=Y, y_pred=get_output(X, W, b))

0.893886642413261