In [1]:
import numpy as np

### Cost Function with Regularization

The equation for the regularized cost function (linear regression) is: $$ J(w,b) = \frac {1}{2m} \displaystyle\sum_{i=0}^{m-1}(f_{w,b}(x^{(i)}) - y^{(i)})^2 + \frac {\lambda}{2m}\displaystyle\sum_{j=0}^{n-1}w_{j}^{2} $$ where: $$ f_{w,b}(x^{(i)}) = w \cdot {x^{(i)}} + b $$

In [2]:
def compute_cost_linear_reg(X, y, w, b, lambda_=1):
    """
    Compute the regularized squared-error cost for linear regression.

    Evaluates
        J(w, b) = 1/(2m) * sum_i (w . x_i + b - y_i)^2
                  + lambda_/(2m) * sum_j w_j^2
    Only the weights w are penalized; the bias b is not part of the
    regularization term.

    Args:
      X (array_like, shape (m, n)): m examples with n features each
      y (array_like, shape (m,))  : target values
      w (array_like, shape (n,))  : model weights
      b (scalar)                  : model bias
      lambda_ (scalar)            : regularization strength (default 1)

    Returns:
      total_cost (scalar): squared-error cost plus regularization cost

    Raises:
      ZeroDivisionError: if X contains no examples (m == 0)
    """
    # Accept lists/tuples as well as ndarrays.
    X = np.asarray(X)
    y = np.asarray(y)
    w = np.asarray(w)

    m = X.shape[0]
    if m == 0:
        # Both terms are scaled by 1/(2m); fail loudly instead of letting
        # NumPy quietly produce nan for an empty training set.
        raise ZeroDivisionError("cost is undefined for an empty training set (m == 0)")

    # Residuals for all examples at once: f_wb(x_i) - y_i.
    err = X @ w + b - y
    cost = np.dot(err, err) / (2 * m)

    # L2 penalty on the weights only.
    reg_cost = (lambda_ / (2 * m)) * np.dot(w, w)

    return cost + reg_cost

In [3]:
# Smoke test for compute_cost_linear_reg on a small, seeded random problem.
np.random.seed(1)  # fixed seed so the printed cost is reproducible
X_tmp = np.random.rand(5, 6)
y_tmp = np.array([0, 1, 0, 1, 0])
# rand() already returns a flat vector; shift it so the weights straddle zero.
w_tmp = np.random.rand(X_tmp.shape[1]) - 0.5
b_tmp, lambda_tmp = 0.5, 0.7
cost_tmp = compute_cost_linear_reg(
    X_tmp, y_tmp, w_tmp, b_tmp, lambda_=lambda_tmp
)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.07917239320214277


### Gradient Descent with Regularization

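Gradient descent repeats the following updates until convergence, applying them simultaneously for every $w_j$ and for $b$: $$ w_j = w_j - \alpha \frac {\partial J(w,b)}{\partial w_j}$$  $$ b = b - \alpha \frac {\partial J(w,b)}{\partial b} $$ 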

Gradients of the regularized cost (the bias $b$ is not regularized, so only the derivative with respect to $w_j$ gains a $\frac{\lambda}{m}w_j$ term): $$ \frac {\partial J(w,b)}{\partial w_{j}} = \frac {1}{m} \displaystyle\sum_{i=0}^{m-1}(f_{w,b}(x^{(i)}) - y^{(i)})x_{j}^{(i)} + \frac {\lambda}{m}w_j $$ 
$$ \frac {\partial J(w,b)}{\partial b} = \frac {1}{m} \displaystyle\sum_{i=0}^{m-1}(f_{w,b}(x^{(i)}) - y^{(i)}) $$  

In [None]:
def compute_gradient_linear_reg(X, y, w, b, lambda_=1):
    """
    Compute the gradient of the regularized linear-regression cost.

    Evaluates
        dJ/dw_j = 1/m * sum_i (w . x_i + b - y_i) * x_ij + lambda_/m * w_j
        dJ/db   = 1/m * sum_i (w . x_i + b - y_i)
    Only the weights w are penalized, so dJ/db has no lambda_ term.

    Args:
      X (array_like, shape (m, n)): m examples with n features each
      y (array_like, shape (m,))  : target values
      w (array_like, shape (n,))  : model weights
      b (scalar)                  : model bias
      lambda_ (scalar)            : regularization strength (default 1,
                                    matching compute_cost_linear_reg)

    Returns:
      dj_db (scalar)             : gradient of the cost w.r.t. b
      dj_dw (ndarray, shape (n,)): gradient of the cost w.r.t. w

    Raises:
      ZeroDivisionError: if X contains no examples (m == 0)
    """
    # Accept lists/tuples as well as ndarrays.
    X = np.asarray(X)
    y = np.asarray(y)
    w = np.asarray(w)

    m = X.shape[0]
    if m == 0:
        # Both gradients are averages over the examples; fail loudly instead
        # of letting NumPy quietly produce nan for an empty training set.
        raise ZeroDivisionError("gradient is undefined for an empty training set (m == 0)")

    # Residuals for all examples at once: f_wb(x_i) - y_i.
    err = X @ w + b - y

    dj_db = np.sum(err) / m
    # X.T @ err sums err_i * x_ij over the examples for every feature j;
    # the second term is the derivative of the L2 penalty on w.
    dj_dw = (X.T @ err) / m + (lambda_ / m) * w

    return dj_db, dj_dw

In [None]:
# Smoke test for compute_gradient_linear_reg on a small, seeded random problem.
np.random.seed(1)  # fixed seed so the printed gradients are reproducible
X_tmp = np.random.rand(5, 3)
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp, lambda_tmp = 0.5, 0.7
dj_db_tmp, dj_dw_tmp = compute_gradient_linear_reg(
    X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp
)

print(f"dj_db: {dj_db_tmp}")
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}")