# Regularized Linear and Logistic Regression

In [1]:
import numpy as np
import matplotlib.pyplot as plt

## Cost Function For Regularized Linear Regression

In [8]:
def compute_linear_regression_cost(x,y,w,b,lambda_):
    """
    Regularized squared-error cost for a linear model f(x) = w . x + b.

    Args:
      x (ndarray (m,n)): m examples with n features each
      y (sequence, length m): target value for each example
      w (sequence, length n): model weights
      b (scalar): model bias
      lambda_ (scalar): regularization strength

    Returns:
      scalar: (1/(2m)) * sum of squared residuals
              + (lambda_/(2m)) * sum of squared weights
    """
    num_examples, num_features = x.shape

    # Data-fit term: accumulate the squared residual of every example.
    squared_error_sum = 0
    for row, features in enumerate(x):
        residual = np.dot(w, features) + b - y[row]
        squared_error_sum += residual ** 2
    fit_term = squared_error_sum / (2 * num_examples)

    # Penalty term: only the weights are penalized, the bias b is not.
    weight_square_sum = 0
    for j in range(num_features):
        weight_square_sum += w[j] ** 2
    penalty_term = weight_square_sum * (lambda_ / (2 * num_examples))

    return fit_term + penalty_term

In [9]:
# Seed the global RNG so the random fixture below is the same on every re-run.
np.random.seed(1)
X_tmp = np.random.rand(5, 6)
y_tmp = np.array([0, 1, 0, 1, 0])
# Shift the uniform [0, 1) draws by -0.5 so the weights take both signs.
w_tmp = np.random.rand(X_tmp.shape[1]) - 0.5
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_linear_regression_cost(
    X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp
)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.07917239320214275


## Cost Function For Regularized Logistic Regression

In [4]:
def sigmoid(z):
    """
    Logistic sigmoid g(z) = 1 / (1 + e^-z), evaluated without overflow.

    The naive form computes e^-z, which overflows (RuntimeWarning) for large
    negative z. Here only e^-|z| is ever evaluated, which lies in [0, 1]:
        z >= 0:  1 / (1 + e^-|z|)
        z <  0:  e^-|z| / (1 + e^-|z|)
    For z >= 0 this is the same expression as the naive form.

    Args:
      z (scalar or array-like): input value(s)

    Returns:
      g_z: sigmoid of z; a NumPy scalar for scalar input, otherwise an
           ndarray with the same shape as z
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    g_z = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return g_z[()]

In [5]:
def compute_logistic_regression_cost(x,y,w,b,lambda_):
    """
    Regularized cross-entropy (log-loss) cost for logistic regression.

    With z = w . x + b, each example contributes
        -y*log(sigmoid(z)) - (1-y)*log(1 - sigmoid(z)).
    Evaluating that literally yields log(0) = -inf once the sigmoid saturates
    to exactly 0 or 1, and 0 * -inf turns the whole cost into NaN. The loss is
    therefore computed from the equivalent identities
        log(sigmoid(z))     = -log(1 + e^-z)
        log(1 - sigmoid(z)) = -log(1 + e^z)
    using np.logaddexp, which stays finite for large |z|.

    Args:
      x (ndarray (m,n)): m examples with n features each
      y (sequence, length m): target label for each example
      w (sequence, length n): model weights
      b (scalar): model bias
      lambda_ (scalar): regularization strength

    Returns:
      scalar: mean log-loss + (lambda_/(2m)) * sum of squared weights
    """
    m,n=x.shape
    cost=0
    reg_cost=0

    for i in range(m):
        z = np.dot(w,x[i])+b
        # np.logaddexp(0, t) is log(1 + e^t) computed without overflow.
        cost += y[i]*np.logaddexp(0, -z) + (1-y[i])*np.logaddexp(0, z)

    cost=cost/m

    # Only the weights are penalized; the bias b is not regularized.
    for j in range(n):
        reg_cost += w[j]**2

    reg_cost=reg_cost*(lambda_/(2*m))

    cost_final = cost + reg_cost

    return cost_final

In [6]:
# Seed the global RNG so the random fixture below is the same on every re-run.
np.random.seed(1)
X_tmp = np.random.rand(5, 6)
y_tmp = np.array([0, 1, 0, 1, 0])
# Shift the uniform [0, 1) draws by -0.5 so the weights take both signs.
w_tmp = np.random.rand(X_tmp.shape[1]) - 0.5
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_logistic_regression_cost(
    X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp
)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.6850849138741673


## Gradient descent with regularization
The basic algorithm for running gradient descent does not change with regularization:
$$\begin{align*}
&\text{repeat until convergence:} \; \lbrace \\
&  \; \; \;w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j}   \; & \text{for j := 0..n-1} \\ 
&  \; \; \;  \; \;b = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b} \\
&\rbrace
\end{align*}$$
Where each iteration performs simultaneous updates on $w_j$ for all $j$.

What changes with regularization is computing the gradients.

### Gradient function for regularized linear regression

In [7]:
def compute_gradient_regularized_linear(x,y,w,b,lambda_):
    """
    Gradient of the regularized squared-error cost for linear regression.

    Args:
      x (ndarray (m,n)): m examples with n features each
      y (sequence, length m): target value for each example
      w (sequence, length n): model weights
      b (scalar): model bias
      lambda_ (scalar): regularization strength

    Returns:
      dj_dw (ndarray (n,)): gradient of the cost w.r.t. each weight,
                            including the (lambda_/m) * w penalty term
      dj_db (scalar): gradient of the cost w.r.t. the bias (not regularized)
    """
    num_examples, num_features = x.shape
    grad_w = np.zeros((num_features,))
    grad_b = 0

    for row, features in enumerate(x):
        residual = np.dot(w, features) + b - y[row]
        # Scaling the whole feature row updates every weight's partial at once.
        grad_w += residual * features
        grad_b += residual

    # Average the accumulated sums over the examples.
    grad_w /= num_examples
    grad_b /= num_examples

    # The penalty only touches the weight gradient.
    grad_w += (lambda_ / num_examples) * np.asarray(w)

    return grad_w, grad_b

In [8]:
# Seed the global RNG so the random fixture below is the same on every re-run.
np.random.seed(1)
X_tmp = np.random.rand(5, 3)
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_dw_tmp, dj_db_tmp = compute_gradient_regularized_linear(
    X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp
)

print(f"dj_db: {dj_db_tmp}")
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}")

dj_db: 0.6648774569425726
Regularized dj_dw:
 [0.29653214748822276, 0.4911679625918033, 0.21645877535865857]


### Gradient function for regularized logistic regression

In [9]:
def compute_gradient_regularized_logistic(x,y,w,b,lambda_):
    """
    Gradient of the regularized log-loss cost for logistic regression.

    Args:
      x (ndarray (m,n)): m examples with n features each
      y (sequence, length m): target label for each example
      w (sequence, length n): model weights
      b (scalar): model bias
      lambda_ (scalar): regularization strength

    Returns:
      dj_dw (ndarray (n,)): gradient of the cost w.r.t. each weight,
                            including the (lambda_/m) * w penalty term
      dj_db (scalar): gradient of the cost w.r.t. the bias (not regularized)
    """
    num_examples, num_features = x.shape
    grad_w = np.zeros(num_features)
    grad_b = 0

    for row, features in enumerate(x):
        # Residual between the predicted probability and the label.
        residual = sigmoid(np.dot(w, features) + b) - y[row]
        # Scaling the whole feature row updates every weight's partial at once.
        grad_w += residual * features
        grad_b += residual

    # Average the accumulated sums over the examples.
    grad_w /= num_examples
    grad_b /= num_examples

    # The penalty only touches the weight gradient.
    grad_w += (lambda_ / num_examples) * np.asarray(w)

    return grad_w, grad_b

In [10]:
# Seed the global RNG so the random fixture below is the same on every re-run.
np.random.seed(1)
X_tmp = np.random.rand(5, 3)
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_dw_tmp, dj_db_tmp = compute_gradient_regularized_logistic(
    X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp
)

print(f"dj_db: {dj_db_tmp}")
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}")

dj_db: 0.341798994972791
Regularized dj_dw:
 [0.17380012933994293, 0.32007507881566943, 0.10776313396851499]
