In [2]:
import numpy as np
import pandas as pd

In [3]:
def costfunction(X,y,beta):
    '''Cost function for linear regression'''
    #Initialization of useful values 
    m = np.size(y)
    
    #Vectorized implementation
    h = X @ beta
    J = float((1./(2*m)) * (h - y).T @ (h - y));    
    return J;


def costFunctionReg(X,y,beta,lamda = 10):
    '''Cost function for ridge regression (regularized L2)'''
    #Initialization
    m = len(y) 
    J = 0
    
    #Vectorized implementation
    h = X @ beta
    J_reg = (lamda / (2*m)) * np.sum(np.square(beta))
    J = float((1./(2*m)) * (h - y).T @ (h - y)) + J_reg;
    return(J) 


def gradient_descent(X,y,beta,alpha = 0.0005,num_iters=1000):
    '''Gradient descent for linear regression'''
    #Initialisation of useful values 
    m = np.size(y)
    J_history = np.zeros(num_iters)
    beta_0_hist, beta_1_hist = [], [] #For plotting afterwards
    
    for i in range(num_iters):
        #Cost and intermediate values for each iteration
        J_history[i] = costfunction(X,y,beta)
        beta_0_hist.append(beta[0,0])
        beta_1_hist.append(beta[1,0])
        
        #Grad function in vectorized form
        h = X @ beta
        gradient = (1/m)*(X.T @ (h-y))
        beta = beta - alpha * gradient       
    return beta,J_history, beta_0_hist, beta_1_hist

def gradient_descent_reg(X,y,beta,alpha = 0.0005,lamda = 10,num_iters=1000):
    '''Gradient descent for ridge regression'''
    #Initialisation of useful values 
    m = np.size(y)
    J_history = np.zeros(num_iters)
    beta_0_hist, beta_1_hist = [], [] #Used for three D plot

    for i in range(num_iters):
        #Hypothesis function
        h = np.dot(X,beta)
        
        #Grad function in vectorized form
        beta = beta - alpha * (1/m)* (  (X.T @ (h-y)) + lamda * beta )
           
        #Cost function in vectorized form       
        J_history[i] = costFunctionReg(X,y,beta,lamda)
           
        #Calculate the cost for each iteration(used to plot convergence)
        beta_0_hist.append(beta[0,0])
        beta_1_hist.append(beta[1,0])   
    return beta ,J_history, beta_0_hist, beta_1_hist

def closed_form_solution(X,y):
    '''Closed form solution for linear regression'''
    return np.linalg.inv(X.T @ X) @ X.T @ y
    
def closed_form_reg_solution(X,y,lamda = 10): 
    '''Closed form solution for ridge regression'''
    m,n = X.shape
    I = np.eye((n))
    return (np.linalg.inv(X.T @ X + lamda * I) @ X.T @ y)[:,0]

def cost_l2(x,y):
    return x**2 + y**2

In [4]:
x = np.linspace(0,1,40)
noise = 1*np.random.uniform(  size = 40)
y = np.sin(x * 1.5 * np.pi ) 
y_noise = (y + noise).reshape(-1,1)

#Centering the y data
y_noise = y_noise - y_noise.mean()

#Design matrix is x, x^2
X = np.vstack((2*x,x**2)).T

#Nornalizing the design matrix to facilitate visualization
X = X / np.linalg.norm(X,axis = 0)


In [5]:
l = 10
#Computing the gradient descent
beta_result_reg,J_history_reg, beta_0, beta_1 = gradient_descent_reg(X,y_noise,np.array([7.,10.]).reshape(-1,1), 0.8,l,num_iters=5000)

In [6]:
beta_result_reg

array([[-0.14093456],
       [-0.22785737]])

In [7]:
#Computing the gradient descent
beta_result,J_history, beta_0, beta_1 = gradient_descent(X,y_noise,np.array([7,-10]).reshape(-1,1),alpha = 1,num_iters=5000)

In [8]:
beta_result

array([[ 12.52106254],
       [-14.76353856]])

In [12]:
cost_123 = cost_l2(X,y_noise)
cost_123

In [10]:
import seaborn as sns
sns.lineplot(range(100),cost_123)

Exception: Data must be 1-dimensional