#***REGULARIZATION***

In [None]:
import numpy as np
import pandas as pd
import random
import time
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.datasets import load_diabetes, make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

##Ridge Regression












###Using linear Algebra


In [None]:
# Load the diabetes data as (features, target) arrays and hold out 30% of
# the rows for evaluation; the seed keeps the split repeatable.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=94,
)

In [None]:
class LA_Ridge:
  """Ridge regression solved in closed form from the normal equations.

  Attributes:
    lamda: L2 penalty strength applied to the slopes.
    intercept: fitted bias term (0 until ``fit`` has run).
    coef: fitted slopes (0 until ``fit`` has run).
  """

  def __init__(self, lamda):
    self.lamda = lamda
    self.coef = 0
    self.intercept = 0

  def fit(self, X_train, y_train):
    """Solve ``(A^T A + lamda * I0) w = A^T y`` and store/print the result.

    ``A`` is ``X_train`` with a leading column of ones and ``I0`` is the
    identity matrix whose top-left entry is zeroed.
    """
    design = np.insert(X_train, 0, 1, axis=1)

    # Zero the first diagonal entry so the intercept column is left
    # unpenalised; only the slopes are shrunk.
    penalty = np.identity(design.shape[1])
    penalty[0, 0] = 0

    lhs = design.T.dot(design) + self.lamda * penalty
    rhs = design.T.dot(y_train)

    # Solve the linear system directly instead of forming an explicit inverse.
    weights = np.linalg.solve(lhs, rhs)

    self.intercept, self.coef = weights[0], weights[1:]

    print(self.intercept)
    print(self.coef)

  def predict(self, X_test):
    """Return ``X_test . coef + intercept``."""
    projection = np.dot(X_test, self.coef)
    return projection + self.intercept

# Fit the closed-form ridge model on the training split, then report the
# R^2 score on the held-out rows.
md = LA_Ridge(lamda=0.5)
md.fit(X_train, y_train)
y_pred = md.predict(X_test)
print(r2_score(y_test, y_pred))

153.18818420133334
[ 3.69388018e+01 -9.27098610e+01  3.60027906e+02  2.10217461e+02
 -2.26210992e-01 -5.30678780e+01 -1.48952518e+02  9.78502636e+01
  2.94375864e+02  1.17284099e+02]
0.4643265637843299


In [None]:
# Reference fit with scikit-learn's Ridge at the same penalty strength,
# printed in the same order (intercept, slopes, R^2) for comparison.
rr = Ridge(alpha=0.5)
rr.fit(X_train, y_train)
y_pred = rr.predict(X_test)
print(rr.intercept_)
print(rr.coef_)
print(r2_score(y_test, y_pred))

153.1881842013333
[ 3.69388018e+01 -9.27098610e+01  3.60027906e+02  2.10217461e+02
 -2.26210992e-01 -5.30678780e+01 -1.48952518e+02  9.78502636e+01
  2.94375864e+02  1.17284099e+02]
0.46432656378433




###Ridge using Batch Gradient Descent





In [None]:
# Reload the diabetes arrays and make a fresh 70/30 split (seed 86) for the
# batch gradient-descent experiment.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=86,
)

In [None]:
class GD_ridge:
  """Ridge regression fitted with full-batch gradient descent.

  Attributes:
    lr: step size applied to every gradient update.
    epoch: number of full passes (one update per pass).
    alpha: L2 penalty strength applied to the slopes only.
    intercept: fitted bias term (0 until ``fit`` has run).
    coef: fitted slopes (0 until ``fit`` has run).
  """

  def __init__(self, lr = 0.01, epoch = 100, alpha = 0.001):
    self.intercept = 0
    self.coef = 0
    self.lr = lr
    self.epoch = epoch
    self.alpha = alpha

  def fit(self,X_train, y_train):
    """Run ``epoch`` gradient steps from an all-ones start; store and print the fit.

    Each step uses the gradient ``A^T A theta - A^T y + alpha * theta_slopes``
    where ``A`` is ``X_train`` with a leading column of ones.
    """
    X_aug = np.insert(X_train,0,1,axis = 1)
    theta = np.ones(X_aug.shape[1])

    # These products do not depend on theta, so compute them once instead of
    # once per epoch.
    gram = np.dot(X_aug.T, X_aug)
    moment = np.dot(X_aug.T, y_train)

    # Mask out position 0 so the intercept is not shrunk by the penalty,
    # matching the closed-form LA_Ridge treatment of the bias term.
    penalty_mask = np.ones(X_aug.shape[1])
    penalty_mask[0] = 0

    for _ in range(self.epoch):
      slope = gram.dot(theta) - moment + self.alpha * penalty_mask * theta
      theta -= self.lr * slope

    self.intercept = theta[0]
    self.coef = theta[1:]

    print(self.intercept)
    print(self.coef)

  def predict(self,X_test):
    """Return ``X_test . coef + intercept``."""
    return np.dot(X_test,self.coef) + self.intercept

# Train the batch gradient-descent ridge model and score it on the
# held-out rows.
md = GD_ridge(
    lr=0.006,
    epoch=500,
    alpha=0.001,
)
md.fit(X_train, y_train)
y_pred = md.predict(X_test)
print(r2_score(y_test, y_pred))

151.413272059825
[  55.66574605 -198.42416821  512.71760603  361.69739479  -73.83820376
 -124.50674095 -208.9951343   135.61934792  387.59624578   65.25052075]
0.513322346658621


In [None]:
# scikit-learn Ridge with the same alpha, as a baseline for the
# gradient-descent fit above.
rr = Ridge(alpha=0.001)
rr.fit(X_train, y_train)
y_pred = rr.predict(X_test)
print(rr.intercept_)
print(rr.coef_)
print(r2_score(y_test, y_pred))

151.07289224052326
[  59.05104661 -251.35995771  549.97021303  398.98458647 -928.60374309
  505.34154043  176.0164699   273.74229936  738.1193001    -7.12702851]
0.4927816151475921



### Ridge using Stochastic Gradient Descent



In [None]:
# Reload the diabetes arrays and make a fresh 70/30 split (seed 46) for the
# stochastic gradient-descent experiment.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=46,
)

In [None]:
class SGD_Ridge:
  """Ridge regression fitted with single-sample stochastic gradient descent.

  Attributes:
    lr: base step size.
    epoch: number of passes; each pass performs one update per training row.
    alpha: L2 penalty strength applied to the slopes only.
    adp_lr: when true, the step size is taken from ``schedule()`` on every update.
    intercept: fitted bias term (0 until ``fit`` has run).
    coef: fitted slopes (0 until ``fit`` has run).
  """

  def __init__(self, lr = 0.01, epoch = 100, alpha = 0.001, adp_lr = False):
    # Initialised here, like the other estimators in this file, so the
    # attributes exist before fit() is called.
    self.intercept = 0
    self.coef = 0
    self.lr = lr
    self.epoch = epoch
    self.alpha = alpha
    self.adp_lr = adp_lr

  def fit(self, X_train, y_train):
    """Run ``epoch * n_rows`` single-sample updates; store and print the fit.

    Rows are drawn with replacement via ``np.random.randint``, so results
    depend on NumPy's global random state.
    """
    X_aug = np.insert(X_train,0,1,axis = 1)
    # Positional indexing below must not be affected by a pandas-style index.
    targets = np.asarray(y_train)
    n_samples = X_aug.shape[0]
    theta = np.ones(X_aug.shape[1])

    # Mask out position 0 so the intercept is not shrunk by the penalty,
    # matching the closed-form LA_Ridge treatment of the bias term.
    penalty_mask = np.ones(X_aug.shape[1])
    penalty_mask[0] = 0

    self.start = time.time()
    lr = self.lr

    for _ in range(self.epoch):
      for _ in range(n_samples):
        idx = np.random.randint(0, n_samples)
        row = X_aug[idx]
        residual = targets[idx] - np.dot(row, theta)

        if self.adp_lr:
          lr = self.schedule()

        theta_diff = -residual * row + self.alpha * penalty_mask * theta
        theta -= lr * 2 * theta_diff

    self.intercept = theta[0]
    self.coef = theta[1:]

    print(self.intercept)
    print(self.coef)

  def predict(self, X_test):
    """Return ``X_test . coef + intercept``."""
    return np.dot(X_test, self.coef) + self.intercept

  def schedule(self):
    """Return ``lr / (2 + seconds elapsed since fit() started)``.

    NOTE(review): the decay is driven by wall-clock time, so the effective
    step size varies with machine speed and runs are not reproducible;
    consider an update-count-based schedule.
    """
    t0 = self.start
    t1 = time.time()
    return self.lr/(2+(t1-t0))


# Train the stochastic ridge model with the time-based step-size schedule
# enabled, then score it on the held-out rows.
md = SGD_Ridge(
    lr=0.05,
    epoch=200,
    alpha=0.0001,
    adp_lr=True,
)
md.fit(X_train, y_train)
y_pred = md.predict(X_test)
print(r2_score(y_test, y_pred))

146.12559990003587
[  12.69105892 -227.42541913  471.27976093  323.79017083  -12.98137163
 -102.31523105 -174.83999632  191.43037586  459.32623424   15.65936478]
0.41648226152181356


In [None]:
# scikit-learn Ridge baseline on the same split, for comparison with the
# stochastic fit above.
rr = Ridge(alpha=0.001)
rr.fit(X_train, y_train)
y_pred = rr.predict(X_test)
print(rr.intercept_)
print(rr.coef_)
print(r2_score(y_test, y_pred))

150.4369100653396
[   9.03541908 -246.07376239  486.0336929   337.9820167  -530.08361463
  264.63714155   62.40554044  279.4862231   663.04285197   -3.22533894]
0.4343038677386277




###Ridge Regression using Mini Batch Gradient Descent



In [None]:
# Reload the diabetes arrays and make a 75/25 split (seed 34) for the
# mini-batch gradient-descent experiment.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=34,
)

In [None]:
class MBGD_Ridge:
  """Ridge regression fitted with mini-batch gradient descent.

  Attributes:
    batch_size: number of rows drawn (without replacement) for each update.
    lr: base step size.
    epoch: number of passes; each pass performs ``n_rows // batch_size`` updates.
    alpha: L2 penalty strength applied to the slopes only.
    adp_lr: when true, the step size is taken from ``schedule()`` on every update.
    intercept: fitted bias term (0 until ``fit`` has run).
    coef: fitted slopes (0 until ``fit`` has run).
  """

  def __init__(self, batch_size,lr = 0.01, epoch = 100, alpha = 0.001, adp_lr = False):
    # Initialised here, like the other estimators in this file, so the
    # attributes exist before fit() is called.
    self.intercept = 0
    self.coef = 0
    self.lr =lr
    self.epoch = epoch
    self.alpha = alpha
    self.batch_size = batch_size
    self.adp_lr = adp_lr

  def fit(self, X_train , y_train):
    """Run the mini-batch updates from an all-ones start; store and print the fit.

    Each update uses the summed (not averaged) squared-error gradient over
    the batch plus ``alpha`` times the slopes. Batches are drawn with the
    stdlib ``random`` module, so results depend on its global state.

    Raises:
      ValueError: if ``batch_size`` is smaller than 1.
    """
    if self.batch_size < 1:
      raise ValueError("batch_size must be at least 1")

    X_aug = np.insert(X_train,0,1,axis = 1)
    # Positional indexing below must not be affected by a pandas-style index.
    targets = np.asarray(y_train)
    n_samples = X_aug.shape[0]
    theta = np.ones(X_aug.shape[1])

    # A batch larger than the data set would previously yield zero updates
    # and silently return the initial weights; cap it at the full data set.
    size = min(self.batch_size, n_samples)
    batches = n_samples // size if size else 0

    # Mask out position 0 so the intercept is not shrunk by the penalty,
    # matching the closed-form LA_Ridge treatment of the bias term.
    penalty_mask = np.ones(X_aug.shape[1])
    penalty_mask[0] = 0

    self.start = time.time()
    lr = self.lr

    for _ in range(self.epoch):
      for _ in range(batches):
        rows = random.sample(range(n_samples), size)
        X_batch = X_aug[rows]
        y_batch = targets[rows]

        if self.adp_lr:
          lr = self.schedule()

        theta_diff = (
          np.dot(X_batch.T, X_batch).dot(theta)
          - np.dot(X_batch.T, y_batch)
          + self.alpha * penalty_mask * theta
        )
        theta -= lr * theta_diff

    self.intercept = theta[0]
    self.coef = theta[1:]

    print(self.intercept)
    print(self.coef)

  def predict(self, X_test):
    """Return ``X_test . coef + intercept``."""
    return np.dot( X_test, self.coef) + self.intercept

  def schedule(self):
    """Return ``lr / (2 + seconds elapsed since fit() started)``.

    NOTE(review): the decay is driven by wall-clock time, so the effective
    step size varies with machine speed and runs are not reproducible;
    consider an update-count-based schedule.
    """
    t0 = self.start
    t1 = time.time()
    return self.lr/(2+(t1-t0))

# Train the mini-batch ridge model (35 rows per update, time-based step-size
# schedule enabled) and score it on the held-out rows.
md = MBGD_Ridge(
    batch_size=35,
    lr=0.05,
    epoch=500,
    alpha=0.001,
    adp_lr=True,
)
md.fit(X_train, y_train)
y_pred = md.predict(X_test)
print(r2_score(y_test, y_pred))

153.73900007334655
[  10.89374798 -255.10693809  525.98024278  320.84250949  -95.13299042
  -64.89101395 -236.56305238   97.09599913  447.70203535   72.64174677]
0.4219172460498284


In [None]:
# scikit-learn Ridge baseline on the same split, for comparison with the
# mini-batch fit above.
rr = Ridge(alpha=0.001)
rr.fit(X_train, y_train)
y_pred = rr.predict(X_test)
print(rr.intercept_)
print(rr.coef_)
print(r2_score(y_test, y_pred))

154.11376978214633
[   8.57595351 -260.18286882  514.45920235  335.05745115 -753.65496586
  496.16568977   18.29971306  115.15537881  717.6926403    65.86689391]
0.4193293339744496


##Lasso Regression

In [None]:
# Load the diabetes arrays and make a 75/25 split (seed 453) for the lasso
# experiments.
X, y = load_diabetes(return_X_y=True)
# Alternative synthetic data set, kept for reference:
# X,y = make_regression(n_samples = 100, n_features = 10, n_informative = 5,  random_state = 453)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=453,
)

In [None]:
class Lasso_Reg:
  """Lasso (L1-penalised) linear regression fitted by subgradient descent.

  Attributes:
    alpha: L1 penalty strength; it enters each slope gradient as ``alpha / n``.
    lr: step size applied to every update.
    epoch: number of full-batch updates.
    intercept: fitted bias term (0 until ``fit`` has run).
    coef: fitted slopes (0 until ``fit`` has run).
  """

  def __init__(self,alpha, lr = 0.01, epoch = 100):
    self.coef = 0
    self.intercept = 0
    self.alpha = alpha
    self.lr = lr
    self.epoch = epoch

  def fit(self, X_train, y_train):
    """Run ``epoch`` full-batch updates from all-ones slopes; store and print the fit.

    Slopes whose magnitude ends below ``1e-5`` are snapped to exactly zero.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    n = X_train.shape[0]

    self.coef = np.ones(X_train.shape[1])
    # Restart the bias too, so refitting does not continue from a previous fit.
    self.intercept = 0

    for _ in range(self.epoch):
      residual = y_train - self.predict(X_train)

      intercept_diff = -2 * np.mean(residual)
      # Mean-squared-error gradient for every slope at once.
      rho = -2 * np.dot(X_train.T, residual) / n
      # np.sign is 0 at exactly zero, so a zero slope receives only the data
      # gradient rather than an arbitrary constant.
      coef_diff = rho + (self.alpha / n) * np.sign(self.coef)

      self.intercept -= self.lr * intercept_diff
      self.coef -= self.lr * coef_diff

    # Compare magnitudes: a plain `coef < 1e-5` test would also wipe out every
    # negative slope.
    self.coef = np.where(np.abs(self.coef) < 1e-5, 0, self.coef)
    print(self.intercept)
    print(self.coef)

  def predict(self,X_test):
    """Return ``X_test . coef + intercept``."""
    return np.dot(X_test, self.coef) + self.intercept

# Train the hand-written lasso model and score it on the held-out rows.
md = Lasso_Reg(
    alpha=0.01,
    lr=0.01,
    epoch=100,
)
md.fit(X_train, y_train)
y_pred = md.predict(X_test)
print(r2_score(y_test, y_pred))

129.5638717339026
[2.21588205 1.41552522 5.00784072 3.97119033 2.36200016 2.18170493
 0.         4.10793497 4.92610078 3.98410469]
-0.1375167114534892


In [None]:
# scikit-learn Lasso baseline on the same split, printed in the same order
# (intercept, slopes, R^2) for comparison.
ls = Lasso(alpha=0.01)
ls.fit(X_train, y_train)
y_pred = ls.predict(X_test)
print(ls.intercept_)
print(ls.coef_)
print(r2_score(y_test, y_pred))

150.99274711360812
[  -5.59217058 -199.38774165  500.50693658  241.95764926 -425.12652479
  138.6090779   -77.13651261  143.75708405  620.12419768   95.58918328]
0.5076830369605065


In [None]:
# Lasso Regression
class LassoRegression():
  """L1-penalised linear regression trained by full-batch gradient descent.

  After ``fit`` the instance holds the training data (``X``, ``Y``), its
  shape (``m`` rows, ``n`` columns), the weights ``W`` and the bias ``b``.
  """

  def __init__(self, learning_rate, iterations, l1_penalty):
    self.l1_penalty = l1_penalty
    self.iterations = iterations
    self.learning_rate = learning_rate

  def fit(self, X, Y):
    """Start from zero weights and bias, run ``iterations`` updates, return self."""
    self.X, self.Y = X, Y
    self.m, self.n = X.shape
    self.W = np.zeros(self.n)
    self.b = 0

    for _ in range(self.iterations):
      self.update_weights()
    return self

  def update_weights(self):
    """Apply one gradient step to ``W`` and ``b``; return self."""
    residual = self.Y - self.predict(self.X)

    dW = np.zeros(self.n)
    for j, weight in enumerate(self.W):
      # The penalty term is +l1_penalty for a strictly positive weight and
      # -l1_penalty otherwise (including a weight of exactly zero).
      signed_penalty = self.l1_penalty if weight > 0 else -self.l1_penalty
      dW[j] = (-2 * (self.X[:, j]).dot(residual) + signed_penalty) / self.m

    db = -2 * np.sum(residual) / self.m

    self.W = self.W - self.learning_rate * dW
    self.b = self.b - self.learning_rate * db
    return self

  def predict(self, X):
    """Return the linear hypothesis ``X . W + b``."""
    return X.dot(self.W) + self.b


In [None]:
# Train the second lasso implementation and score it on the held-out rows.
md = LassoRegression(
    learning_rate=0.05,
    iterations=500,
    l1_penalty=0.1,
)
md.fit(X_train, y_train)
y_pred = md.predict(X_test)
print(r2_score(y_test, y_pred))

0.999999998162927
