<a href="https://colab.research.google.com/github/aarontavel/DATA441/blob/main/Tavel_HMW1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Github Link: https://aarontavel.github.io/DATA441/

#### 1. Coding problem: implement a gradient descent method for Ridge Regression by using the PyTorch library. Your implementation should be a class that has the required methods “.fit” and “.predict”. You should include an application of your code to a data set.



In [1]:
import torch as torch

from sklearn.preprocessing import StandardScaler

In [87]:
class Ridge:

  def __init__(self, lr, epoch = 1000, alpha = 1.0, intercept = True):
   self.lr = lr
   self.epoch = epoch
   self.alpha = alpha
   self.ss = StandardScaler()
   self.intercept = intercept

  def fit(self, x, y):

    x_scaled = self.ss.fit_transform(x)

    x_tens = torch.tensor(x_scaled, dtype=torch.float32) # to accomodate for non-torch array data
    y_tens = torch.tensor(y, dtype=torch.float32)

    n,m = x_tens.shape # Get number of rows, features

    if self.intercept:
      x_tens = torch.cat([torch.ones(n, 1), x_tens], dim = 1) #adds intercept
      self.w = torch.ones((m+1, 1), dtype=torch.float32) # Initializes weights if intercept
    else:
      self.w = torch.ones((m, 1), dtype=torch.float32) #weights if no intercept

    #return(x_tens, self.w) # for testing purposes

    for i in range(self.epoch):
      y_pred = x_tens @ self.w

      error = y_pred - y_tens

      mse_loss = torch.mean(error ** 2)

      ridge_factor = self.alpha * torch.sum(self.w ** 2)

      x_transp = x_tens.T

      gradient = (2/n) * (x_transp @ error) + 2 * self.alpha * self.w

      self.w = self.w - self.lr * gradient

  def predict(self, x):

    x_scaled = self.ss.transform(x)

    x_tens = torch.tensor(x_scaled, dtype = torch.float32)

    if self.intercept:
      x_tens = torch.cat([torch.ones(x_tens.shape[0], 1), x_tens], dim = 1)

    return (x_tens @ self.w)

In [88]:
# Testing Ridge witih a data set

import pandas as pd

ss = StandardScaler()

data = pd.read_csv("/content/drive/MyDrive/DATA 441/Module_01/Data/cars")

x = data.loc[:,'cyl':'wt'].values

y = data['mpg'].values

In [89]:
model = Ridge(lr=0.001, epoch = 1000, alpha = 0.05)

In [90]:
x_scaled = ss.fit_transform(x)

model.fit(x_scaled,y)

In [91]:
predictions = model.predict(x_scaled)

In [None]:
predictions

In [93]:
mse = torch.mean((torch.tensor(y) - predictions)**2)
print("\nMean Squared Error (MSE):", mse.item())


Mean Squared Error (MSE): 11.172518525934255


#### 2. Complete the exercise provided in the Application to Locally Weighted Regression notebook and test the method on a data set, for example, the one provided in class.

In [139]:
# Libraries of functions need to be imported
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.spatial import Delaunay
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.metrics import mean_squared_error as mse
from scipy import linalg
from scipy.interpolate import interp1d, LinearNDInterpolator, NearestNDInterpolator
from sklearn.decomposition import PCA

# the following line(s) are necessary if you want to make SKlearn compliant functions
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

In [148]:
# Epanechnikov Kernel
def Epanechnikov(x):
  return np.where(np.abs(x)>1,0,3/4*(1-np.abs(x)**2))

In [136]:
def kernel_function(xi,x0,kern, tau):
    return kern(dist(xi,x0)/(2*tau))

In [135]:
def weights_matrix(x,x_new,kern,tau):
  if np.isscalar(x_new):
    return kernel_function(x,x_new,kern,tau)
  else:
    n = len(x_new)
    return np.array([kernel_function(x,x_new[i],kern,tau) for i in range(n)])

In [174]:
def weight_function(u,v,kern=Gaussian,tau=0.5):
    return kern(dist(u,v)/(2*tau))

In [169]:
class Lowess:
    def __init__(self, kernel = Gaussian, tau=0.05):
        self.kernel = kernel
        self.tau = tau

    def fit(self, x, y):
        kernel = self.kernel
        tau = self.tau
        self.xtrain_ = x
        self.yhat_ = y

    def predict(self, x_new):
        check_is_fitted(self)
        x = self.xtrain_
        y = self.yhat_
        lm = linear_model.Ridge(alpha=0.001)
        w = weight_function(x,x_new,self.kernel,self.tau)

        if np.isscalar(x_new):
          lm.fit(np.diag(w)@(x.reshape(-1,1)),np.diag(w)@(y.reshape(-1,1)))
          yest = lm.predict([[x_new]])[0][0]
        else:
          n = len(x_new)
          yest_test = np.zeros(n)
          #Looping through all x-points
          for i in range(n):
            lm.fit(np.diag(w[:,i])@x,np.diag(w[:,i])@y)
            yest_test[i] = lm.predict(x_new[i].reshape(1,-1))
        return yest_test

In [190]:
data = pd.read_csv("/content/drive/MyDrive/DATA 441/Module_01/Data/cars")
x = data.loc[:,'cyl':'wt'].values
y = data['mpg'].values

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [192]:
mms = MinMaxScaler()

In [194]:
model = Lowess(kernel=Epanechnikov,tau=0.02)
x_scaled = mms.fit_transform(x)
model.fit(x_scaled,y)

In [195]:
yhat = model.predict(x_scaled)

mse(yhat,y)

0.09062583836281353