# PyTorch Regularized Regression

In my previous [notebook](https://www.kaggle.com/njelicic/pure-numpy-ols) I showed how to fit a linear regression model with linear algebra, using nothing but NumPy.

In this notebook I demonstrate how to fit a regression model with 'base' PyTorch (so no torch.nn). Here, I use gradient descent to find the parameters of the model.

The LinearRegression class supports: OLS (penalty=None), LASSO (penalty='l1') and  Ridge (penalty='l2'). 

Again, I try to follow the scikit-learn API (`model.fit()`, then `model.predict()`) in my implementation.

**Note: For learning purposes. If you want to score high in this comp I suggest stacking as many boosted trees as possible 😅**

In [None]:
import numpy as np 
import pandas as pd 
import torch
import seaborn as sns

import os
# Print the full path of every file found anywhere below the Kaggle input folder.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)


In [None]:
def preprocess(df):
    """Split a competition DataFrame into a feature matrix and a target vector.

    The caller's DataFrame is left untouched (columns are dropped on a copy),
    so the function can safely be called more than once on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'id' column. A 'target' column is optional and is
        only expected for training data.

    Returns
    -------
    X : numpy.ndarray
        Values of every column except 'id' and 'target'.
    y : numpy.ndarray or None
        Values of the 'target' column, or None when the frame has none.

    Raises
    ------
    KeyError
        If `df` has no 'id' column.
    """
    # drop() without inplace=True returns a new frame instead of mutating df.
    features = df.drop(columns='id')

    y = None
    if 'target' in features.columns:  # training data: separate the targets
        y = features['target'].values
        features = features.drop(columns='target')

    X = features.values
    return X, y

In [None]:
# Read the three competition files in order: training data, test data and
# the submission template.
train_df, test_df, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-jan-2021/train.csv',
        '/kaggle/input/tabular-playground-series-jan-2021/test.csv',
        '/kaggle/input/tabular-playground-series-jan-2021/sample_submission.csv',
    )
)

# Sanity check: the submission template lists the same ids, in the same
# order, as the test set.
assert np.all(test_df['id'].values == sub['id'].values)

# Turn both frames into (features, targets) pairs; the test set has no
# 'target' column, so y_test comes back as None.
(X_train, y_train), (X_test, y_test) = preprocess(train_df), preprocess(test_df)

## The regularized `LinearRegression` class

In [None]:
class LinearRegression():
    """Linear regression fitted with full-batch gradient descent in plain PyTorch.

    Supports ordinary least squares (``penalty=None``), LASSO
    (``penalty='l1'``) and Ridge (``penalty='l2'``). Only the weights are
    penalised, never the intercept.

    Parameters
    ----------
    C : float
        Regularisation strength. The penalty term is multiplied by ``C``, so
        larger values regularise more. Unused when ``penalty`` is None.
    lr : float
        Gradient-descent step size.
    penalty : {None, 'l1', 'l2'}
        Which penalty to add to the mean squared error.
    n_iter : int
        Number of gradient-descent steps performed by each ``fit`` call.

    Attributes
    ----------
    w : torch.Tensor
        Weight vector with one entry per feature, created by ``fit``.
    b : torch.Tensor
        Intercept (one element), created by ``fit``.
    history : list of float
        Loss recorded at every iteration of the most recent ``fit`` call.
    """

    def __init__(self, C=0.1,lr=1e-3, penalty=None, n_iter=10000):
        self.C = C
        self.lr = lr
        self.history = []
        self.penalty = penalty
        self.n_iter = n_iter

    def linreg(self, x):
        """Return the linear predictions ``x @ w + b`` for a 2-D input tensor."""
        # w is a 1-D vector, so no transpose is needed: (n, p) @ (p,) -> (n,).
        return x @ self.w + self.b

    def loss(self, y, y_hat):
        """Return the mean squared error plus the configured weight penalty.

        Raises
        ------
        ValueError
            If ``self.penalty`` is not None, 'l1' or 'l2'.
        """
        mse = torch.mean((y - y_hat) ** 2)

        if self.penalty is None:
            return mse
        if self.penalty == 'l1':
            # C times the sum of the absolute weights
            return mse + self.C * torch.sum(torch.abs(self.w))
        if self.penalty == 'l2':
            # C times the sum of the squared weights
            return mse + self.C * torch.sum(self.w ** 2)

        raise ValueError(
            f"penalty must be None, 'l1' or 'l2', got {self.penalty!r}"
        )

    def cast_to_tensor(self, x):
        """Convert array-like data to a float32 tensor."""
        # as_tensor converts in a single step and accepts tensors without the
        # copy-construct warning that torch.tensor(tensor) emits.
        return torch.as_tensor(x, dtype=torch.float32)

    def fit(self,x,y):
        """Fit the weights and intercept to ``x`` and ``y`` by gradient descent.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
        y : array-like holding n_samples target values; a column vector of
            shape (n_samples, 1) is accepted and flattened.

        Raises
        ------
        ValueError
            If ``y`` does not hold exactly one value per row of ``x``, or if
            the configured penalty is not recognised.
        """
        x = self.cast_to_tensor(x)
        # Flatten y: an (n, 1) target would otherwise broadcast against the
        # (n,) predictions into an (n, n) residual matrix.
        y = self.cast_to_tensor(y).reshape(-1)
        if y.numel() != x.size()[0]:
            raise ValueError(
                f"x has {x.size()[0]} rows but y has {y.numel()} values"
            )

        self.w = torch.randn(x.size()[1], requires_grad=True)  # random initial weights
        self.b = torch.randn(1, requires_grad=True)            # random initial intercept
        # The parameters start from scratch, so the loss curve must as well.
        self.history = []

        for _ in range(self.n_iter):
            y_hat = self.linreg(x)     # make predictions
            loss = self.loss(y, y_hat) # evaluate the objective
            loss.backward()            # backprop

            # The parameter updates themselves must not be tracked by autograd.
            with torch.no_grad():
                self.w -= self.w.grad * self.lr
                self.b -= self.b.grad * self.lr
                # Gradients accumulate across backward() calls; reset them.
                self.w.grad.zero_()
                self.b.grad.zero_()

            self.history.append(loss.item())

    def predict(self, x):
        """Return predictions for ``x`` as a 1-D NumPy array."""
        x = self.cast_to_tensor(x)
        with torch.no_grad():  # inference only, no graph required
            return self.linreg(x).numpy()

    def plot_history(self):
        """Plot the recorded loss against the (1-based) iteration number."""
        iterations = list(range(1, len(self.history) + 1))
        return sns.lineplot(x=iterations, y=self.history).set(
            xlabel='Iteration', ylabel='Loss', title='History'
        )

## Fit the model

In [None]:
# Fit a LASSO (L1-penalised) model on the training data, keeping the class's
# default C, learning rate and iteration count.
clf = LinearRegression(penalty='l1')
clf.fit(X_train,y_train)

## Plot loss over time

In [None]:
# Plot the loss recorded at every gradient-descent iteration during fitting.
clf.plot_history()

## Make predictions

In [None]:
# Write the test-set predictions into the submission template's 'target' column.
sub['target'] = clf.predict(X_test)

# index=False keeps the DataFrame index out of the CSV file.
sub.to_csv('submission.csv',index=False)