In [1]:
# library imports

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Data

In [2]:
# regression data
import random
from sklearn.datasets import make_regression

# Single seed shared by every random draw in this cell, so that
# Restart & Run All regenerates the same data and the same split.
SEED = 0

# rng: Python's `random` only determines the bias drawn in the dict below
random.seed(SEED)

regression_params = {
     'n_samples':1000
    ,'n_features':10
    ,'n_informative':2
    ,'n_targets' : 1
    ,'bias':random.randint(0,10)
    ,'coef':True
}

# random.seed() does not reach scikit-learn's sampling, so the seed is passed
# explicitly; it is kept out of regression_params to leave that dict unchanged.
X,y,coef = make_regression(**regression_params,random_state=SEED)

# add constant (column of ones appended as the LAST column of X)
X = np.concatenate([X,np.ones(shape=(len(X),1))],axis=1)

# train / test splits, seeded so the partition is repeatable
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=SEED)

In [3]:
# ground-truth coefficients returned by make_regression, flattened to 1-D
# (the bias term is 6, drawn by random.randint after random.seed(0))
coef.ravel()

array([ 0.        , 41.03689551,  0.        ,  0.        , 10.64109235,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ])

# Statsmodels

In [4]:
import statsmodels.api as sm

# model
# OLS is given X_train as-is; that matrix already carries the column of ones
# appended in the data cell, so the last fitted coefficient is the intercept.
model = sm.OLS(endog=y_train,exog=X_train)

In [5]:
# fit() returns a results object and `model` is rebound to it, so the
# summary() and predict() calls below act on the fitted results.
# NOTE(review): because the name is reused, re-running this cell on its own
# calls fit() on the results object rather than on the OLS model - re-run the
# previous cell first.
model = model.fit()

In [6]:
# print() renders the fitted summary as a plain-text table
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 2.141e+32
Date:                Mon, 05 Jul 2021   Prob (F-statistic):               0.00
Time:                        11:08:42   Log-Likelihood:                 22416.
No. Observations:                 750   AIC:                        -4.481e+04
Df Residuals:                     739   BIC:                        -4.476e+04
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1          6.328e-15   9.63e-16      6.574      0.0

In [7]:
# MSE on the held-out test split (X_test still includes the constant column)
mean_squared_error(y_true=y_test,y_pred=model.predict(exog=X_test))

6.366131170960722e-28

# sklearn

In [8]:
from sklearn.linear_model import LinearRegression

# fit_intercept=True: the estimator fits the intercept itself.
# NOTE: this rebinds `model`, replacing the statsmodels object from the previous section.
model = LinearRegression(fit_intercept=True)

In [9]:
# drop the trailing constant column: sklearn adds its own intercept
# unless fit_intercept=False
model = model.fit(X_train[:,:-1],y_train)

In [10]:
# fitted slopes, one per feature column that was passed to fit()
model.coef_

array([-2.97345456e-15,  4.10368955e+01,  2.48689958e-14,  1.77635684e-14,
        1.06410923e+01,  2.75335310e-14, -7.99360578e-15, -9.76996262e-15,
       -3.55271368e-15,  1.77635684e-14])

In [11]:
# fitted intercept, to compare against the bias passed to make_regression
model.intercept_

6.0

In [12]:
# MSE on the held-out test split, again without the trailing constant column
mean_squared_error(y_true=y_test,y_pred=model.predict(X_test[:,:-1]))

3.661835524050761e-27

# pytorch

In [13]:
# `torch.tensor` is a function, not a submodule, so it is reached through the
# top-level package instead of being imported on its own.
import torch
import torch.nn
from torch.optim import SGD

In [14]:
class Linear(torch.nn.Module):
    """Linear map ``x @ beta`` with no separate bias term.

    An intercept is learned only when the input carries a column of ones.
    """

    def __init__(self,n_features):
        """Create a ``(n_features, 1)`` weight column initialised with torch.rand."""
        super().__init__()
        initial_weights = torch.rand((n_features,1))
        self.__beta = torch.nn.Parameter(initial_weights)

    @property
    def beta(self):
        """The learnable weight column, exposed without a setter."""
        return self.__beta

    def forward(self,x):
        """Return ``x @ beta`` with both operands cast to float64.

        x is expected to be (n_samples, n_features).
        """
        inputs = x.double()
        weights = self.beta.double()
        return inputs @ weights

    def predict(self,x):
        """NumPy inference path: ``np.dot(x, beta)`` on the detached weights."""
        weights = self.beta.detach().numpy()
        return np.dot(x,weights)

In [15]:
# Seed torch's RNG so the torch.rand initialisation inside Linear is the same
# on every run; without it the starting weights differ per kernel session.
torch.manual_seed(0)

# +1 for intercept (the constant column appended to X)
model = Linear(regression_params['n_features']+1)

In [16]:
# make training sets tensors
# NOTE: X_train / y_train are rebound from ndarrays to tensors here and are
# only converted back in the "Pure Python" section.

X_train = torch.tensor(X_train)
# column shape (n_samples,1) so the target lines up with the model output
y_train = torch.tensor(y_train).reshape(-1,1)

In [17]:
epochs = 1000
criterion = torch.nn.MSELoss()
optimizer = SGD(model.parameters(),lr=0.1)

# training loop: the whole training set is used for every update
for epoch in range(epochs):
    # clear gradients left over from the previous update
    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = criterion(y_pred,y_train)
    # backpropagate, then apply one SGD step
    loss.backward()
    optimizer.step()

In [18]:
# regression coefficients
# (a single parameter: the weight column; its last row multiplies the constant column)
list(model.parameters())

[Parameter containing:
 tensor([[ 1.3398e-07],
         [ 4.1037e+01],
         [ 2.2644e-07],
         [-2.9988e-07],
         [ 1.0641e+01],
         [-2.1932e-07],
         [ 3.4094e-07],
         [-1.6104e-07],
         [ 5.3127e-08],
         [ 1.7258e-07],
         [ 6.0000e+00]], requires_grad=True)]

In [19]:
# MSE on the held-out test split; predict() goes through np.dot, so the
# ndarray is passed straight in
mean_squared_error(y_true=y_test,y_pred=model.predict(X_test))

5.546837902391962e-11

# Pure Python

In [20]:
# turn back into numpy arrays
# (y_train keeps the (n_samples,1) column shape it was given in the PyTorch section)

X_train = X_train.numpy()
y_train = y_train.numpy()

In [21]:
# functions

def predict(x,beta):
    """Return the linear prediction ``np.dot(x, beta)``.

    The last axis of ``x`` must have as many entries as ``beta`` has rows;
    an AssertionError is raised otherwise.
    """
    n_inputs = x.shape[-1]
    n_weights = beta.shape[0]
    assert n_inputs == n_weights
    return np.dot(x,beta)

def gradient_step(beta,grad,step_size=0.01):
    """Take one gradient-descent step: move ``beta`` against ``grad``.

    ``step_size`` scales how far the parameters move.
    """
    update = step_size*grad
    return beta - update

def gradient(x,beta,y_true):
    """Gradient of the half mean-squared error with respect to ``beta``.

    Computes ``x.T @ (predict(x, beta) - y_true) / len(x)``.

    ``y_true`` may be flat ``(n,)`` or a column ``(n,1)``. When it holds the
    same number of elements as the prediction but in a different shape, it is
    reshaped to match, so a flat target is not broadcast against a column
    prediction into an ``(n,n)`` residual. Any other shape is left alone and
    follows NumPy broadcasting as before.
    """
    y_pred = predict(x,beta)

    y_true = np.asarray(y_true)
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true = y_true.reshape(y_pred.shape)

    return np.dot(np.transpose(x),(y_pred-y_true))/len(x)

In [22]:
# initial parameters: one uniform draw per column of X (features + constant),
# built directly as a column vector
beta = np.array([[random.random()] for _ in range(regression_params['n_features']+1)])

# plain gradient descent: compute the gradient on the training set, then step
for epoch in range(epochs):
    grad = gradient(X_train,beta,y_train)
    beta = gradient_step(beta,grad,0.1)
beta

array([[ 9.41155594e-16],
       [ 4.10368955e+01],
       [ 1.02278097e-15],
       [-1.83628733e-15],
       [ 1.06410923e+01],
       [-1.29341672e-15],
       [ 1.54251525e-15],
       [-1.09379608e-15],
       [-1.79807758e-16],
       [ 1.42207178e-15],
       [ 6.00000000e+00]])

In [23]:
# MSE on the held-out test split, using the module-level predict() defined above
mean_squared_error(y_true=y_test,y_pred=predict(X_test,beta))

8.252068825681648e-28