In [1]:
# library imports

from sklearn.model_selection import train_test_split
import numpy as np

# Statsmodels

In [2]:
# regression data
import random
from sklearn.datasets import make_regression


# A single seed drives both the stdlib RNG (used only for the `bias` draw
# below) and make_regression's own generator. Seeding `random` alone leaves
# X, y and the ground-truth coefficients different on every fresh run.
SEED = 0
random.seed(SEED)

regression_params = {
     'n_samples':1000
    ,'n_features':10
    ,'n_informative':2
    ,'n_targets' : 1
    ,'bias':random.randint(0,10)
    ,'coef':True
    ,'random_state':SEED
}

# coef=True makes make_regression return the ground-truth coefficients as a
# third value, which is used later to check the fitted models.
X,y,coef = make_regression(**regression_params)

# add constant
# A column of ones is appended as the LAST column of X so that a model with
# no built-in intercept can estimate the bias as an ordinary coefficient.
X = np.concatenate([X,np.ones(shape=(len(X),1))],axis=1)

In [3]:
# Sanity check: 1000 samples by 11 columns (10 generated features plus the
# appended constant column).
print(X.shape)

(1000, 11)


In [4]:
# Sanity check: one target value per sample (n_targets is 1).
print(y.shape)

(1000,)


In [5]:
# Ground-truth coefficients returned by make_regression (requested with
# coef=True), flattened to 1-D for display. Only the informative features
# are non-zero.
coef.flat[:]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       42.09594831,  0.        ,  0.        , 34.01708583,  0.        ])

In [6]:
import statsmodels.api as sm

# model
# Builds the OLS specification only; estimation happens in the next cell.
# X already carries the constant column appended earlier, so the intercept is
# estimated as the coefficient of that last column.
model = sm.OLS(endog=y,exog=X)

In [7]:
# Estimate the model; `model` is rebound from the OLS specification to the
# object returned by .fit().
# NOTE(review): because of this rebinding, re-running this cell on its own
# presumably fails (the returned results object is not expected to expose
# .fit()) - re-run the cell that builds sm.OLS first. TODO confirm.
model = model.fit()

In [8]:
# Plain-text regression report (fit statistics and per-coefficient table).
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 6.410e+31
Date:                Sun, 04 Jul 2021   Prob (F-statistic):               0.00
Time:                        21:17:20   Log-Likelihood:                 28910.
No. Observations:                1000   AIC:                        -5.780e+04
Df Residuals:                     989   BIC:                        -5.774e+04
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1         -9.326e-15   2.11e-15     -4.419      0.0

# sklearn

In [9]:
from sklearn.linear_model import LinearRegression

# Rebinds `model` (previously the statsmodels object) to an unfitted
# scikit-learn estimator created with default settings.
# NOTE(review): X also contains an explicit constant column; the recorded
# outputs show a zero coefficient for it and the bias reported in intercept_.
model = LinearRegression()

In [10]:
# Fit on the same X (including the constant column) and y used above.
model = model.fit(X,y)

In [11]:
# Fitted coefficients, one per column of X; the last entry belongs to the
# appended constant column.
model.coef_

array([ 2.13424997e-14, -1.05471187e-15,  1.91470104e-16, -1.02695630e-14,
        3.78273801e-14,  4.20959483e+01, -1.32394096e-14,  0.00000000e+00,
        3.40170858e+01,  5.53376789e-15,  0.00000000e+00])

In [12]:
# Fitted intercept; compare with the `bias` value passed to make_regression.
model.intercept_

6.000000000000003

# pytorch

In [13]:
# `torch.tensor` is a factory function, not a submodule, so it is reached
# through `import torch` rather than imported on its own. An
# `import torch.tensor` statement raises ModuleNotFoundError on PyTorch
# releases that do not ship a torch/tensor.py module.
import torch
import torch.nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import SGD


In [14]:
class Linear(torch.nn.Module):
    """Bias-free linear map ``x @ beta`` with a single learnable weight column.

    The module adds nothing to the product, so any intercept has to be
    supplied by the caller as a constant column of ``x``.
    """

    def __init__(self, n_features):
        """Create the weight column.

        n_features: number of columns expected in the inputs given to
            ``forward``.
        """
        super().__init__()

        # Weights start as uniform draws from [0, 1), shaped (n_features, 1).
        initial_weights = torch.rand(n_features, 1)
        self.__beta = torch.nn.Parameter(initial_weights)

    @property
    def beta(self):
        """Read-only handle on the weight parameter."""
        return self.__beta

    def forward(self, x):
        """Return ``x @ beta`` computed in float64.

        x: tensor of shape (n_samples, n_features).
        Returns a tensor of shape (n_samples, 1).
        """
        # Both operands are cast to double so the product is float64
        # regardless of the dtype the parameter is stored in.
        inputs = x.double()
        weights = self.beta.double()
        return inputs @ weights
        
    

In [15]:
# n_features + 1 because X carries an extra constant column appended after
# the data was generated; its weight plays the role of the intercept.
model = Linear(regression_params['n_features']+1)

In [16]:
# Mean-squared-error objective, optimised with SGD; only the learning rate
# (0.01) is specified, everything else is left at the optimizer's defaults.
criterion = torch.nn.MSELoss()
optimizer = SGD(model.parameters(),lr=0.01)

In [17]:
# Convert the design matrix to a tensor for the PyTorch model.
# as_tensor keeps the array's float64 dtype and, unlike torch.tensor, simply
# hands back X when it is already a tensor, so re-running this cell neither
# copies the data again nor triggers the copy-construct UserWarning.
X = torch.as_tensor(X)
X

tensor([[-1.6070, -0.1948, -1.4790,  ..., -0.9046,  0.1089,  1.0000],
        [-1.0662, -2.0468,  1.1391,  ..., -0.0263, -0.3280,  1.0000],
        [ 1.3904, -1.2281,  2.8297,  ...,  1.0945, -0.6556,  1.0000],
        ...,
        [-1.0239, -0.8538,  0.0079,  ..., -0.0048,  0.7092,  1.0000],
        [-0.2646,  0.9251,  0.2083,  ...,  1.2735, -0.0123,  1.0000],
        [ 0.0425,  1.1152,  0.2463,  ..., -0.2185, -0.4865,  1.0000]],
       dtype=torch.float64)

In [18]:
# Column-vector target of shape (n_samples, 1) so it lines up with the
# (n_samples, 1) model output inside the loss.
# as_tensor (rather than torch.tensor) keeps this cell safe to re-run once y
# is already a tensor; reshaping an (n, 1) tensor to (-1, 1) leaves it as is.
y = torch.as_tensor(y).reshape(-1,1)

In [19]:
# Full-batch gradient descent: each epoch is one update computed on all of X.
epochs = 1000
for epoch in range(epochs):
    # Start every step from cleared gradients, since backward() accumulates.
    optimizer.zero_grad()

    # Forward pass and loss on the whole dataset.
    y_pred = model(X)
    loss = criterion(y_pred, y)

    # Backpropagate, then apply the SGD update.
    loss.backward()
    optimizer.step()

In [20]:
# Learned weights; the last entry is the weight of the constant column and
# can be compared with the `bias` used to generate the data.
list(model.parameters())

[Parameter containing:
 tensor([[-5.2649e-07],
         [ 1.4773e-06],
         [-4.1983e-06],
         [-4.9715e-07],
         [ 1.4184e-10],
         [ 4.2096e+01],
         [ 6.7035e-06],
         [-1.3783e-06],
         [ 3.4017e+01],
         [ 1.1463e-05],
         [ 6.0000e+00]], requires_grad=True)]