Reference tutorial (Statology, p-values from a statsmodels linear regression): https://www.statology.org/statsmodels-linear-regression-p-value/

In [38]:
import pandas as pd

# Build the training DataFrame: 13 rows with columns hours / exams / score
df = pd.DataFrame(
    data=dict(
        hours=[1, 2, 2, 4, 2, 1, 5, 4, 2, 4, 4, 3, 6],
        exams=[1, 3, 3, 5, 2, 2, 1, 1, 0, 3, 4, 3, 2],
        score=[76, 78, 85, 88, 72, 69, 94, 94, 88, 92, 90, 75, 96],
    )
)

# Small two-row frame with the same columns, used later for prediction
df_test = pd.DataFrame(
    data=dict(
        hours=[1, 2],
        exams=[1, 3],
        score=[76, 78],
    )
)

# Preview the first five rows of the training data
df.head()

Unnamed: 0,hours,exams,score
0,1,1,76
1,2,3,78
2,2,3,85
3,4,5,88
4,2,2,72


In [2]:
import statsmodels.api as sm

# Response variable: the 'score' column
y = df['score']

# Design matrix: the 'hours' and 'exams' columns plus an intercept ('const') column
x = sm.add_constant(df[['hours', 'exams']])

# Fit an ordinary least squares regression of y on x
model = sm.OLS(endog=y, exog=x).fit()

# Print the text summary of the fitted model
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  score   R-squared:                       0.718
Model:                            OLS   Adj. R-squared:                  0.661
Method:                 Least Squares   F-statistic:                     12.70
Date:                Sun, 25 Jun 2023   Prob (F-statistic):            0.00180
Time:                        13:42:26   Log-Likelihood:                -38.618
No. Observations:                  13   AIC:                             83.24
Df Residuals:                      10   BIC:                             84.93
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         71.4048      4.001     17.847      0.0



In [4]:
# Display the observed response values (the 'score' column of df)
y

0     76
1     78
2     85
3     88
4     72
5     69
6     94
7     94
8     88
9     92
10    90
11    75
12    96
Name: score, dtype: int64

In [3]:
# In-sample fitted values: predict on the same design matrix the model was fit on
model.predict(x)

0     75.320242
1     78.023565
2     78.023565
3     85.854381
4     79.235650
5     74.108157
6     95.830211
7     90.702719
8     81.659819
9     88.278550
10    87.066465
11    83.151057
12    99.745619
dtype: float64

In [18]:
# p-value of each model term as a plain dict keyed by term name
model.pvalues.to_dict()      

{'const': 6.514115622692619e-09,
 'hours': 0.0005077783375870764,
 'exams': 0.31548078548056635}

In [17]:
# t-statistic of each model term as a plain dict keyed by term name
model.tvalues.to_dict()      

{'const': 17.847206721307824,
 'hours': 5.038454768754412,
 'exams': -1.0567620251803829}

In [16]:
# Estimated coefficient of each model term as a plain dict keyed by term name
model.params.to_dict()      

{'const': 71.4048338368581,
 'hours': 5.127492447129899,
 'exams': -1.2120845921450287}

In [19]:
# Alias: `reg` and `model` refer to the same fitted results object
reg = model

In [22]:
# Build one record per model term (the intercept and each predictor) holding
# its coefficient, p-value and t-statistic.
# Each statistic Series is converted to a dict once, instead of on every loop
# iteration, and everything is read from `reg` (the same object as `model`).
params = reg.params.to_dict()
p_values = reg.pvalues.to_dict()
t_values = reg.tvalues.to_dict()

coeff = [
    {
        'feature_name': name,
        'feature_coefficient': coefficient,
        'feature_p_value': p_values[name],
        'feature_t_value': t_values[name],
    }
    for name, coefficient in params.items()
]

# Display the list of per-term records
coeff

[{'feature_name': 'const',
  'feature_coefficient': 71.4048338368581,
  'feature_p_value': 6.514115622692619e-09,
  'feature_t_value': 17.847206721307824},
 {'feature_name': 'hours',
  'feature_coefficient': 5.127492447129899,
  'feature_p_value': 0.0005077783375870764,
  'feature_t_value': 5.038454768754412},
 {'feature_name': 'exams',
  'feature_coefficient': -1.2120845921450287,
  'feature_p_value': 0.31548078548056635,
  'feature_t_value': -1.0567620251803829}]

In [23]:
# Tabulate the per-term records: one row per term, one column per statistic
pd.DataFrame(coeff)

Unnamed: 0,feature_name,feature_coefficient,feature_p_value,feature_t_value
0,const,71.404834,6.514116e-09,17.847207
1,hours,5.127492,0.0005077783,5.038455
2,exams,-1.212085,0.3154808,-1.056762


In [31]:
import statsmodels

In [29]:
# Persist the fitted results object to the relative path 'model'
# (read back later in this notebook with load_pickle)
model.save('model')

In [32]:
# Reload the results object saved to 'model'.
# NOTE(review): this is a pickle load — only use it on files you created or
# otherwise trust, since unpickling untrusted data can execute arbitrary code.
# NOTE(review): reaching `statsmodels.iolib.smpickle` through the bare
# `import statsmodels` presumably relies on the earlier `import statsmodels.api`
# having loaded the submodule — confirm on a fresh kernel.
model_loaded = statsmodels.iolib.smpickle.load_pickle('model')

In [33]:
# Show the repr of the reloaded object to check what type came back
model_loaded

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x26f0077c760>

In [34]:
# Sanity check: predictions from the reloaded model on the training design
# matrix, to compare against model.predict(x)
model_loaded.predict(x)

0     75.320242
1     78.023565
2     78.023565
3     85.854381
4     79.235650
5     74.108157
6     95.830211
7     90.702719
8     81.659819
9     88.278550
10    87.066465
11    83.151057
12    99.745619
dtype: float64

In [39]:
y_test = df_test['score']
x_test = df_test[['hours', 'exams']]

#add constant to predictor variables
x_test = sm.add_constant(x_test)

In [40]:
# Predict scores for the two-row test design matrix using the reloaded model
model_loaded.predict(x_test)

0    75.320242
1    78.023565
dtype: float64

In [41]:
# Inspect the test design matrix, including the added 'const' column
x_test

Unnamed: 0,const,hours,exams
0,1.0,1,1
1,1.0,2,3


In [None]:
# NOTE(review): leftover exploratory cell (never executed); it only references
# the OLS class and assigns nothing — consider removing it.
statsmodels.api.OLS