In [242]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy import stats

In [333]:
# Toy design matrix: six observations of the three features A, B and C.
X = pd.DataFrame(
    [
        [1, 2, 2],
        [2, 5, 4],
        [3, 1, 2],
        [6, 2, 7],
        [10, 4, 1],
        [3, 1, 2],
    ],
    columns=list('ABC'),
)
# Response values, one per row of X.
y = [1, 2, 1, 3, 1, 1]

In [427]:
def calcLogLikelihood(guess, true, n):
    """Return the Gaussian log-likelihood of the residuals ``true - guess``.

    The residual spread is estimated with ``np.std`` (``ddof=0``) and the
    log-likelihood

        -(n / 2) * log(2 * pi * sigma**2) - (e . e) / (2 * sigma**2)

    is evaluated directly in log space. Forming the likelihood itself and
    taking its logarithm afterwards underflows to 0 (or overflows) for
    moderately large ``n`` and returns ``-inf``/``inf``/``nan`` instead of a
    finite value.

    Parameters
    ----------
    guess : array-like
        Predicted values.
    true : array-like
        Observed values, same length as ``guess``.
    n : int
        Number of observations used in the normalising term.

    Returns
    -------
    float
        The log-likelihood; not finite when the residuals have zero variance.
    """
    error = np.asarray(true, dtype=float) - np.asarray(guess, dtype=float)
    # np.std subtracts the residual mean before squaring; this estimator is
    # kept unchanged so existing results stay the same.
    sigma_sq = np.std(error) ** 2
    normalising_term = -0.5 * n * np.log(2.0 * np.pi * sigma_sq)
    return normalising_term - np.dot(error.T, error) / (2.0 * sigma_sq)

def calcDurbinWatson(guess, true):
    """Return the Durbin-Watson statistic of the residuals ``true - guess``.

    The statistic is ``sum((e[i+1] - e[i])**2) / sum(e[i]**2)`` taken over the
    residuals in their given order.

    Both inputs are converted to flat NumPy arrays first, so the computation
    is purely positional. Indexing a pandas Series with ``e[i]`` is
    label-based and fails (or picks the wrong element) when the Series does
    not carry a default 0..n-1 index.

    Parameters
    ----------
    guess : array-like
        Predicted values.
    true : array-like
        Observed values, same length as ``guess``.

    Returns
    -------
    float
        The Durbin-Watson statistic; ``nan`` when every residual is zero.
    """
    e = np.asarray(true, dtype=float).ravel() - np.asarray(guess, dtype=float).ravel()
    squared_steps = np.sum(np.diff(e) ** 2)
    squared_resid = np.sum(e ** 2)
    return squared_steps / squared_resid

def calConditionNumber(x):
    """Return the ratio of the largest to the smallest singular value of ``x``."""
    # np.linalg.svd returns (U, S, Vh); only the singular values S are needed.
    singular_values = np.linalg.svd(x)[1]
    largest = singular_values.max()
    smallest = singular_values.min()
    return largest / smallest

def LinearRegressionSummary(lm, X, y):
    """Print an OLS-style summary table for a fitted linear regression.

    Parameters
    ----------
    lm : fitted estimator
        Must expose ``intercept_``, ``coef_``, ``predict(X)`` and
        ``score(X, y)`` (for example a fitted
        ``sklearn.linear_model.LinearRegression``).
    X : pandas.DataFrame or 2-D array
        Feature matrix the model was fitted on; must expose ``.shape``.
    y : array-like
        Observed response values, one per row of ``X``.

    Returns
    -------
    None
        The report is written to stdout: fit statistics first, then one row
        per coefficient with its standard error, t value, p-value and 95%
        confidence bounds.
    """
    rule = '=' * 78

    y_arr = np.asarray(y)
    params = np.append(lm.intercept_, lm.coef_)
    pred = lm.predict(X)
    resid = y_arr - pred

    # Build the design matrix positionally. An index-aligned join would inject
    # NaNs whenever X carries a non-default index (e.g. after a train/test split).
    newX = pd.DataFrame(X).reset_index(drop=True)
    newX.insert(0, "Constant", np.ones(len(newX)))

    n = len(X)
    k = X.shape[1]
    df_res = n - k - 1
    df_model = k
    score = lm.score(X, y)

    # Coefficient covariance: MSE * (X'X)^-1; its diagonal holds the variances.
    MSE = np.sum(resid ** 2) / df_res
    var_b = MSE * (np.linalg.inv(np.dot(newX.T, newX)).diagonal())
    sd_b = np.sqrt(var_b)
    ts_b = params / sd_b

    # Survival function instead of 1 - cdf: same quantity, better tail precision.
    p_values = 2 * stats.t.sf(np.abs(ts_b), df_res)
    t_crit = stats.t.ppf(q=0.975, df=df_res)
    lower = params - t_crit * sd_b
    upper = params + t_crit * sd_b

    # Each statistic is computed once and reused for every line that needs it.
    f_stat = (score / (1 - score)) * (df_res / k)
    f_pvalue = stats.f.sf(f_stat, k, df_res)
    log_lik = calcLogLikelihood(pred, y_arr, n)
    if len(resid) >= 8:
        # The omnibus normality test needs at least 8 observations.
        omnibus = stats.normaltest(resid)
        omni_stat, omni_p = np.round(omnibus[0], 3), np.round(omnibus[1], 3)
    else:
        omni_stat, omni_p = np.nan, np.nan
    jarque_bera = stats.jarque_bera(resid)

    print('Linear Regression Summary'.center(78))
    print(rule)
    print('Model: Linear Regression')
    print('No. of Observations:', n)
    print('Df Residuals:', df_res)
    print('Df Model:', df_model)
    print('R-squared:', np.round(score, 3))
    print('Adj. R-squared:', np.round(1 - (1 - score) * (n - 1) / (df_res), 3))
    print('F-statistic:', np.round(f_stat, 3))
    print('Prob (F-statistic):', np.round(f_pvalue, 6))
    print('Log-Likelihood:', np.round(log_lik, 3))
    print('AIC:', np.round(-2 * log_lik + 2 * (k + 1), 3))
    print('BIC:', np.round(-2 * log_lik + np.log(n) * (k + 1), 3))
    print('Omnibus:', omni_stat)
    print('Prob(Omnibus):', omni_p)
    print('Skewness:', np.round(stats.skew(resid), 3))
    print('Kurtosis:', np.round(stats.kurtosis(resid, fisher=False), 3))
    print('Durbin-Watson:', np.round(calcDurbinWatson(pred, y_arr), 3))
    print('Jarque-Bera (JB):', np.round(jarque_bera[0], 3))
    print('Prob(JB):', np.round(jarque_bera[1], 3))
    print('Condition Number:', np.round(calConditionNumber(newX), 3))
    print(rule)

    coef_table = pd.DataFrame({
        'Features': list(newX.columns),
        'Coef': np.round(params, 4),
        'std err': np.round(sd_b, 3),
        't values': np.round(ts_b, 3),
        'P>|t|': np.round(p_values, 3),
        '[0.025': np.round(lower, 3),
        '0.975]': np.round(upper, 3),
    })
    # Separate the header from the rows explicitly instead of string-matching
    # on the first row's index label.
    header, *rows = coef_table.to_string().split('\n')
    print(header)
    print('-' * 78)
    print('\n'.join(rows))
    print(rule)

In [428]:
# Fit an ordinary least-squares model on the toy data, then print its summary.
lm = LinearRegression().fit(X, y)
LinearRegressionSummary(lm, X, y)

                         Logistic Regression Summary                          
Model: Logistic Regression
No. of Observations: 6
Df Resifuals: 2
Df Model: 3
R-squared: 1.0
Adj. R-squared: 1.0
F-statistic: 7144.875
Prob (F-statistic): 0.00014
Log-Likelihood: 20.942
AIC: -33.885
BIC: -34.718
Omnibus: nan
Prob(Omnibus): nan
Skewness: -0.852
Kurtosis: 2.456
Durbin-Watson: 1.684
Jarque-Bera (JB): 0.8
Prob(JB): 0.67
Condition Number: 16.917
   Features    Coef  std err  t values  P>|t|  [0.025  0.975]
------------------------------------------------------------------------------
0  Constant  0.1036    0.014     7.613  0.017   0.045   0.162
1         A  0.0246    0.002    13.697  0.005   0.017   0.032
2         B  0.0698    0.004    19.251  0.003   0.054   0.085
3         C  0.3731    0.003   142.689  0.000   0.362   0.384


In [335]:
import statsmodels.api as sm
# Cross-check: fit the same data with statsmodels OLS and print its summary
# so the figures can be compared with LinearRegressionSummary's output.
X2 = sm.add_constant(X)  # feature matrix with a constant (intercept) column added
est = sm.OLS(y, X2)
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                     7145.
Date:                Thu, 26 May 2022   Prob (F-statistic):           0.000140
Time:                        17:20:13   Log-Likelihood:                 20.942
No. Observations:                   6   AIC:                            -33.88
Df Residuals:                       2   BIC:                            -34.72
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1036      0.014      7.613      0.0

  warn("omni_normtest is not valid with less than 8 observations; %i "
