In [20]:
# IPython
# Import from the public IPython.display module: importing `display` from
# IPython.core.display has been deprecated since IPython 7.14 and fails on
# recent IPython releases.
from IPython.display import display, HTML, clear_output
# Inject CSS so elements with class "container" span the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize
import pandas as pd

import statsmodels.api as sm
import statsmodels.formula.api as smf

# Significance of the overall correlation

Reference: https://www.geeksforgeeks.org/ordinary-least-squares-ols-using-statsmodels/

In [2]:
# Read the fit-parameter CSV and derive the regression variables in one chain:
#   nu           = -b
#   phi_alpha    = a
#   ln_phi_alpha = ln(phi_alpha)
df_2_fit_parms = pd.read_csv('./lit_data/two_params_without_my_data.csv').assign(
    nu=lambda params: -1.0 * params['b'],
    phi_alpha=lambda params: params['a'],
    ln_phi_alpha=lambda params: np.log(params['phi_alpha']),
)

In [3]:
# Ordinary least squares of nu on ln(phi_alpha).
# sm.OLS does not add an intercept on its own, so a constant column is
# prepended to the regressor before fitting.
Y = df_2_fit_parms['nu']
X = sm.add_constant(df_2_fit_parms['ln_phi_alpha'])

model = sm.OLS(endog=Y, exog=X)
results = model.fit()

In [4]:
# Show the plain-text regression table (coefficients, t-statistics, R-squared, ...).
print(results.summary().as_text())

                            OLS Regression Results                            
Dep. Variable:                     nu   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.900
Method:                 Least Squares   F-statistic:                     2068.
Date:                Tue, 05 Oct 2021   Prob (F-statistic):          2.52e-116
Time:                        13:58:34   Log-Likelihood:                -285.10
No. Observations:                 230   AIC:                             574.2
Df Residuals:                     228   BIC:                             581.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.5781      0.120      4.821   

# Difference between CEX and AEX data

In [18]:
# Formula-based fit with an interaction term. In the formula syntax,
# `a * b` expands to `a + b + a:b`, so this model estimates an intercept and
# a slope for the baseline iex_type plus offsets to both for the other level.
interaction_formula = 'nu ~ ln_phi_alpha * iex_type'
mod = smf.ols(formula=interaction_formula, data=df_2_fit_parms)
res = mod.fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                     nu   R-squared:                       0.938
Model:                            OLS   Adj. R-squared:                  0.937
Method:                 Least Squares   F-statistic:                     1144.
Date:                Tue, 05 Oct 2021   Prob (F-statistic):          2.82e-136
Time:                        14:51:15   Log-Likelihood:                -230.56
No. Observations:                 230   AIC:                             469.1
Df Residuals:                     226   BIC:                             482.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Intercept       

My notes:
- Using R-style formulas in statsmodels:  https://www.statsmodels.org/dev/example_formulas.html
- Categorical encoding in statsmodels:  https://www.statsmodels.org/stable/examples/notebooks/generated/contrasts.html
    - I believe the default is treatment (dummy) coding
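    - This is like one-hot encoding, except that the first level serves as the baseline (reference) and gets no indicator column of its own; every other level is expressed as an offset from that baseline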
    - So here, the Intercept and ln_phi_alpha are for the aex data
    - The iex_type[T.cex] is a term added onto the aex intercept; it is not statistically significant, so there is no evidence that the two intercepts differ
    - However, the term that modifies the aex slope (ln_phi_alpha:iex_type[T.cex]) is significant, so the two slopes do significantly differ

# Note the types of Python programs I have used

In [21]:
# Report the type of each fitting entry point used in this work.
tuple(map(type, (scipy.optimize.differential_evolution, scipy.optimize.least_squares, smf.ols)))

(function, function, method)

From the Python documentation:

"It’s important to keep in mind that all packages are modules, but not all modules are packages. Or put another way, packages are just a special kind of module. Specifically, any module that contains a \__path__ attribute is considered a package."



In [36]:
import scipy
import statsmodels

# A module is a package when it exposes a __path__ attribute; check both libraries.
tuple('__path__' in dir(library) for library in (statsmodels, scipy))

(True, True)