# Sample code using stepwise_functions

In [1]:
import stepwise_functions as step

In [2]:
# List every name the module exposes: its stepwise helpers plus the names it imports itself.
dir(step)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'add1',
 'all_subsets',
 'combinations',
 'drop1',
 'np',
 'pd',
 'report_best_all',
 'sm',
 'step_back',
 'step_forward']

In [3]:
# Print the docstring of step_back (signature, arguments and return value).
help(step.step_back)

Help on function step_back in module stepwise_functions:

step_back(model, y, X, active_features=None, crit='AIC', **kwargs)
    model: any statsmodels regressor that takes numpy array arguments as inputs 
           and that has bic() and aic() methods
    y: endogenous variable for the model (numpy array)
    X: exogenous variables for the model, both active and candidates to be added (numpy array)
    active_features: numeric list indicating the current active features of X
    crit: 'AIC' or 'BIC', will sort the output.
    **kwargs: any keyword arguments to be passed to the model
    
    Returns: DataFrame 
             number of variables, AIC, BIC, and the identification of all features present in the model
             and the best deletion of feature iterated until only one feature is left based on crit



In [4]:
import statsmodels.api as sm
import pandas as pd
import numpy as np
# Fix the seed so that Restart & Run All regenerates exactly the same sample.
# NOTE: the outputs recorded in this notebook predate the seed and will change on the next run.
SEED = 42
N_OBS = 100
np.random.seed(SEED)

# Simulated predictor and a cubic response with standard-normal noise:
# only x, x**2 and x**3 carry signal, so x**4 and x**5 are pure distractors.
x = np.random.normal(0, 1, N_OBS)
y = 2 - 4*x + x**2 + 2*x**3 + np.random.normal(0, 1, N_OBS)

# Design matrix: an intercept column of ones followed by the powers x .. x**5.
X = np.c_[np.ones(N_OBS), x, x**2, x**3, x**4, x**5]
# The same candidate features without the intercept column.
X1 = X[:, 1:]
# Bundle the (model, y, X) positional arguments so later calls can unpack them with *base.
base = (sm.OLS,y,X)

In [5]:
# Tabulate N_var, AIC and BIC for the feature subsets of X, one row per subset.
step.all_subsets(*base)

Unnamed: 0,N_var,AIC,BIC,Feature1,Feature2,Feature3,Feature4,Feature5
0,1,504.961,510.171,True,False,False,False,False
1,1,501.853,507.063,False,True,False,False,False
2,1,465.86,471.071,False,False,True,False,False
3,1,506.264,511.475,False,False,False,True,False
4,1,448.827,454.038,False,False,False,False,True
5,2,500.198,508.013,True,True,False,False,False
6,2,375.724,383.54,True,False,True,False,False
7,2,505.005,512.821,True,False,False,True,False
8,2,407.092,414.908,True,False,False,False,True
9,2,451.677,459.492,False,True,True,False,False


In [6]:
# Check that unpacking `base` is equivalent to passing the arguments explicitly.
# DataFrame.equals compares the two tables element by element and returns a single bool;
# the built-in all() would only iterate over the column labels of the comparison frame,
# which are non-empty strings and therefore always truthy.
step.all_subsets(sm.OLS, y, X).equals(step.all_subsets(*base))

True

In [7]:
# Judging by the output, this prints two rankings of the best subsets (first ordered by AIC,
# then by BIC) and returns the full subset table.
# NOTE(review): the role of the trailing 3 is not documented in this notebook — confirm
# against stepwise_functions.
step.report_best_all(*base, 3)

   N_var      AIC      BIC Feature1 Feature2 Feature3 Feature4 Feature5
15     3  314.423  324.843     True     True     True    False    False
26     4  316.273  329.299     True     True     True    False     True
25     4  316.309  329.334     True     True     True     True    False
30     5  318.229   333.86     True     True     True     True     True
   N_var      AIC      BIC Feature1 Feature2 Feature3 Feature4 Feature5
15     3  314.423  324.843     True     True     True    False    False
26     4  316.273  329.299     True     True     True    False     True
25     4  316.309  329.334     True     True     True     True    False
18     3  321.085  331.505     True    False     True     True    False


Unnamed: 0,N_var,AIC,BIC,Feature1,Feature2,Feature3,Feature4,Feature5
0,1,504.961,510.171,True,False,False,False,False
1,1,501.853,507.063,False,True,False,False,False
2,1,465.86,471.071,False,False,True,False,False
3,1,506.264,511.475,False,False,False,True,False
4,1,448.827,454.038,False,False,False,False,True
5,2,500.198,508.013,True,True,False,False,False
6,2,375.724,383.54,True,False,True,False,False
7,2,505.005,512.821,True,False,False,True,False
8,2,407.092,414.908,True,False,False,False,True
9,2,451.677,459.492,False,True,True,False,False


In [8]:
# Score the full model alongside every model obtained by deleting a single feature from it.
step.drop1(*base)

Unnamed: 0,N_var,AIC,BIC,Feature1,Feature2,Feature3,Feature4,Feature5
2,4,316.273,329.299,True,True,True,False,True
1,4,316.309,329.334,True,True,True,True,False
0,5,318.229,333.86,True,True,True,True,True
4,4,322.956,335.981,True,False,True,True,True
3,4,344.527,357.553,True,True,False,True,True
5,4,399.126,412.152,False,True,True,True,True


In [9]:
# Same call with the intercept-free matrix X1; the table matches the one obtained for X,
# so the column of ones in X is apparently not treated as a feature — TODO confirm.
step.drop1(sm.OLS, y, X1)

Unnamed: 0,N_var,AIC,BIC,Feature1,Feature2,Feature3,Feature4,Feature5
2,4,316.273,329.299,True,True,True,False,True
1,4,316.309,329.334,True,True,True,True,False
0,5,318.229,333.86,True,True,True,True,True
4,4,322.956,335.981,True,False,True,True,True
3,4,344.527,357.553,True,True,False,True,True
5,4,399.126,412.152,False,True,True,True,True


In [10]:
# Start from active features [1, 3, 5] (column indices of X, where column 0 is the intercept)
# and, per the output, rank the single-feature deletions by BIC.
step.drop1(*base,[1,3,5],'BIC')

Unnamed: 0,N_var,AIC,BIC,Feature1,Feature2,Feature3,Feature4,Feature5
0,3,370.873,381.294,True,False,True,False,True
1,2,375.724,383.54,True,False,True,False,False
2,2,407.092,414.908,True,False,False,False,True
3,2,441.766,449.582,False,False,True,False,True


In [11]:
# Candidates for adding one feature to the model that already holds columns 0, 2 and 3 of X1.
grown_from_subset = step.add1(sm.OLS, y, X1, [0, 2, 3])
print(grown_from_subset)

# With no active features given, the additions are scored starting from the empty model.
grown_from_empty = step.add1(*base)
print(grown_from_empty)

  N_var      AIC      BIC Feature1 Feature2 Feature3 Feature4 Feature5
1     4  316.309  329.334     True     True     True     True    False
0     3  321.085  331.505     True    False     True     True    False
2     4  322.956  335.981     True    False     True     True     True
  N_var      AIC      BIC Feature1 Feature2 Feature3 Feature4 Feature5
5     1  448.827  454.038    False    False    False    False     True
3     1   465.86  471.071    False    False     True    False    False
2     1  501.853  507.063    False     True    False    False    False
1     1  504.961  510.171     True    False    False    False    False
0     0  505.743  508.348    False    False    False    False    False
4     1  506.264  511.475    False    False    False     True    False


In [12]:
# Backward elimination: repeatedly delete the feature whose removal is best according to BIC
# until only one feature is left (see help(step.step_back) above).
step.step_back(*base, crit='BIC')

Unnamed: 0,N_var,AIC,BIC,Feature1,Feature2,Feature3,Feature4,Feature5
1,3,314.423,324.843,True,True,True,False,False
2,4,316.273,329.299,True,True,True,False,True
0,5,318.229,333.86,True,True,True,True,True
2,2,375.724,383.54,True,False,True,False,False
2,1,465.86,471.071,False,False,True,False,False


In [13]:
# Forward-selection counterpart of step_back, using BIC; per the output it covers the path
# between the empty model (N_var = 0) and the model with all five features.
step.step_forward(*base, crit='BIC')

Unnamed: 0,N_var,AIC,BIC,Feature1,Feature2,Feature3,Feature4,Feature5
1,5,318.229,333.86,True,True,True,True,True
2,4,322.956,335.981,True,False,True,True,True
3,3,344.766,355.187,True,False,False,True,True
1,2,407.092,414.908,True,False,False,False,True
5,1,448.827,454.038,False,False,False,False,True
0,0,505.743,508.348,False,False,False,False,False
