In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import datetime
#graphing
import matplotlib.pyplot as plt
#stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

#import testing
import sys
sys.path.append("../")
import selection_tests

In [2]:
# Load the raw bus data (.dat) from Aguirregabiria and Mira's website.
# With default arguments np.fromfile returns a flat float array; it is folded
# into rows of six fields each.
data = pd.DataFrame(
    np.fromfile('bus1234.dat').reshape(-1, 6),
    columns=['id', 'group', 'year', 'month', 'replace', 'miles'],
)

# Keep a .csv copy of the table as read (mileage is still unscaled here).
data.to_csv('bus1234.csv')

# Divide mileage by 1e6 (the same scale as Rust and AM), build a first-of-month
# timestamp from the year/month columns, and keep only the columns used later.
data = data.assign(
    miles=data['miles'] / 1e6,
    date=pd.to_datetime(data[['year', 'month']].assign(Day=1)),
).loc[:, ['id', 'group', 'date', 'replace', 'miles']]

# Attach the following month's record to each row: shifting the dates of a copy
# back by one month makes month t+1 join onto month t, and its columns receive
# the "_next" suffix. Rows containing any missing value (for instance rows with
# no following-month match) are dropped.
date_lag = data.assign(date=data['date'] - pd.DateOffset(months=1))
data = data.merge(
    date_lag,
    how='left',
    on=['id', 'group', 'date'],
    suffixes=('', '_next'),
).dropna()

In [3]:
# Define the cost function as a named function.
def linear_cost(params, x, i):
    """Return ``(1-i)*x*params[i] + i*params[i]``.

    For ``i == 0`` this is ``x * params[0]`` (linear in ``x``); for ``i == 1``
    it is the constant ``params[1]``.

    Parameters
    ----------
    params : sequence indexable by ``i``
        Coefficients; the fitting cells label them ``['theta1', 'RC']``.
    x : scalar or array
        Value the ``i == 0`` branch scales (the fitting cells pass mileage).
    i : int
        Selects the coefficient and the branch.
        NOTE(review): presumably the replacement decision (0 = keep,
        1 = replace) -- confirm against selection_tests.CCP.
    """
    coefficient = params[i]
    scaled_term = (1 - i) * x * coefficient
    constant_term = i * coefficient
    return scaled_term + constant_term

# Fit the CCP model with the linear cost function and print the fit summary.
# Positional arguments, in order: the replacement indicator, current mileage,
# next-period mileage, the parameter labels, the cost function, and 0.9999.
# NOTE(review): 0.9999 is presumably the discount factor -- confirm against
# selection_tests.CCP.
# NOTE(review): the recorded summary shows an empty coefficient table and
# "Df Model: 0"; check how CCP registers its parameter names.
linear_model = selection_tests.CCP(
    data['replace'],
    data['miles'],
    data['miles_next'],
    ['theta1', 'RC'],
    linear_cost,
    0.9999,
)
print(linear_model.results.summary())

Optimization terminated successfully.
         Current function value: 0.036542
         Iterations: 64
         Function evaluations: 123
                                 CCP Results                                  
Dep. Variable:                replace   Log-Likelihood:                -298.04
Model:                            CCP   AIC:                             598.1
Method:            Maximum Likelihood   BIC:                             605.1
Date:                Tue, 03 Aug 2021                                         
Time:                        16:59:01                                         
No. Observations:                8156                                         
Df Residuals:                    8155                                         
Df Model:                           0                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------

In [4]:
# Refit the same CCP specification with 0.5 as the final argument and print
# the summary. This rebinds `linear_model`, so the earlier fit is no longer
# reachable under that name.
# NOTE(review): 0.5 is presumably a lower discount factor -- confirm against
# selection_tests.CCP.
linear_model = selection_tests.CCP(
    data['replace'],
    data['miles'],
    data['miles_next'],
    ['theta1', 'RC'],
    linear_cost,
    0.5,
)
print(linear_model.results.summary())

Optimization terminated successfully.
         Current function value: 0.037247
         Iterations: 65
         Function evaluations: 122
                                 CCP Results                                  
Dep. Variable:                replace   Log-Likelihood:                -303.79
Model:                            CCP   AIC:                             609.6
Method:            Maximum Likelihood   BIC:                             616.6
Date:                Tue, 03 Aug 2021                                         
Time:                        16:59:01                                         
No. Observations:                8156                                         
Df Residuals:                    8155                                         
Df Model:                           0                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------

In [5]:
# Run the regular selection test on the prepared data; the bare expression on
# the last code line displays the returned value.
regular_test_model = selection_tests.regular_test(
    data,
    selection_tests.setup_test,
)
regular_test_model
# Results noted from earlier runs, kept for reference.
# NOTE(review): the saved cell output is (1, 3.4288061420568425), which differs
# in first element and sign from these notes -- they appear stale; verify.
#(2, -3.5293299272230336)
#(2, -3.428806141951201)

Optimization terminated successfully.
         Current function value: 0.037276
         Iterations: 71
         Function evaluations: 139
[-14.44699099  -7.43576269]
Optimization terminated successfully.
         Current function value: 0.036542
         Iterations: 64
         Function evaluations: 123
[ -0.52444452 -10.14609925]
regular: test, llr, omega ----
3.4288061420568425 5.9860991413958295 0.01933135806467295
---- 


(1, 3.4288061420568425)

In [6]:
# Fit the bootstrap test model.
# Seed NumPy's global random state first so the bootstrap draws, and therefore
# the statistics displayed by this cell, are reproducible on Restart & Run All.
# NOTE(review): assumes selection_tests.bootstrap_test draws from np.random's
# global state -- confirm; if it uses its own generator, pass the seed there.
np.random.seed(0)
bootstrap_test_model = selection_tests.bootstrap_test(
    data,
    selection_tests.setup_test,
    trials=100,
)
bootstrap_test_model

Optimization terminated successfully.
         Current function value: 0.037276
         Iterations: 71
         Function evaluations: 139
[-14.44699099  -7.43576269]
Optimization terminated successfully.
         Current function value: 0.036542
         Iterations: 64
         Function evaluations: 123
[ -0.52444452 -10.14609925]


(1, 1.7693194072604168, 5.610374183495992)

In [7]:
# Build the test table using 100 trials.
# Seed NumPy's global random state first so any resampling done inside the
# call gives the same table on Restart & Run All.
# NOTE(review): presumably test_table runs bootstrap trials that draw from
# np.random's global state -- confirm against selection_tests.
np.random.seed(0)
selection_tests.test_table(
    data,
    selection_tests.setup_test,
    trials=100,
)

Optimization terminated successfully.
         Current function value: 0.037276
         Iterations: 71
         Function evaluations: 139
[-14.44699099  -7.43576269]
Optimization terminated successfully.
         Current function value: 0.036542
         Iterations: 64
         Function evaluations: 123
[ -0.52444452 -10.14609925]
Optimization terminated successfully.
         Current function value: 0.037276
         Iterations: 71
         Function evaluations: 139
[-14.44699099  -7.43576269]
Optimization terminated successfully.
         Current function value: 0.036542
         Iterations: 64
         Function evaluations: 123
[ -0.52444452 -10.14609925]
Optimization terminated successfully.
         Current function value: 0.037276
         Iterations: 71
         Function evaluations: 139
[-14.44699099  -7.43576269]
Optimization terminated successfully.
         Current function value: 0.036542
         Iterations: 64
         Function evaluations: 123
[ -0.52444452 -10.14609925