In [2]:
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

from scipy.interpolate import interp1d #pre written interpolation function
from statsmodels.base.model import GenericLikelihoodModel

The code below references the following two sources:

John Rust's website
https://editorialexpress.com/jrust/nfxp.html

Victor Aguirregabiria and Pedro Mira's website
http://individual.utoronto.ca/vaguirre/wpapers/program_code_survey_joe_2008.html

In [15]:
# Convert the raw binary bus file into a labelled table.  np.fromfile with its
# default dtype reads the file as one flat float array; every record has six
# fields, so reshape(-1, 6) lets numpy infer the number of records (and avoids
# the Python-2-only integer division `len(data)/6`).
bus_records = np.fromfile('bus1234.dat').reshape(-1, 6)
data = pd.DataFrame(bus_records,
                    columns=['id','group','year','month','replace','miles'])

# save to .csv so the data can be used without knowing the binary layout
data.to_csv("bus1234.csv")

# constants
BETA = .97                      # multiplies the continuation value in hm_value / hm_prob
MAX_X = data['miles'].max()     # largest observed mileage
MIN_X = data['miles'].min()     # smallest observed mileage
STATES = 5                      # number of mileage bins used by hm_initial_pr

# Single pre-formatted argument: prints the same text on Python 2 and Python 3.
print("%s %s" % (MAX_X, MIN_X))

388254.0 0.0


In [22]:
def hm_initial_pr(x_obs, i_obs, max_x=None, states=None):
    """Estimate the empirical choice frequency in each discretised state.

    The continuous state ``x_obs`` is cut into equal-width bins of width
    ``int(max_x / states) + 1`` and, within every bin, the mean of ``i_obs``
    is computed (for a 0/1 indicator this is the observed frequency of 1).

    Parameters
    ----------
    x_obs : array-like
        Observed state variable (e.g. ``data['miles']``).
    i_obs : array-like
        Observed decision, same length as ``x_obs``; matched to ``x_obs`` by
        position, not by pandas index.
    max_x : number, optional
        Upper bound used to size the bins.  Defaults to the module-level
        ``MAX_X``.
    states : int, optional
        Number of bins.  Defaults to the module-level ``STATES``.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(k, 1)`` with one row per bin that actually
        occurs in ``x_obs``, ordered by ascending bin index.  Bins without
        any observation are absent, so ``k`` can be smaller than ``states``.
    """
    if max_x is None:
        max_x = MAX_X
    if states is None:
        states = STATES

    # "+ 1" guarantees that max_x itself falls in the last bin (index
    # states - 1) instead of opening an extra bin.
    interval = int(max_x / states) + 1
    # Kept from the original: echoes the bin width.  The parenthesised
    # single-argument form prints identically on Python 2 and Python 3.
    print(interval)

    x_discrete = (np.asarray(x_obs) / interval).astype(int)

    obs = pd.DataFrame({'x': x_discrete, 'i': np.asarray(i_obs)})
    # The group mean is exactly sum / count, which the original spelled out.
    pr_obs = obs.groupby('x')[['i']].mean()

    return np.asarray(pr_obs)

# Observed frequency of `replace` within each discretised mileage state.
result1 = hm_initial_pr(data['miles'], data['replace'])
# Single-argument parenthesised print: same output on Python 2 and Python 3.
print(result1)

77651
[[0.        ]
 [0.00296108]
 [0.01472393]
 [0.0290404 ]
 [0.03225806]]


In [27]:
def hm_transitions(x_obs, x_next=None, i_obs=None):
    """Return the deterministic state-transition matrices.

    ``hm_value`` and ``hm_prob`` call this function as
    ``trans0, trans1 = hm_transitions(a_max)``, which the original
    three-argument stub (whose body was only ``pass``) could not serve.
    Two call forms are therefore accepted:

    * ``hm_transitions(a_max)`` -- ``x_obs`` is read as the number of states
      and a pair ``(trans0, trans1)`` of ``(a_max, a_max)`` arrays is
      returned, with rows indexing the current state and columns the next
      state.
    * ``hm_transitions(x_obs, x_next, i_obs)`` -- the data-driven estimate was
      never implemented; this form still returns ``None`` exactly like the
      original stub.

    Returns
    -------
    tuple of numpy.ndarray, or None
        ``(trans0, trans1)`` in the one-argument form, ``None`` otherwise.
    """
    if x_next is not None or i_obs is not None:
        # TODO: estimate transitions from the observations.  Until then keep
        # the behaviour of the original stub for this call form.
        return None

    a_max = int(x_obs)
    current = np.arange(a_max)

    # trans0: the state advances by one and stays put once the last state is
    # reached (1->2, ..., a_max->a_max), the same pattern as the x / x_next
    # table recorded under this cell.
    trans0 = np.zeros((a_max, a_max))
    trans0[current, np.minimum(current + 1, a_max - 1)] = 1.0

    # NOTE(review): trans1 assumes the alternative action sends every state
    # back to the first state -- confirm against the intended model.
    trans1 = np.zeros((a_max, a_max))
    trans1[:, 0] = 1.0

    return trans0, trans1

            i
x x_next     
1 2       1.0
2 3       1.0
3 4       1.0
4 5       1.0
5 5       1.0


In [None]:
def hm_value(a_max, theta1, cost, pr_obs):
    """Calculate the value function using the Hotz-Miller approach.

    With ``p = pr_obs`` and ``a = 1, ..., a_max`` this solves the linear
    system

        (I - BETA*(1-p)*trans0 - BETA*p*trans1) V
            = (1-p)*(theta1*a + GAMMA - log(1-p)) + p*(cost + GAMMA - log(p))

    where ``trans0, trans1 = hm_transitions(a_max)`` and each row of the
    transition matrices is weighted by that state's probability.

    Parameters
    ----------
    a_max : int
        Number of states.
    theta1 : float
        Coefficient on the state index ``a`` in the payoff paired with
        ``1 - p`` and ``trans0``.
    cost : float
        Payoff paired with ``p`` and ``trans1``.
    pr_obs : array-like
        ``a_max`` probabilities, one per state.  Any shape holding exactly
        ``a_max`` values is accepted (``(a_max,)`` or ``(a_max, 1)``).

    Returns
    -------
    numpy.ndarray
        Value function as a column vector of shape ``(a_max, 1)``.

    Notes
    -----
    ``BETA`` and ``GAMMA`` are module-level names.  NOTE(review): ``GAMMA`` is
    not defined in the part of the notebook visible here; presumably it is
    Euler's constant (``np.euler_gamma``) -- confirm it is set before calling.
    """
    # set up matrices, transition is deterministic
    trans0, trans1 = hm_transitions(a_max)
    a = np.arange(1, a_max + 1).reshape(a_max, 1)

    # Force a column vector once so that every product below broadcasts row by
    # row; a 1-D pr_obs would otherwise turn `numer` into an
    # (a_max, a_max) matrix.
    pr = np.asarray(pr_obs, dtype=float).reshape(a_max, 1)
    pr_other = 1.0 - pr

    # p*log(p) is taken as 0 at p == 0 (its limit).  Evaluating it naively
    # gives 0 * -inf = nan, which would make the whole value function nan as
    # soon as one state has an observed frequency of exactly 0 or 1.
    with np.errstate(divide='ignore', invalid='ignore'):
        plogp = np.where(pr > 0, pr * np.log(pr), 0.0)
        qlogq = np.where(pr_other > 0, pr_other * np.log(pr_other), 0.0)

    denom = np.identity(a_max) - BETA * pr_other * trans0 - BETA * pr * trans1

    numer = (pr_other * (theta1 * a + GAMMA) - qlogq
             + pr * (cost + GAMMA) - plogp)

    # Solve the system directly instead of forming the explicit inverse.
    value = np.linalg.solve(denom, numer)
    return value


def hm_prob(a_max, theta1, cost, pr_obs):
    """Calculate the model-implied choice probability from the value function.

    For every state ``a = 1, ..., a_max`` two choice-specific values are built
    from ``value = hm_value(a_max, theta1, cost, pr_obs)``:

        v1 = cost     + BETA * trans1.dot(value)
        v0 = a*theta1 + BETA * trans0.dot(value)

    and the function returns ``exp(v1) / (exp(v1) + exp(v0))``.

    Parameters
    ----------
    a_max : int
        Number of states.
    theta1 : float
        Coefficient on the state index in ``v0``.
    cost : float
        Constant payoff entering ``v1``.
    pr_obs : array-like
        Per-state probabilities passed through to ``hm_value``.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(a_max, 1)`` with the probability of the
        choice associated with ``cost`` / ``trans1`` in each state.
    """
    value = hm_value(a_max, theta1, cost, pr_obs)
    trans0, trans1 = hm_transitions(a_max)
    a = np.arange(1, a_max + 1).reshape(a_max, 1)

    v1 = cost + BETA * trans1.dot(value)
    v0 = a * theta1 + BETA * trans0.dot(value)

    # exp(v1) / (exp(v1) + exp(v0)) evaluated in log space: exponentiating the
    # two values separately overflows to inf / inf = nan once they are large.
    return np.exp(v1 - np.logaddexp(v0, v1))