# 204: Exampville Joint Mode and Destination Choice

Welcome to Exampville, the best simulated town in this here part of the internet!

Exampville is a demonstration provided with Larch that walks through some of the 
data and tools that a transportation planner might use when building a travel model. 

In [None]:
# TEST
import larix as lx
from pytest import approx

In [None]:
import numpy as np
import pandas as pd
import larix as lx
from larix import P, X

In this example notebook, we will walk through the estimation of a joint tour 
mode and destination choice model.  First, let's load the data files from
our example.

In [None]:
# Pull the five named data objects out of Larch example 200; they are
# unpacked in the same order as the names are requested.
hh, pp, tour, skims, emp = lx.example(200, ['hh', 'pp', 'tour', 'skims', 'emp'])

In [None]:
# Bin household income into three quantile-based groups (terciles).
hh['INCOME_GRP'] = pd.qcut(hh['INCOME'], q=3)

In [None]:
# Convert the skims loaded above into a Dataset via the OMX constructor;
# it is indexed by `otaz` / `dtaz` in the DataTree relationships below.
od_skims = lx.Dataset.construct.from_omx(skims)

In [None]:
from addicty import Dict

# Frozen name -> integer code lookup for the five tour modes.  The codes are
# single digits so they can occupy the ones place of the combined
# (destination, mode) alternative codes built later in this notebook.
Mode = Dict(DA=1, SR=2, Walk=3, Bike=4, Transit=5).freeze()

In [None]:
# The candidate destinations are the values of `dtaz` in the skims.
Dest = od_skims.dtaz.values

In [None]:
import itertools

In [None]:
# Display the mode name -> code mapping as a plain dict.
Mode.to_dict()

In [None]:
# Enumerate every (destination zone, mode) pair as one joint alternative.
# The code packs both pieces into a single integer, zone * 10 + mode code,
# and the name reads "<mode>_<zone>", with the zone zero-padded to two digits.
# Both lists iterate the same product in the same order, so they line up.
altcodes = [
    taz * 10 + mode_code
    for taz, mode_code in itertools.product(Dest, Mode.values())
]
altnames = [
    f"{mode_name}_{taz:02d}"
    for taz, mode_name in itertools.product(Dest, Mode.keys())
]
Alts = dict(zip(altnames, altcodes))

In [None]:
# Build a Dataset from the tour table (indexed by TOURID), declaring the
# joint destination-and-mode alternatives from the `Alts` name -> code mapping.
tour_dataset = lx.Dataset.construct.from_idco(tour.set_index('TOURID'), alts=Alts)

In [None]:
# Encode the observed choice with the same scheme used for the alternative
# codes: destination zone * 10 + mode code.
tour_dataset["CHOICE"] = tour_dataset.TOURMODE + tour_dataset.DTAZ * 10

In [None]:
# Decode each alternative id back into its two components: the tens-and-up
# digits are the destination zone, the ones digit is the mode code.
tour_dataset["AltTAZ"] = tour_dataset['_altid_'] // 10
tour_dataset["AltMODE"] = tour_dataset['_altid_'] % 10

In [None]:
# Assemble the DataTree that links tours to households, persons, skims and
# employment data.  The same skims dataset is attached twice, as `od` and as
# `do`, so each direction of travel can be looked up.
dt = lx.DataTree(
    tour=tour_dataset,
    hh=hh.set_index('HHID'),
    person=pp.set_index('PERSONID'),
    od=od_skims,
    do=od_skims,
    emp=emp,
    relationships=(
        # Attach household and person attributes to each tour.
        "tour.HHID @ hh.HHID",
        "tour.PERSONID @ person.PERSONID",
        # `od`: origin is the home zone, destination is the alternative's zone.
        "hh.HOMETAZ @ od.otaz",
        "tour.AltTAZ @ od.dtaz",
        # `do`: the reverse, origin is the alternative's zone, destination is home.
        "hh.HOMETAZ @ do.dtaz",
        "tour.AltTAZ @ do.otaz",
        # Employment data is looked up by the alternative's zone.
        "tour.AltTAZ @ emp.TAZ",
    ),
)

In [None]:
# Availability condition for each mode, keyed by mode code.  Each value is an
# expression (or the constant 1 for "always available") that is combined into
# a single availability expression in a later cell.
availability_co_vars = {
    Mode.DA: 'AGE >= 16',
    Mode.SR: 1,
    Mode.Walk: 'WALK_TIME < 60',
    Mode.Bike: 'BIKE_TIME < 60',
    Mode.Transit: 'TRANSIT_FARE>0',
}

In [None]:
# Combine the per-mode conditions into one expression: each term is
# (AltMODE == code) * (condition), so only the term matching an alternative's
# own mode can be non-zero, and the terms are summed with "+".
avail_ca_parts = "+".join(X(f"(AltMODE=={i})*({a})") for i,a in availability_co_vars.items())
# Display the resulting expression.
avail_ca_parts

In [None]:
# Keep only the cases with TOURPURP == 1 (the model built on this tree is
# titled as a work tour model, so presumably 1 is the work purpose code).
dt_work = dt.query_cases("TOURPURP == 1")

## Preprocessing

The cells above bundle all our raw data into a `DataTree` structure, 
which is used to collect the right data for estimation.  The
Larch DataTree is a slightly augmented version of the regular
`sharrow.DataTree`.

## Model Definition

Now we can define our choice model, using data from the tree as appropriate.

In [None]:
# Create the model on the filtered (TOURPURP == 1) data tree and give it a title.
m = lx.Model(datatree=dt_work)
m.title = "Exampville Work Tour Mode and Destination Choice v1"

In [None]:
# Build the same nesting structure under every destination zone:
#
#   Destination -> Motor    -> Car -> (DA, SR)
#                           -> Transit
#               -> NonMotor -> (Walk, Bike)
#
# The nest parameter names are identical in every iteration, so each Mu
# parameter is shared across all destinations.
for dest in Dest:
    # Base alternative code for this zone; adding a mode code gives the
    # elemental alternative (codes are zone * 10 + mode code).
    d = dest * 10
    Car = m.graph.new_node(
        name='Car',
        parameter='Mu:Car',
        children=[d + Mode.DA, d + Mode.SR],
    )
    NonMotor = m.graph.new_node(
        name='NonMotor',
        parameter='Mu:NonMotor',
        children=[d + Mode.Walk, d + Mode.Bike],
    )
    Motor = m.graph.new_node(
        name='Motor',
        parameter='Mu:Motor',
        children=[Car, d + Mode.Transit],
    )
    DestNest = m.graph.new_node(
        name='Destination',
        parameter='Mu:Dest',
        children=[Motor, NonMotor],
    )

In [None]:
# Use the combined per-mode expression built earlier as the availability
# indicator for each alternative.
m.availability_ca_var = avail_ca_parts

In [None]:
# Size (quantity) terms for each destination: retail and non-retail
# employment, each with a separate parameter for households with
# INCOME > 50000 and for households with INCOME <= 50000.
m.quantity_ca = (
        + P.EmpRetail_HighInc * X('RETAIL_EMP * (INCOME>50000)')
        + P.EmpNonRetail_HighInc * X('NONRETAIL_EMP') * X("INCOME>50000")
        + P.EmpRetail_LowInc * X('RETAIL_EMP') * X("INCOME<=50000")
        + P.EmpNonRetail_LowInc * X('NONRETAIL_EMP') * X("INCOME<=50000")
)

# Theta scales the quantity term (presumably the theta multiplying the log
# of the size term in the Math section at the end of this notebook).
m.quantity_scale = P.Theta


In [None]:
# Systematic utility of each (destination, mode) alternative.  Mode-specific
# terms are switched on by multiplying with an `AltMODE == code` indicator.
m.utility_ca = (
    + P.distance * X.AUTO_DIST
    # In-vehicle time: auto time for DA and SR, transit IVTT for transit.
    + P.InVehTime * (
        + X.AUTO_TIME * X(f"AltMODE in ({Mode.DA}, {Mode.SR})")
        + X.TRANSIT_IVTT * X(f"AltMODE == {Mode.Transit}")
    )
    # Out-of-vehicle time applies to transit only.
    + P.OutVehTime * X.TRANSIT_OVTT * X(f"AltMODE == {Mode.Transit}")
    # Walk and bike travel times share a single parameter.
    + P.NonMotorTime * (
        + X.WALK_TIME * X(f"AltMODE == {Mode.Walk}")
        + X.BIKE_TIME * X(f"AltMODE == {Mode.Bike}")
    )
    # Cost: full auto cost for DA, half the auto cost for SR, fare for transit.
    + P.Cost * (
        + X.AUTO_COST * X(f"AltMODE == {Mode.DA}")
        + X("AUTO_COST * 0.5") * X(f"AltMODE == {Mode.SR}")
        + X.TRANSIT_FARE * X(f"AltMODE == {Mode.Transit}")
    )
    # Alternative-specific constants; DA has none, so it is the reference mode.
    + P.ASC_SR * X(f"AltMODE == {Mode.SR}")
    + P.ASC_Walk * X(f"AltMODE == {Mode.Walk}")
    + P.ASC_Bike * X(f"AltMODE == {Mode.Bike}")
    + P.ASC_Transit * X(f"AltMODE == {Mode.Transit}")
    # Mode-specific effects of log income, again relative to DA.
    + P("LogIncome:SR") * X("log(INCOME)") * X(f"AltMODE == {Mode.SR}")
    + P("LogIncome:Walk") * X("log(INCOME)") * X(f"AltMODE == {Mode.Walk}")
    + P("LogIncome:Bike") * X("log(INCOME)") * X(f"AltMODE == {Mode.Bike}")
    + P("LogIncome:Transit") * X("log(INCOME)") * X(f"AltMODE == {Mode.Transit}")
)

In [None]:
# The chosen alternative is identified by the combined code stored in tour.CHOICE.
m.choice_co_code = "tour.CHOICE"

In [None]:
# Select the numba compute engine for this model.
m.compute_engine = 'numba'

In [None]:
# NOTE(review): presumably this bounds otherwise-unbounded parameters to the
# range [-25, 25] -- confirm against the Larch `set_cap` documentation.
m.set_cap(25)

In [None]:
# Display the model's parameter frame before any parameters are locked.
m.pf

In [None]:
# Lock the two retail-employment size parameters at 0; the non-retail size
# parameters remain free (presumably this normalizes the size terms within
# each income group -- confirm).
m.plock(EmpRetail_HighInc=0, EmpRetail_LowInc=0)

In [None]:
# TEST
# Regression check: evaluate the log likelihood and its gradient at a fixed
# parameter vector and compare them with stored reference values.
x = {
    'ASC_Bike': -0.5240493998048744,
    'ASC_SR': 3.236290438517284,
    'ASC_Transit': 8.778656078246991,
    'ASC_Walk': 6.586505131588745,
    'Cost': -0.3690205730692789,
    'EmpNonRetail_HighInc': 1.3301012502070557,
    'EmpNonRetail_LowInc': -0.9099962346214282,
    'EmpRetail_HighInc': 0.0,
    'EmpRetail_LowInc': 0.0,
    'InVehTime': -0.12186665041621693,
    'LogIncome:Bike': -0.15885556332102918,
    'LogIncome:SR': -0.43651343262581205,
    'LogIncome:Transit': -0.7055947243480206,
    'LogIncome:Walk': -0.3625004764836594,
    'Mu:Car': 0.5765014801086261,
    'Mu:Dest': 0.8772447924568757,
    'Mu:Motor': 0.8562164680908089,
    'Mu:NonMotor': 0.8057181271317372,
    'NonMotorTime': -0.2395700551175699,
    'OutVehTime': -0.2807523025163492,
    'Theta': 0.7468696025360714,
    'distance': 0.006652335260990816,
}
assert m.loglike(x) == approx(-28646.661840926616)
# One gradient entry per parameter in `x` (22 values); the two zero entries
# line up with the two locked retail-employment parameters.
assert m.d_loglike(x) == approx(np.array([ 
    1.454517e-01,  4.549069e-01, -7.067049e-01,  1.339604e+00, -8.713451e-01,  6.543924e-01,  2.099017e-01,
    0.000000e+00,  0.000000e+00,  1.075398e-01, -2.423103e-01,  1.206464e+01,  3.165643e-02,  1.356228e+00,
    1.501221e+00, -2.958951e-01, -2.940929e-03, -3.309181e-02,  1.634716e+00,  4.964662e-03,  1.694484e-01,
    3.421337e-01]))
# Presumably restores the parameters to their initial values so that this
# check does not change the starting point for estimation -- confirm.
m.pvals = 'init'

## Model Estimation

In [None]:
# Same engine selection as made earlier in the notebook, repeated here
# immediately before estimation.
m.compute_engine = 'numba'

In [None]:
# Log likelihood at the current parameter values, evaluated before estimation.
m.loglike(m.pvals)

In [None]:
# Display the parameter frame again, now that the retail parameters are locked.
m.pf

In [None]:
# Estimate the model by maximizing the log likelihood, requesting standard
# errors and allowing up to 500 iterations.
result = m.maximize_loglike(stderr=True, maxiter=500) 

In [None]:
# Display the summary of the estimated parameters.
m.parameter_summary()

In [None]:
# TEST
# The log likelihood reported by the optimizer and the one recomputed from the
# model's current parameters must both match the stored reference value.
assert result.loglike == approx(-28653.74150042629)
assert m.loglike() == approx(-28653.74150042629)

## Math



Utility of a given alternative

$V_i = \beta X_i + \theta \log (\exp(\gamma)Z_i) $

Logsum of two alternatives, i and j, in a nest

$ \Gamma_n = \mu \log \left( 
  \exp \left(\frac{V_i}{\mu}\right) 
+ \exp \left(\frac{V_j}{\mu}\right)
\right)$

Logsum of two alternatives, i and j, in a nest with size Q

$ \Gamma_n = \mu \log \left( 
  \exp \left(\frac{V_i}{\mu}\right) 
+ \exp \left(\frac{V_j}{\mu}\right)
\right) * Q$

$ \Gamma_n = \mu \log \left( 
  \exp \left(\frac{V_i + \theta\log(Q)}{\mu}\right) 
+ \exp \left(\frac{V_j + \theta\log(Q)}{\mu}\right)
\right)$

$ \Gamma_n = \mu \log \left( 
  \exp \left(\frac{V_i}{\mu} + \frac{\theta\log(Q)}{\mu}\right) 
+ \exp \left(\frac{V_j}{\mu} + \frac{\theta\log(Q)}{\mu}\right)
\right)$

$ \Gamma_n = \mu \log \left( 
  \exp \left(\frac{V_i}{\mu}\right) * \exp\left(\frac{\theta\log(Q)}{\mu}\right) 
+ \exp \left(\frac{V_j}{\mu}\right) * \exp\left(\frac{\theta\log(Q)}{\mu}\right)
\right)$

$ \Gamma_n = \mu \log \left( 
    \left(
      \exp \left(\frac{V_i}{\mu}\right) 
    + \exp \left(\frac{V_j}{\mu}\right) 
    \right) * \exp\left(\frac{\theta\log(Q)}{\mu}\right)
\right)$

$ \Gamma_n = \mu \log \left( 
      \exp \left(\frac{V_i}{\mu}\right) 
    + \exp \left(\frac{V_j}{\mu}\right) 
\right) + \left(
    \left(\theta\log(Q)\right)
\right)$

$ \Gamma_n = \mu \log( 
+ \exp (  (\beta X_i + \theta \log (\exp(\gamma)Z_i))  / \mu) 
+ \exp (  (\beta X_j + \theta \log (\exp(\gamma)Z_j))  / \mu)  
)$

$ \Gamma_n = \mu \log( 
+ \exp (  (\beta X_i / \mu)) * \exp((\theta \log (\exp(\gamma)Z_i) / \mu)  ) 
+ \exp (  (\beta X_j / \mu)) * \exp((\theta \log (\exp(\gamma)Z_j) / \mu)  )  
)$

$ \Gamma_n = \mu \log \left( 
  \exp \left(\frac{V_i}{\mu}\right) 
+ \exp \left(\frac{V_j}{\mu}\right)
\right) + \theta \log (Q)$