# Imports

In [None]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, Variable, Expression
from biogeme.models import loglogit
from biogeme.tools import likelihood_ratio_test
from biogeme.results import compile_estimation_results
from biogeme.models import loglogit,  boxcox
from biogeme.models.piecewise import piecewise_formula
from biogeme.models import lognested
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit
from biogeme.biogeme import BIOGEME

import pickle

import numpy as np
import os

# Data & Variables

In [None]:
# The LPMC data file is expected one directory above the working directory.
file_path = os.path.join(os.pardir, 'lpmc01.dat')

# The file is tab-separated.
df = pd.read_csv(file_path, sep='\t')

# Derived columns. They are added to the DataFrame itself so that the
# Biogeme database built from it exposes them like any other column.

# Age standardised with the sample mean and sample standard deviation.
df['age_scaled'] = (df['age'] - df['age'].mean()) / df['age'].std()

# Total driving cost: congestion charge plus fuel.
df['cost_driving'] = df['cost_driving_ccharge'] + df['cost_driving_fuel']

# Total public-transport duration: access, rail, interchange and bus legs.
df['dur_pt'] = (
    df['dur_pt_access']
    + df['dur_pt_rail']
    + df['dur_pt_int']
    + df['dur_pt_bus']
)

database1 = db.Database('lpmc01', df)

# Declare one Biogeme Variable per column of the data set, so that the
# columns can be referenced by name in model expressions.

# Columns that were added to the DataFrame before the database was built
age_scaled = Variable('age_scaled')
cost_driving = Variable('cost_driving')
dur_pt = Variable('dur_pt')

# Record identifiers
household_id = Variable('household_id')
person_n = Variable('person_n')
trip_id = Variable('trip_id')
trip_n = Variable('trip_n')

# Trip attributes
travel_mode = Variable('travel_mode')
purpose = Variable('purpose')
fueltype = Variable('fueltype')
faretype = Variable('faretype')
bus_scale = Variable('bus_scale')
distance = Variable('distance')

# Dates and times
survey_year = Variable('survey_year')
travel_year = Variable('travel_year')
travel_month = Variable('travel_month')
travel_date = Variable('travel_date')
day_of_week = Variable('day_of_week')
start_time = Variable('start_time')

# Traveller and household attributes
age = Variable('age')
female = Variable('female')
driving_license = Variable('driving_license')
car_ownership = Variable('car_ownership')

# Durations of the walking, cycling and driving routes
dur_walking = Variable('dur_walking')
dur_cycling = Variable('dur_cycling')
dur_driving = Variable('dur_driving')
driving_traffic_percent = Variable('driving_traffic_percent')

# Components of the public transport route
dur_pt_access = Variable('dur_pt_access')  # Predicted total access and egress time for public transport route in hours
dur_pt_rail = Variable('dur_pt_rail')
dur_pt_bus = Variable('dur_pt_bus')
dur_pt_int = Variable('dur_pt_int')  # Time taken (hrs) at each interchange point
pt_interchanges = Variable('pt_interchanges')  # Number of interchange points in public transport route

# Costs
cost_transit = Variable('cost_transit')
cost_driving_fuel = Variable('cost_driving_fuel')  # Estimated fuel cost of driving route in GBP
cost_driving_ccharge = Variable('cost_driving_ccharge')  # Estimated congestion charge cost of driving route in GBP



# Availability of each transport mode.
# Public transport, walking and cycling are assumed to be always available.
# Driving is available when the household owns at least one car. The driving
# licence is not used as a condition: in the data, people without a driving
# licence do choose driving as their mode of transport (e.g. row 28).
av_drive = car_ownership > 0
av_pt = 1
av_walk = 1
av_cycle = 1

# Sanity check: report, for each derived column, whether it is present in
# the data held by the database.
variable_names = ['dur_pt', 'cost_driving', 'age_scaled']
for variable_name in variable_names:
    print(
        f"'{variable_name}' exists in the database."
        if variable_name in database1.data.columns
        else f"'{variable_name}' does NOT exist in the database."
    )



# Public transport cost: no extra definition is needed.
# Original paper, page 31: "Public transport fares are determined for single trips using Oystercard/contactless payment."
# Therefore, cost_transit should already account for faretype and bus_scale.

# Database that is handed to BIOGEME when the model is built.
database = db.Database('lpmc01', df)

# Sanity check: report, for each derived column, whether it is present in
# the data of the database created just above. (The check previously
# inspected `database1`, leaving `database` itself unverified.)
variable_names = ['dur_pt', 'cost_driving', 'age_scaled']
for variable_name in variable_names:
    if variable_name in database.data.columns:
        print(f"'{variable_name}' exists in the database.")
    else:
        print(f"'{variable_name}' does NOT exist in the database.")

# Driving cost, as an expression: congestion charge plus fuel.
# This rebinds the name `cost_driving` from the plain Variable to the sum.
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Public transport travel time, as an expression: access, interchange, bus
# and rail legs. This rebinds the name `dur_pt` from the plain Variable to
# the sum.
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail

'dur_pt' exists in the database.
'cost_driving' exists in the database.
'age_scaled' exists in the database.
'dur_pt' exists in the database.
'cost_driving' exists in the database.
'age_scaled' exists in the database.


# Model Definition

In [None]:
# Every mode of transport is assumed to be available to everyone, so each
# availability condition is the constant 1. Note that the car-ownership
# condition `av_drive` is not used here.
availability_walk = availability_cycle = availability_pt = availability_drive = 1

# Availability condition of each alternative, keyed by alternative number:
# 1 = walking, 2 = cycling, 3 = public transport, 4 = driving.
availability = {
    1: availability_walk,
    2: availability_cycle,
    3: availability_pt,
    4: availability_drive,
}

In [None]:
# Travel time of each alternative.
time_walking = dur_walking
time_cycling = dur_cycling
time_pt = dur_pt
time_driving = dur_driving

# Alternative specific constants. Walking has no constant: the model is
# normalized with asc_walking = 0.
asc_pt = Beta('asc_pt', 0, None, None, 0)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

# Coefficients of the squared and cubed terms of the power series.
# NOTE: the Python names say "tt" while the parameter names say "cost".
# The parameter names are the ones that appear in the estimation results.
square_tt_coef = Beta(
    name='square_cost_coef', value=0, lowerbound=None, upperbound=None, status=0
)
cube_tt_coef = Beta(
    name='cube_cost_coef', value=0, lowerbound=None, upperbound=None, status=0
)

def power_series(the_variable: Expression) -> Expression:
    """Generate the expression of a polynomial of degree 3.

    The polynomial is ``x + square_tt_coef * x**2 + cube_tt_coef * x**3``.
    It has no constant term and the coefficient of the linear term is 1.
    ``square_tt_coef`` and ``cube_tt_coef`` are the module-level parameters.

    :param the_variable: variable of the polynomial
    :return: expression of the polynomial
    """
    return (
        the_variable
        + square_tt_coef * the_variable**2
        # Cubic term: multiplying by the variable once more would make it
        # a term of degree 4.
        + cube_tt_coef * the_variable**3
    )

# Cost coefficient, shared by public transport and driving.
beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

# Travel time coefficients: for each mode, a base coefficient and the
# coefficient of the interaction between travel time and age.
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking_interact = Beta(name='beta_tt_walking_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling_interact = Beta(name='beta_tt_cycling_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt_interact = Beta(name='beta_tt_pt_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving_interact = Beta(name='beta_tt_driving_interact', value=0, lowerbound=None, upperbound=None, status=0)

# Nonlinear (power series) transformation of the two cost variables.
cost_drive_power = power_series(cost_driving)
cost_pt_power = power_series(cost_transit)

# Largest age in the sample; age is divided by it in the interaction terms.
mx_age = df['age'].max()

# Utility functions. Walking is the reference alternative (no constant) and
# has no cost term, like cycling.
v_walking = beta_tt_walking * time_walking + (1/mx_age) * beta_tt_walking_interact * age * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling + (1/mx_age) * beta_tt_cycling_interact * age * time_cycling
v_pt = asc_pt + beta_tt_pt * time_pt + (1/mx_age) * beta_tt_pt_interact * age * time_pt + beta_cost * cost_pt_power
v_driving = asc_driving + beta_tt_driving * time_driving + (1/mx_age) * beta_tt_driving_interact * age * time_driving + beta_cost * cost_drive_power

# Utility of each alternative, keyed by alternative number:
# 1 = walking, 2 = cycling, 3 = public transport, 4 = driving.
V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

# Nest structure. It is defined after V because the choice set is taken
# from the keys of V; defining it earlier raises a NameError when the
# notebook is run from top to bottom.
# NOTE(review): the nest parameters have a lower bound of 0 - confirm that
# this is the intended bound for this specification.
mu_a = Beta('mu_a', 1, 0, None, 0)
mu_b = Beta('mu_b', 1, 0, None, 0)
nest_a = OneNestForNestedLogit(nest_param=mu_a, list_of_alternatives=[1, 2], name='slow modes')
nest_b = OneNestForNestedLogit(nest_param=mu_b, list_of_alternatives=[3, 4], name='faster modes')
nests = NestsForNestedLogit(choice_set=list(V), tuple_of_nests=(nest_a, nest_b))

# Contribution of each observation to the log likelihood (nested logit).
logprob_m4 = lognested(V, availability, nests, travel_mode)

model_4 = bio.BIOGEME(database, logprob_m4)
model_4.modelName = 'model_4'


# Model Results

In [None]:
# Estimate model 4.
results_m4 = model_4.estimate()

# Table of estimated parameters.
estimated_parameters_m4 = results_m4.get_estimated_parameters()

# Final log likelihood, and the number of rows of the parameter table
# taken as the number of estimated parameters.
model_4_loglike = results_m4.data.logLike
model_4_numParam = len(estimated_parameters_m4)

# Last expression of the cell: the notebook displays the table.
estimated_parameters_m4

Estimation results for Model 0:
              Value  Rob. Std err  Rob. t-test  Rob. p-value
ASC_CYCLE -3.778175      0.103171   -36.620564           0.0
ASC_DRIVE -1.288003      0.079431   -16.215324           0.0
ASC_PT    -0.525660      0.054777    -9.596426           0.0
B_COST    -0.190833      0.014635   -13.039920           0.0
B_TIME    -5.598770      0.203407   -27.524922           0.0


In [None]:
# Print whatever `print_general_statistics` returns for model 4
# (presumably a formatted summary of the estimation - confirm against the
# installed Biogeme version).
general_statistics_m4 = results_m4.print_general_statistics()
print(general_statistics_m4)

# Test against Model 3

In [None]:
# Quantities of model 4 for the comparison with model 3: the number of rows
# of the parameter table (taken as the number of estimated parameters) and
# the final log likelihood.
model_4_numParam = len(results_m4.get_estimated_parameters())
model_4_loglike = results_m4.data.logLike
