In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import scipy.stats as st
from matplotlib.ticker import FuncFormatter
import scipy.optimize as op
from tqdm import tqdm
from preprocessing import *
plt.style.use("dark_background") # Dark-theme plots; delete this line when working on a light theme
plt.rcParams['figure.dpi'] = 150 # Render figures at 150 DPI (hi-res plots)

In [3]:
def N(t_scalar, t):
    """
    Counting function: how many entries of the sorted array t are <= t_scalar.

    Passing the event times gives N(t_{i,k}); passing t_prime instead gives N'(t_{i,k}).
    The count doubles as the exclusive end index of the slice t[:count].
    """
    # side="right" places the insertion point after any entries equal to t_scalar,
    # so ties are included in the count.
    count = np.searchsorted(a=t, v=t_scalar, side="right")
    return count

def getTimeDifferences(t, t_prime):
    """
    Precompute, for every event time of one station, the gaps to the arrivals
    that fall in the window ending at that event.

    Input:
        t:       sorted event times t_{i,1} <= ... <= t_{i,n} for station i
        t_prime: sorted arrival times t'_{i,k} for the same station

    Output:
        A list with one entry per event h = 1..n (stored at index h-1). Entry h is
        an array of shape (1, m_h) holding t_{i,h} - t'_{i,k} for the m_h arrivals
        with t_{i,h-1} < t'_{i,k} <= t_{i,h}. For h = 1 the window has no lower
        bound, i.e. it contains every arrival with t'_{i,k} <= t_{i,1}.
        An empty t yields an empty list.
    """
    t = np.asarray(t)
    t_prime = np.asarray(t_prime)

    if t.size == 0:
        return []

    # window_end[h] = N'(t[h]): number of arrivals at or before event h.
    # One vectorised call replaces two searchsorted calls per loop iteration.
    window_end = np.searchsorted(t_prime, t, side="right")

    # Each window starts where the previous one ended. The first window must
    # start at 0; using t[h-2] for h = 1 would wrap around to t[-1] and always
    # give an empty first window.
    window_start = np.concatenate(([0], window_end[:-1]))

    D_result = []
    for h in range(t.size):
        gaps = t[h] - t_prime[window_start[h]:window_end[h]]
        # Keep the historical (1, m) layout of each entry.
        D_result.append(gaps[np.newaxis, :])

    return D_result

def getDurationValues():
    """
    Placeholder that has not been implemented yet.

    Always raises NotImplementedError when called.
    """
    raise NotImplementedError()

# Quick sanity check: no t_prime value is <= t[0] here, so the first entry is an empty (1, 0) array
getTimeDifferences(np.array([1,2,3]),np.array([1.1,2.2,3.3]))

[array([], shape=(1, 0), dtype=float64), array([[0.9]]), array([[0.8]])]

In [5]:
def compensator_m5_t_values(t_scalar, t_prime):
    """
    Precompute the parameter-independent input of the compensator.

    Returns t_prime_precomputed: the elapsed times t_scalar - t'_k for every
    arrival t'_k in t_prime with t'_k <= t_scalar.
    """
    # Number of arrivals at or before t_scalar; also the exclusive slice end.
    n_arrivals = N(t_scalar, t_prime)
    elapsed = t_scalar - t_prime[:n_arrivals]
    return elapsed

In [6]:
def compensator_m5(t_scalar, t_prime_precomputed, durations, lambda_i, alpha_i,
 beta_i, beta_i_prime, gamma_i, delta_i, delta_i_prime):
    """
    Evaluate the compensator function for model 5 at time t_scalar.

    The compensator is the time-integral of the conditional intensity. Each
    arrival k contributes a kernel of the form
        alpha_i * exp(-beta_i * (d_k - 1))      * exp(-beta_i_prime  * (s - t'_k))
      + gamma_i * exp(-delta_i * |d_k - 30|)    * exp(-delta_i_prime * (s - t'_k))
    whose integral from t'_k to t_scalar is
        weight_k / rate * (1 - exp(-rate * (t_scalar - t'_k))).
    The duration weight therefore multiplies the whole bracket, so an arrival with
    zero elapsed time contributes nothing.

    t_scalar: scalar time at which the compensator is evaluated
    t_prime_precomputed: elapsed times t_scalar - t_prime[:N(t_scalar, t_prime)],
        computed outside this function for efficiency
    durations: journey durations d'_{i,k}, indexed like the arrival times t'_{i,k};
        only the first len(t_prime_precomputed) entries are used
    lambda_i, alpha_i, beta_i, beta_i_prime, gamma_i, delta_i, delta_i_prime:
        model 5 parameters (beta_i_prime and delta_i_prime must be non-zero)

    Returns the scalar compensator value.
    """
    elapsed = np.asarray(t_prime_precomputed)
    # durations is aligned with t_prime by index; drop arrivals after t_scalar so
    # the arrays line up with the precomputed elapsed times.
    durations = np.asarray(durations)[:elapsed.size]

    # Baseline intensity integrated over [0, t_scalar].
    term1 = lambda_i * t_scalar

    # Excitation term weighted by exp(-beta_i * (d - 1)).
    weight_beta = np.exp(-beta_i * (durations - 1))
    term2 = (alpha_i / beta_i_prime) * np.sum(
        weight_beta * (1.0 - np.exp(-beta_i_prime * elapsed)))

    # Excitation term weighted by exp(-delta_i * |d - 30|).
    weight_delta = np.exp(-delta_i * np.absolute(durations - 30))
    term3 = (gamma_i / delta_i_prime) * np.sum(
        weight_delta * (1.0 - np.exp(-delta_i_prime * elapsed)))

    return term1 + term2 + term3

In [None]:
def B_i(h, t, t_prime, durations, beta_i, beta_i_prime, time_differences):

    """
    Returns an array [B_i(1), ..., B_i(h)] for model 5, where

        B_i(l) = sum over arrivals k with t'_k <= t_l of
                 exp(-beta_i * (d_k - 1)) * exp(-beta_i_prime * (t_l - t'_k))

    computed with the recursion
        B_i(l) = exp(-beta_i_prime * (t_l - t_{l-1})) * B_i(l-1)
                 + (contribution of arrivals in (t_{l-1}, t_l]).

    Note all index variables such as h, l, k start at 1, like the mathematical notation.

    h: number of terms to compute (1 <= h <= len(t)); h < 1 gives an empty array
    t: sorted event times of the station
    t_prime: sorted arrival times of the station
    durations: journey durations, at the same indices as t_prime
    beta_i, beta_i_prime: model 5 parameters
    time_differences: per-event gaps as produced by getTimeDifferences(t, t_prime);
        entry l-1 holds t_l - t'_k for the arrivals in (t_{l-1}, t_l]
    """

    if h < 1:
        return np.array([])

    t = np.asarray(t)
    t_prime = np.asarray(t_prime)
    durations = np.asarray(durations)

    # arrivals_upto[l-1] = N'(t_l): number of arrivals at or before event l.
    arrivals_upto = np.searchsorted(t_prime, t[:h], side="right")

    # Duration weight of every arrival, computed once instead of per iteration.
    weights = np.exp(-beta_i * (durations - 1))

    first_end = arrivals_upto[0]
    B = [np.sum(weights[:first_end] * np.exp(-beta_i_prime * (t[0] - t_prime[:first_end])))]

    # Append the rest
    for l in range(2, h+1):
        # Arrivals in (t_{l-1}, t_l]. The slice must follow the loop index l, not h,
        # so that it matches time_differences[l-1].
        lo, hi = arrivals_upto[l-2], arrivals_upto[l-1]
        # Previous value decayed over the gap between consecutive events
        term1 = np.exp(-beta_i_prime*(t[l-1] - t[l-2])) * B[l-2]
        # Fresh contribution of the arrivals in the current window
        term2 = np.sum(weights[lo:hi] * np.exp(-beta_i_prime*(time_differences[l-1])))
        B.append(term1 + term2)
    return np.array(B)


def C_i(h, t, t_prime, durations, delta_i, delta_i_prime, time_differences):

    """
    Returns an array [C_i(1), ..., C_i(h)] for model 5, where

        C_i(l) = sum over arrivals k with t'_k <= t_l of
                 exp(-delta_i * |d_k - 30|) * exp(-delta_i_prime * (t_l - t'_k))

    computed with the recursion
        C_i(l) = exp(-delta_i_prime * (t_l - t_{l-1})) * C_i(l-1)
                 + (contribution of arrivals in (t_{l-1}, t_l]).

    The duration weight uses |d - 30|, the same weight that compensator_m5 applies
    to its gamma_i / delta_i_prime term, so intensity and compensator agree.

    Note all index variables such as h, l, k start at 1, like the mathematical notation.

    h: number of terms to compute (1 <= h <= len(t)); h < 1 gives an empty array
    t: sorted event times of the station
    t_prime: sorted arrival times of the station
    durations: journey durations, at the same indices as t_prime
    delta_i, delta_i_prime: model 5 parameters
    time_differences: per-event gaps as produced by getTimeDifferences(t, t_prime);
        entry l-1 holds t_l - t'_k for the arrivals in (t_{l-1}, t_l]
    """

    if h < 1:
        return np.array([])

    t = np.asarray(t)
    t_prime = np.asarray(t_prime)
    durations = np.asarray(durations)

    # arrivals_upto[l-1] = N'(t_l): number of arrivals at or before event l.
    arrivals_upto = np.searchsorted(t_prime, t[:h], side="right")

    # Duration weight of every arrival, computed once instead of per iteration.
    weights = np.exp(-delta_i * np.absolute(durations - 30))

    first_end = arrivals_upto[0]
    C = [np.sum(weights[:first_end] * np.exp(-delta_i_prime * (t[0] - t_prime[:first_end])))]

    # Append the rest
    for l in range(2, h+1):
        # Arrivals in (t_{l-1}, t_l]. The slice must follow the loop index l, not h,
        # so that it matches time_differences[l-1].
        lo, hi = arrivals_upto[l-2], arrivals_upto[l-1]
        # Previous value decayed over the gap between consecutive events
        term1 = np.exp(-delta_i_prime*(t[l-1] - t[l-2])) * C[l-2]
        # Fresh contribution of the arrivals in the current window
        term2 = np.sum(weights[lo:hi] * np.exp(-delta_i_prime*(time_differences[l-1])))
        C.append(term1 + term2)
    return np.array(C)

In [None]:
def m5_log_likelihood(t, t_prime, t_prime_precomputed, durations, lambda_i,
 alpha_i, beta_i, beta_i_prime, gamma_i, delta_i, delta_i_prime, time_differences, T=None):
    """
    Log-likelihood of model 5 for one station.

    Computed as  sum_h log(intensity at t_h)  -  compensator(T), where the
    intensity at event h is lambda_i + alpha_i * B_i(h) + gamma_i * C_i(h).

    t: sorted event times of the station
    t_prime: sorted arrival times of the station
    t_prime_precomputed: T - t_prime[:N(T, t_prime)], as returned by
        compensator_m5_t_values(T, t_prime); must be built with the same T
    durations: journey durations, at the same indices as t_prime
    lambda_i, alpha_i, beta_i, beta_i_prime, gamma_i, delta_i, delta_i_prime:
        model 5 parameters
    time_differences: output of getTimeDifferences(t, t_prime)
    T: end of the observation window. Defaults to the module-level end_T, which
        is not defined in this cell and must already exist in the kernel
        (presumably from preprocessing or another cell - verify).

    Returns the scalar log-likelihood.
    """
    if T is None:
        T = end_T

    n_events = len(t)

    if n_events == 0:
        # No events: the sum of log-intensities is empty.
        term1 = 0.0
    else:
        B_ = B_i(n_events, t, t_prime, durations, beta_i, beta_i_prime, time_differences)

        C_ = C_i(n_events, t, t_prime, durations, delta_i, delta_i_prime, time_differences)

        # gamma_i (not delta_i) scales the C term, matching the
        # gamma_i / delta_i_prime factor used in compensator_m5.
        term1 = np.sum(np.log(lambda_i + alpha_i*B_ + gamma_i*C_))

    term2 = -compensator_m5(T, t_prime_precomputed, durations, lambda_i, alpha_i,
        beta_i, beta_i_prime, gamma_i, delta_i, delta_i_prime)

    return term1 + term2
