In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from preprocessing import *
import scipy.optimize as op
# Plot styling: dark theme (remove this line when working on a light background)
plt.style.use("dark_background")
# Render figures at 150 dpi for sharper output
plt.rcParams.update({"figure.dpi": 150})

In [None]:
def N(t_scalar, t):
    """
    Counting function: how many entries of the sorted array t are <= t_scalar.

    Used for terms such as N(t_i,k) and N'(t_i,k). The search uses
    side="right", so entries equal to t_scalar are included in the count.
    """
    n_not_after = np.searchsorted(t, t_scalar, side="right")
    return n_not_after


def B(h, t, t_prime, beta):
    """
    Returns the array [B_i(1), ..., B_i(h)], where

        B_i(l) = sum over k with t'_k <= t_l of exp(-beta * (t_l - t'_k)).

    The values are built with the recursion

        B_i(l) = exp(-beta * (t_l - t_{l-1})) * B_i(l-1)
                 + sum over k with t_{l-1} < t'_k <= t_l of exp(-beta * (t_l - t'_k)),

    so each arrival time is only touched once.

    h: number of terms to compute (uses t[0], ..., t[h-1]); h <= 0 gives an empty array
    t: times at which B is evaluated; the recursion assumes they are ascending
    t_prime: arrival times
    beta: decay rate in the exponent

    NOTE: t_prime NEEDS to be sorted here

    Note h and the l in B_i(l) start at 1, like the mathematical notation;
    the code below uses 0-based positions into t.
    """
    if h <= 0:
        return np.array([])

    t = np.asarray(t)
    t_prime = np.asarray(t_prime)

    # counts[j] = number of arrivals <= t[j]. Same counting rule as N(),
    # evaluated for all needed t in a single call instead of twice per step.
    counts = np.searchsorted(t_prime, t[:h], side="right")

    values = np.empty(h)

    # Base case B_i(1): every arrival up to and including t[0]
    values[0] = np.sum(np.exp(-beta * (t[0] - t_prime[:counts[0]])))

    for idx in range(1, h):
        # Previous value decayed over the gap between consecutive evaluation times
        decayed = np.exp(-beta * (t[idx] - t[idx - 1])) * values[idx - 1]

        # Contribution of the arrivals that fall in (t[idx-1], t[idx]]
        fresh = np.sum(np.exp(-beta * (t[idx] - t_prime[counts[idx - 1]:counts[idx]])))

        values[idx] = decayed + fresh

    return values


def compensator_m3(t_scalar, t_prime, lambda_i, alpha_i, beta_i):
    """
    Evaluates the compensator

        Lambda_i(t) = lambda_i * t
                      - (alpha_i / beta_i) * sum over k with t'_k <= t of (exp(-beta_i * (t - t'_k)) - 1)

    at t = t_scalar.

    t_scalar: scalar value where Lambda_i(t) is to be evaluated
    t_prime: list of arrival times at station i
    lambda_i, alpha_i, beta_i: model parameters appearing in the formula above

    NOTE: t_prime NEEDS TO BE SORTED HERE.
    """
    t_prime = np.asarray(t_prime)

    # Number of arrivals <= t_scalar (same counting rule as N())
    n_arrived = np.searchsorted(t_prime, t_scalar, side="right")
    elapsed = t_scalar - t_prime[:n_arrived]

    baseline = lambda_i * t_scalar
    # expm1(x) evaluates exp(x) - 1 without the cancellation error that the
    # explicit subtraction has when x is close to zero.
    excitation = -(alpha_i / beta_i) * np.sum(np.expm1(-beta_i * elapsed))

    return baseline + excitation


In [None]:
def m3_log_likelihood(t, t_prime, alpha_i, beta_i, lambda_i):
    """
    Log-likelihood of the three parameters (alpha_i, beta_i, lambda_i).

    t: start times from station i
    t_prime: end times at station i

    The observation horizon T is taken to be the last entry of t.

    NOTE: t_prime NEEDS TO BE SORTED HERE
    """
    horizon = t[-1]
    n_events = len(t)

    # B_i(1), ..., B_i(n_events), one value per start time
    excitation = np.asarray(B(n_events, t, t_prime, beta_i))

    # Sum of log-intensities at the start times, then minus the compensator at T
    intensity_term = np.sum(np.log(lambda_i + alpha_i * excitation))
    compensator_term = -compensator_m3(horizon, t_prime, lambda_i, alpha_i, beta_i)

    return intensity_term + compensator_term



In [None]:
# Quick check of the likelihood on entry 1 of the per-station lookups
t, t_prime = t_per_station[1], tprime_per_station[1]

# t_prime is sorted first because the likelihood requires sorted arrival times
m3_log_likelihood(t, np.sort(t_prime), alpha_i=0.1, beta_i=1, lambda_i=0.1)

In [None]:
# Distinct start_id values found in bike_data, in ascending order.
# NOTE(review): bike_data is not defined in this notebook; presumably it comes
# from `from preprocessing import *` — confirm.
sorted_start_ids = np.sort(bike_data.start_id.unique())

## Finding the parameters using likelihood optimisation

In [33]:
def _m3_params_from_log(x):
    """
    Maps the unconstrained optimiser vector x to [alpha, beta, lambda].

    alpha = exp(x[0]), beta = exp(x[0]) + exp(x[1]), lambda = exp(x[2]), so all
    three are positive and beta > alpha for every real-valued x.
    """
    alpha = np.exp(x[0])
    return [alpha, alpha + np.exp(x[1]), np.exp(x[2])]


optimal_parameters = {}
for st_id in sorted_start_ids:
    print(st_id)

    # Start point in log space: alpha = 0.01, beta = 0.01 + 0.1, lambda = 0.1.
    # The exp-transform keeps the parameters positive, so no bounds are passed.
    # TODO: decide whether explicit bounds are still needed here.
    x0 = [np.log(0.01), np.log(0.1), np.log(0.1)]

    t = t_per_station[st_id]
    # The likelihood helpers use searchsorted on t_prime, so it must be sorted
    t_prime = np.sort(tprime_per_station[st_id])

    def op_m3_likelihood(x, t=t, t_prime=t_prime):
        # Negative log-likelihood; the defaults pin this station's data to the function
        return -m3_log_likelihood(t, t_prime, *_m3_params_from_log(x))

    sol = op.minimize(op_m3_likelihood, x0, method="Nelder-Mead")
    if sol.success:
        # Store the parameters on their natural scale: [alpha, beta, lambda]
        max_params = _m3_params_from_log(sol.x)
        optimal_parameters[st_id] = max_params
    else:
        # NOTE(review): OptimizationError is not defined in this notebook;
        # presumably it comes from `from preprocessing import *` — confirm.
        raise OptimizationError(f"Failed to converge for station {st_id}.")
optimal_parameters

KeyboardInterrupt: 

In [None]:

optimal_parameters = {}
for station in train_sorted_stations_start:
    # Every row of `station` is presumed to share one start_id; the first is used as key
    station_id = station.start_id.to_numpy()[0]
    print(station_id)
    x0 = [0.1, 1, 0.1]  # initial guess, ordered [alpha, beta, lambda]

    t = station.start_time.to_numpy()
    t_prime = np.sort(station.end_time.to_numpy())  # Need to sort t_prime for likelihood function

    def op_m3_likelihood(x, t=t, t_prime=t_prime):
        # Negative log-likelihood; the defaults pin this station's data to the function
        return -m3_log_likelihood(t, t_prime, x[0], x[1], x[2])

    # Box constraints: each parameter is kept strictly positive and at most 10
    bounds = ((0.0000001, 10), (0.0000001, 10), (0.0000001, 10))
    sol = op.minimize(op_m3_likelihood, x0, method="SLSQP", bounds=bounds)
    if sol.success:
        max_params = sol.x
        optimal_parameters[station_id] = max_params
    else:
        # NOTE(review): OptimizationError is not defined in this notebook;
        # presumably it comes from `from preprocessing import *` — confirm.
        raise OptimizationError(f"Failed to converge for station {station_id}.")
optimal_parameters

## Assessing fit for model 3