In [1]:
# Standard library
import logging
import os
import pickle
from datetime import datetime

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from pytz import timezone
from scipy.optimize import minimize
from scipy.stats import gaussian_kde

# Project-local
from policy import DPAgent
from new_new_policy import DiscretizedDPAgent
from simulation import Simulator, CustomerGenerator
from hazard_models import ExponentialHazard
from utility_learner import ProjectedVolumeLearner, diam
from degradation_learner import DegradationLearner
from utils import unit_ball_rejection_sample, correct_signs

logging.basicConfig(level=logging.INFO)

np.set_printoptions(suppress=True)

In [2]:
# --- 2. Define Sampling Functions ---
# def context_sampler() -> np.ndarray:
#     """Samples a customer's context vector from a uniform distribution."""
#     return np.random.uniform(low=0.0, high=1.0, size=D)

def context_sampler() -> np.ndarray:
    """Draw one customer context vector with non-negative components.

    A point is obtained from ``unit_ball_rejection_sample(D)`` and its
    element-wise absolute value is returned, so the result has no negative
    entries.
    """
    ball_point = unit_ball_rejection_sample(D)
    # Fold the sample so that every coordinate is >= 0.
    return np.abs(ball_point)

def rental_sampler() -> float:
    """Draw a desired rental duration.

    Returns:
        One sample from an exponential distribution with scale (mean) 10.0,
        taken from NumPy's global random state.
    """
    desired_duration = np.random.exponential(10.0)
    return desired_duration

def interarrival_sampler() -> float:
    """Draw the waiting time until the next customer arrives.

    Returns:
        One sample from an exponential distribution with scale (mean) 5.0,
        taken from NumPy's global random state.
    """
    time_to_next_arrival = np.random.exponential(5.0)
    return time_to_next_arrival

In [3]:
# --- 1. Simulation Configuration ---
D = 4                    # Dimension of the context vectors
LAMBDA_VAL = 0.001       # Baseline hazard constant
NUM_CUSTOMERS = 40000    # Total number of customers to simulate (the horizon T)

# Seed the global RNGs so that "Restart Kernel -> Run All" reproduces the same
# run. The sampler functions in this notebook draw from NumPy's global random
# state (np.random.exponential), which np.random.seed controls.
# Set SEED = None to get a fresh, non-reproducible run instead.
SEED = 41
if SEED is not None:
    np.random.seed(SEED)
    torch.manual_seed(SEED)

# Ground-truth parameter vectors (each of length D)
THETA_TRUE = np.array([0.5, 0.2, 0.4, 0.3])    # For degradation
UTILITY_TRUE = np.array([0.372450167, 0.10850869, 0.33930126, 0.71356037])    # For customer's willingness to pay

# --- Machine's Pricing Vector 'r' ---
# This is a fallback pricing vector, when we don't feed u_hat to calculate_price
PRICING_R = np.zeros(D)

# Guard against silently changing D without updating the ground-truth vectors.
assert THETA_TRUE.shape == (D,), "THETA_TRUE must have D entries"
assert UTILITY_TRUE.shape == (D,), "UTILITY_TRUE must have D entries"

In [4]:
# Hazard model that is handed to the Simulator as `usage_hazard_model`.
usage_exp_hazard_model = ExponentialHazard(lambda_val=LAMBDA_VAL)
# spontaneous_exp_hazard_model = None # ExponentialHazard(lambda_val=0.01)

# Customer stream assembled from the three sampler functions defined earlier.
customer_gen = CustomerGenerator(
    d=D,
    context_sampler=context_sampler,
    rental_sampler=rental_sampler,
    interarrival_sampler=interarrival_sampler
)

# Intentionally empty: every override below is commented out.
# NOTE(review): presumably the learner then falls back to its own defaults - confirm.
centroid_params = {
    # 'num_samples': 2000,
    # 'thin': None,
    # 'burn_in': 500 * D ** 2,
    # 'tol': 1e-4,
    # 'rho_target': 0.01
}

# Threshold used by the custom termination rule below.
DIAMETER_TOLERANCE = 0.0005


def termination_rule(diameter: float) -> bool:
    """Custom termination rule passed to ProjectedVolumeLearner.

    Written as a named function rather than a lambda bound to a name (PEP 8),
    which also gives it a meaningful ``__name__`` in tracebacks.

    Args:
        diameter: Value supplied by the learner (presumably the diameter of
            its current uncertainty set - confirm against utility_learner).

    Returns:
        True when ``diameter`` is strictly below ``DIAMETER_TOLERANCE``.
    """
    return diameter < DIAMETER_TOLERANCE


projected_volume_learner = ProjectedVolumeLearner(
    T=NUM_CUSTOMERS,
    d=D,
    centroid_params=centroid_params,
    incentive_constant=1.1,
    termination_rule=termination_rule,
)

mdp_params = {
    # NOTE(review): these two values equal the scales hard-coded in
    # rental_sampler (10.0) and interarrival_sampler (5.0); they presumably
    # describe the same distributions, so change them together.
    'duration_lambda': 10.0,
    'interarrival_lambda': 5.0,
    'replacement_cost': 1.5,   # Cost to replace the machine
    'failure_cost': 0.75,      # Additional penalty for in-service failure
    'holding_cost_rate': 0.02,   # Cost per unit of idle time
    'gamma': 0.99,             # Discount factor
    'learning_rate': 1e-3,      # Learning rate for the Adam optimizer
    'target_update_freq': 10    # How often to update the target network (in iterations)
}

training_hyperparams = {
    # For FQI
    'num_iterations': 1, # Number of training iterations per policy update
    'dataset_size': 50000,      # Number of transitions to generate for the offline dataset
    'batch_size': 256,           # Batch size for training

    # For discrete DP
    'N': [100, 50, 100, 100], # grid sizes [cum_context, context, revenue, duration]
    'max_cumulative_context': 8.0,
    # 'max_active_time': 150.0,
    'num_value_iterations': 100,

}

policy_type = 'decaying_epsilon_greedy'
policy_kwargs = {
    'current_epsilon': 0.10,
    'decay_rate': 0.95,
    'step': 0,
}

# Instantiate the Simulator with the new parameters
simulator = Simulator(
    d=D,
    T=NUM_CUSTOMERS,

    theta_true=THETA_TRUE,
    utility_true=UTILITY_TRUE,
    pricing_r=PRICING_R,

    usage_hazard_model=usage_exp_hazard_model,
    customer_generator=customer_gen,
    projected_volume_learner=projected_volume_learner,  # Custom-configured learner built above

    mdp_params=mdp_params,
    discrete_dp=True,
    policy_type=policy_type,
    training_hyperparams=training_hyperparams,
    policy_kwargs=policy_kwargs,
    policy_update_threshold=100,
    time_normalize=True,
)

In [5]:
# # Optional shortcut: skip utility exploration by starting from the true utility vector (UTILITY_TRUE)
# simulator.projected_volume_learner.centroids.append(UTILITY_TRUE)
# simulator.projected_volume_learner.is_terminated = True
# simulator.seen_breakdowns = 2

# degradation_learner = DegradationLearner(d=simulator.d)
# degradation_learner.theta = np.ones(D) * 0.1
# degradation_learner.cum_baseline = lambda x: LAMBDA_VAL * x
# degradation_learner.inverse_cum_baseline = lambda y: y / LAMBDA_VAL
# simulator.degradation_learner = degradation_learner

# # dp_agent = DPAgent(
# #     d=simulator.d,
# #     u_hat=UTILITY_TRUE,
# #     time_normalize=simulator.time_normalize,
# #     degradation_learner=simulator.degradation_learner,
# #     customer_generator=simulator.customer_generator,
# #     params=simulator.mdp_params
# # )
# # dp_agent.train(**simulator.training_hyperparams)

# dp_agent = DiscretizedDPAgent(
#     N=training_hyperparams['N'], # grid sizes [cum_context, context, revenue, duration]
#     max_cumulative_context=training_hyperparams['max_cumulative_context'],
#     # max_active_time=training_hyperparams['max_active_time'],
#     u_hat=UTILITY_TRUE,
#     degradation_learner=degradation_learner,
#     customer_generator=customer_gen,
#     params=mdp_params,
# )
# # dp_agent._precompute_dynamics(num_samples=50000)
# dp_agent.run_value_iteration(100)

# simulator.dp_agent = dp_agent
# simulator.optimal_policy = dp_agent.get_policy(simulator.policy_type)
# simulator.breakdowns_since_last_update = 0 # Reset the counter


In [6]:
# Timestamp (US Pacific time) taken at launch; it tags every artifact of this run.
pacific_tz = timezone('America/Los_Angeles')
current_time = datetime.now(pacific_tz).strftime("%Y%m%d_%H%M%S")

# Create the output folders *before* the long-running simulation: writing into
# a missing directory raises an error, which would otherwise surface only after
# the run has finished and lose its results.
for output_dir in ('data', 'models'):
    os.makedirs(output_dir, exist_ok=True)

# simulator.projected_volume_learner.is_terminated = True
simulation_data = simulator.run(num_customers=NUM_CUSTOMERS)

# Collect the histories recorded by the simulator into tabular form.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Persist the run: two CSV tables plus the simulator object itself.
degradation_df.to_csv(f'data/degradation_data_{current_time}.csv', index=False)
simulation_df.to_csv(f'data/simulation_data_{current_time}.csv', index=False)
simulator.save(f'models/simulator_{current_time}')

INFO:root:Starting simulation for 40000 customers...
  0%|          | 0/40000 [00:00<?, ?it/s]

Set parameter Username


INFO:gurobipy:Set parameter Username


Set parameter LicenseID to value 2651514


INFO:gurobipy:Set parameter LicenseID to value 2651514


Academic license - for non-commercial use only - expires 2026-04-14


INFO:gurobipy:Academic license - for non-commercial use only - expires 2026-04-14
INFO:root:Customer 1: Diameter: 1.0086
  0%|          | 1/40000 [00:03<33:49:56,  3.04s/it]INFO:root:Customer 2: Diameter: 0.7669
  0%|          | 2/40000 [00:06<35:39:43,  3.21s/it]INFO:root:Customer 3: Diameter: 0.6187
  0%|          | 3/40000 [00:09<37:40:16,  3.39s/it]INFO:root:Customer 4: Diameter: 0.6970
  0%|          | 4/40000 [00:14<40:35:34,  3.65s/it]INFO:root:Customer 5: Diameter: 0.7711
  0%|          | 5/40000 [00:18<43:51:53,  3.95s/it]INFO:root:Customer 5: Diameter: 0.3543
  0%|          | 6/40000 [00:23<47:31:16,  4.28s/it]INFO:root:Customer 6: Diameter: 0.3033
  0%|          | 7/40000 [00:28<50:53:39,  4.58s/it]INFO:root:Customer 7: Diameter: 0.3848
  0%|          | 8/40000 [00:34<54:21:34,  4.89s/it]INFO:root:Customer 8: Diameter: 0.1788
  0%|          | 9/40000 [00:40<57:44:32,  5.20s/it]INFO:root:Customer 8: Diameter: 0.1961
  0%|          | 10/40000 [00:46<61:38:10,  5.55s/it]INFO:ro

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.79176D+00    |proj g|=  8.70831D-01

At iterate    1    f=  7.96113D-01    |proj g|=  2.77455D-01

At iterate    2    f=  6.10771D-01    |proj g|=  1.20123D-01

At iterate    3    f=  5.70382D-01    |proj g|=  6.70495D-02

At iterate    4    f=  5.55564D-01    |proj g|=  0.00000D+00

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      4      5      8     0     4   0.000D+00   5.556D-01
  F =  0.55556381420662682     

CONVERG

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.087598
Iteration 20/100 | Max Change (Delta): 0.056308
Iteration 30/100 | Max Change (Delta): 0.045206
Iteration 40/100 | Max Change (Delta): 0.036944
Iteration 50/100 | Max Change (Delta): 0.030217
Iteration 60/100 | Max Change (Delta): 0.024714
Iteration 70/100 | Max Change (Delta): 0.020214
Iteration 80/100 | Max Change (Delta): 0.016533
Iteration 90/100 | Max Change (Delta): 0.013523


INFO:root:Policy updated.
 16%|█▌        | 6371/40000 [09:24<05:48, 96.37it/s]  

Iteration 100/100 | Max Change (Delta): 0.011060

Value iteration finished (max iterations reached).


 23%|██▎       | 9265/40000 [09:24<03:01, 168.97it/s]INFO:root:Updating optimal policy...


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  6.67453D+02    |proj g|=  1.00000D+00

At iterate    1    f=  6.51570D+02    |proj g|=  6.26910D-01

At iterate    2    f=  6.51391D+02    |proj g|=  6.00406D-01

At iterate    3    f=  6.51214D+02    |proj g|=  1.39737D-01


INFO:root:Theta updated. New theta_hat: [0.479 0.489 0.328 0.235]



At iterate    4    f=  6.51213D+02    |proj g|=  2.39726D-02

At iterate    5    f=  6.51213D+02    |proj g|=  3.10497D-04

At iterate    6    f=  6.51213D+02    |proj g|=  1.04891D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   1.049D-05   6.512D+02
  F =   651.21281063341075     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 s

 23%|██▎       | 9265/40000 [09:40<03:01, 168.97it/s]

Iteration 20/100 | Max Change (Delta): 0.142682
Iteration 30/100 | Max Change (Delta): 0.113987
Iteration 40/100 | Max Change (Delta): 0.092720
Iteration 50/100 | Max Change (Delta): 0.075741
Iteration 60/100 | Max Change (Delta): 0.061931
Iteration 70/100 | Max Change (Delta): 0.050650
Iteration 80/100 | Max Change (Delta): 0.041426
Iteration 90/100 | Max Change (Delta): 0.033883


INFO:root:Policy updated.
 25%|██▌       | 10150/40000 [10:52<10:17, 48.31it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.027713

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.20591D+03    |proj g|=  1.00000D+00

At iterate    1    f=  1.14578D+03    |proj g|=  6.15540D-01

At iterate    2    f=  1.14492D+03    |proj g|=  5.91263D-01

At iterate    3    f=  1.14396D+03    |proj g|=  4.76312D-01

At iterate    4    f=  1.14396D+03    |proj g|=  2.76421D-02


INFO:root:Theta updated. New theta_hat: [0.472 0.157 0.453 0.462]



At iterate    5    f=  1.14396D+03    |proj g|=  1.22136D-02

At iterate    6    f=  1.14396D+03    |proj g|=  9.97465D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   9.975D-04   1.144D+03
  F =   1143.9601432560478     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectatio

 25%|██▌       | 10150/40000 [11:10<10:17, 48.31it/s]

Iteration 20/100 | Max Change (Delta): 0.138606
Iteration 30/100 | Max Change (Delta): 0.110933
Iteration 40/100 | Max Change (Delta): 0.090316
Iteration 50/100 | Max Change (Delta): 0.073794
Iteration 60/100 | Max Change (Delta): 0.060341
Iteration 70/100 | Max Change (Delta): 0.049350
Iteration 80/100 | Max Change (Delta): 0.040363
Iteration 90/100 | Max Change (Delta): 0.033013


INFO:root:Policy updated.
 30%|██▉       | 11848/40000 [12:20<14:08, 33.18it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.027002

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.82116D+03    |proj g|=  1.00000D+00

At iterate    1    f=  1.73611D+03    |proj g|=  6.44016D-01

At iterate    2    f=  1.73504D+03    |proj g|=  6.07315D-01

At iterate    3    f=  1.73401D+03    |proj g|=  4.57777D-01

At iterate    4    f=  1.73401D+03    |proj g|=  2.48229D-02


INFO:root:Theta updated. New theta_hat: [0.455 0.273 0.491 0.256]



At iterate    5    f=  1.73401D+03    |proj g|=  1.57530D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   1.575D-02   1.734D+03
  F =   1734.0111875515183     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 30%|██▉       | 11848/40000 [12:40<14:08, 33.18it/s]

Iteration 20/100 | Max Change (Delta): 0.146262
Iteration 30/100 | Max Change (Delta): 0.116828
Iteration 40/100 | Max Change (Delta): 0.094975
Iteration 50/100 | Max Change (Delta): 0.077570
Iteration 60/100 | Max Change (Delta): 0.063423
Iteration 70/100 | Max Change (Delta): 0.051870
Iteration 80/100 | Max Change (Delta): 0.042423
Iteration 90/100 | Max Change (Delta): 0.034698


INFO:root:Policy updated.
 34%|███▎      | 13446/40000 [13:54<17:00, 26.02it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028380

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  2.48092D+03    |proj g|=  1.00000D+00

At iterate    1    f=  2.36900D+03    |proj g|=  6.51587D-01

At iterate    2    f=  2.36801D+03    |proj g|=  6.22750D-01

At iterate    3    f=  2.36702D+03    |proj g|=  4.65921D-01

At iterate    4    f=  2.36701D+03    |proj g|=  1.51980D-02


INFO:root:Theta updated. New theta_hat: [0.467 0.277 0.43  0.268]



At iterate    5    f=  2.36701D+03    |proj g|=  9.34772D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   9.348D-03   2.367D+03
  F =   2367.0119830577364     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 34%|███▎      | 13446/40000 [14:10<17:00, 26.02it/s]

Iteration 20/100 | Max Change (Delta): 0.149587
Iteration 30/100 | Max Change (Delta): 0.119316
Iteration 40/100 | Max Change (Delta): 0.096931
Iteration 50/100 | Max Change (Delta): 0.079148
Iteration 60/100 | Max Change (Delta): 0.064708
Iteration 70/100 | Max Change (Delta): 0.052919
Iteration 80/100 | Max Change (Delta): 0.043282
Iteration 90/100 | Max Change (Delta): 0.035400


INFO:root:Policy updated.
 38%|███▊      | 15156/40000 [15:23<17:35, 23.53it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028954

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.16365D+03    |proj g|=  1.00000D+00

At iterate    1    f=  3.02605D+03    |proj g|=  6.58372D-01

At iterate    2    f=  3.02406D+03    |proj g|=  6.36389D-01

At iterate    3    f=  3.02212D+03    |proj g|=  5.04671D-01

At iterate    4    f=  3.02211D+03    |proj g|=  4.53497D-02


INFO:root:Theta updated. New theta_hat: [0.504 0.204 0.393 0.318]



At iterate    5    f=  3.02211D+03    |proj g|=  1.81228D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   1.812D-02   3.022D+03
  F =   3022.1137385238139     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 38%|███▊      | 15156/40000 [15:40<17:35, 23.53it/s]

Iteration 20/100 | Max Change (Delta): 0.151785
Iteration 30/100 | Max Change (Delta): 0.121062
Iteration 40/100 | Max Change (Delta): 0.098329
Iteration 50/100 | Max Change (Delta): 0.080280
Iteration 60/100 | Max Change (Delta): 0.065631
Iteration 70/100 | Max Change (Delta): 0.053674
Iteration 80/100 | Max Change (Delta): 0.043898
Iteration 90/100 | Max Change (Delta): 0.035904


INFO:root:Policy updated.
 42%|████▏     | 16617/40000 [16:51<18:28, 21.10it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029366

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.86939D+03    |proj g|=  1.00000D+00

At iterate    1    f=  3.69534D+03    |proj g|=  6.50308D-01

At iterate    2    f=  3.69240D+03    |proj g|=  6.28213D-01

At iterate    3    f=  3.68943D+03    |proj g|=  5.23491D-01

At iterate    4    f=  3.68942D+03    |proj g|=  9.40678D-02

At iterate    5    f=  3.68942D+03    |proj g|=  2.23422D-02

At iterate    6    f=  3.68942D+03    |proj g|=  1.03990D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.522 0.185 0.403 0.337]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...


 42%|████▏     | 16617/40000 [17:10<18:28, 21.10it/s]

Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.202023
Iteration 20/100 | Max Change (Delta): 0.148714
Iteration 30/100 | Max Change (Delta): 0.118714
Iteration 40/100 | Max Change (Delta): 0.096490
Iteration 50/100 | Max Change (Delta): 0.078798
Iteration 60/100 | Max Change (Delta): 0.064424
Iteration 70/100 | Max Change (Delta): 0.052687
Iteration 80/100 | Max Change (Delta): 0.043092
Iteration 90/100 | Max Change (Delta): 0.035245


INFO:root:Policy updated.
 45%|████▌     | 18091/40000 [18:33<19:34, 18.66it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028827

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  4.60490D+03    |proj g|=  1.00000D+00

At iterate    1    f=  4.41203D+03    |proj g|=  6.64058D-01


 45%|████▌     | 18091/40000 [18:50<19:34, 18.66it/s]


At iterate    2    f=  4.40850D+03    |proj g|=  6.52052D-01

At iterate    3    f=  4.40455D+03    |proj g|=  5.31675D-01

At iterate    4    f=  4.40454D+03    |proj g|=  6.35790D-02

At iterate    5    f=  4.40454D+03    |proj g|=  8.09708D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   8.097D-03   4.405D+03
  F =   4404.5393403475300     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.53  0.165 0.357 0.338]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.209664
Iteration 20/100 | Max Change (Delta): 0.154030
Iteration 30/100 | Max Change (Delta): 0.122757
Iteration 40/100 | Max Change (Delta): 0.099668
Iteration 50/100 | Max Change (Delta): 0.081366
Iteration 60/100 | Max Change (Delta): 0.066516
Iteration 70/100 | Max Change (Delta): 0.054396
Iteration 80/100 | Max Change (Delta): 0.044489
Iteration 90/100 | Max Change (Delta): 0.036388


INFO:root:Policy updated.
 49%|████▉     | 19639/40000 [20:31<20:26, 16.61it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029762

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  5.33467D+03    |proj g|=  1.00000D+00

At iterate    1    f=  5.10979D+03    |proj g|=  6.62881D-01


 49%|████▉     | 19639/40000 [20:50<20:26, 16.61it/s]


At iterate    2    f=  5.10483D+03    |proj g|=  6.51718D-01

At iterate    3    f=  5.09914D+03    |proj g|=  5.53360D-01

At iterate    4    f=  5.09914D+03    |proj g|=  7.34703D-02

At iterate    5    f=  5.09914D+03    |proj g|=  2.65450D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   2.654D-02   5.099D+03
  F =   5099.1374275170811     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.551 0.144 0.357 0.344]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.209439
Iteration 20/100 | Max Change (Delta): 0.153950
Iteration 30/100 | Max Change (Delta): 0.122768
Iteration 40/100 | Max Change (Delta): 0.099710
Iteration 50/100 | Max Change (Delta): 0.081407
Iteration 60/100 | Max Change (Delta): 0.066552
Iteration 70/100 | Max Change (Delta): 0.054427
Iteration 80/100 | Max Change (Delta): 0.044514
Iteration 90/100 | Max Change (Delta): 0.036408


INFO:root:Policy updated.
 52%|█████▏    | 20977/40000 [22:40<22:11, 14.29it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029778

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  6.07092D+03    |proj g|=  1.00000D+00


 52%|█████▏    | 20977/40000 [22:50<22:11, 14.29it/s]


At iterate    1    f=  5.80928D+03    |proj g|=  6.59724D-01

At iterate    2    f=  5.80147D+03    |proj g|=  6.49064D-01

At iterate    3    f=  5.79245D+03    |proj g|=  5.83884D-01

At iterate    4    f=  5.79244D+03    |proj g|=  1.16317D-01

At iterate    5    f=  5.79244D+03    |proj g|=  3.01697D-02

At iterate    6    f=  5.79244D+03    |proj g|=  1.45060D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   1.451D-02   5.792D+03
  F =   5792.4390828897695     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.58  0.105 0.373 0.362]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.205337
Iteration 20/100 | Max Change (Delta): 0.151122
Iteration 30/100 | Max Change (Delta): 0.120615
Iteration 40/100 | Max Change (Delta): 0.098021
Iteration 50/100 | Max Change (Delta): 0.080045
Iteration 60/100 | Max Change (Delta): 0.065443
Iteration 70/100 | Max Change (Delta): 0.053521
Iteration 80/100 | Max Change (Delta): 0.043774
Iteration 90/100 | Max Change (Delta): 0.035802


INFO:root:Policy updated.
 56%|█████▌    | 22357/40000 [24:57<22:59, 12.79it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029283

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  6.82740D+03    |proj g|=  1.00000D+00


 56%|█████▌    | 22357/40000 [25:10<22:59, 12.79it/s]


At iterate    1    f=  6.54199D+03    |proj g|=  6.63482D-01

At iterate    2    f=  6.53270D+03    |proj g|=  6.51309D-01

At iterate    3    f=  6.52214D+03    |proj g|=  5.77699D-01

At iterate    4    f=  6.52212D+03    |proj g|=  1.55899D-01

At iterate    5    f=  6.52212D+03    |proj g|=  4.78934D-02

At iterate    6    f=  6.52212D+03    |proj g|=  2.19884D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   2.199D-02   6.522D+03
  F =   6522.1212552703018     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.574 0.09  0.379 0.362]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207974
Iteration 20/100 | Max Change (Delta): 0.152977
Iteration 30/100 | Max Change (Delta): 0.122069
Iteration 40/100 | Max Change (Delta): 0.099180
Iteration 50/100 | Max Change (Delta): 0.080986
Iteration 60/100 | Max Change (Delta): 0.066211
Iteration 70/100 | Max Change (Delta): 0.054149
Iteration 80/100 | Max Change (Delta): 0.044287
Iteration 90/100 | Max Change (Delta): 0.036222


INFO:root:Policy updated.
 59%|█████▉    | 23648/40000 [27:09<23:09, 11.77it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029626

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  7.58203D+03    |proj g|=  1.00000D+00

At iterate    1    f=  7.26351D+03    |proj g|=  6.59924D-01

At iterate    2    f=  7.25483D+03    |proj g|=  6.51428D-01

At iterate    3    f=  7.24540D+03    |proj g|=  5.40758D-01

At iterate    4    f=  7.24539D+03    |proj g|=  1.15217D-01

At iterate    5    f=  7.24539D+03    |proj g|=  2.59730D-02

At iterate    6    f=  7.24539D+03    |proj g|=  1.38238D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.537 0.111 0.415 0.352]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.205958
Iteration 20/100 | Max Change (Delta): 0.151555
Iteration 30/100 | Max Change (Delta): 0.120947
Iteration 40/100 | Max Change (Delta): 0.098272
Iteration 50/100 | Max Change (Delta): 0.080245
Iteration 60/100 | Max Change (Delta): 0.065606
Iteration 70/100 | Max Change (Delta): 0.053654
Iteration 80/100 | Max Change (Delta): 0.043882
Iteration 90/100 | Max Change (Delta): 0.035891


INFO:root:Policy updated.
 62%|██████▏   | 24900/40000 [29:35<23:34, 10.68it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029356

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  8.35307D+03    |proj g|=  1.00000D+00

At iterate    1    f=  7.99980D+03    |proj g|=  6.57082D-01

At iterate    2    f=  7.99022D+03    |proj g|=  6.46832D-01

At iterate    3    f=  7.97977D+03    |proj g|=  5.37337D-01

At iterate    4    f=  7.97976D+03    |proj g|=  1.38159D-01

At iterate    5    f=  7.97976D+03    |proj g|=  2.16032D-02

At iterate    6    f=  7.97976D+03    |proj g|=  1.23050D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.534 0.108 0.42  0.362]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.204791
Iteration 20/100 | Max Change (Delta): 0.150777
Iteration 30/100 | Max Change (Delta): 0.120336
Iteration 40/100 | Max Change (Delta): 0.097792
Iteration 50/100 | Max Change (Delta): 0.079858
Iteration 60/100 | Max Change (Delta): 0.065291
Iteration 70/100 | Max Change (Delta): 0.053396
Iteration 80/100 | Max Change (Delta): 0.043672
Iteration 90/100 | Max Change (Delta): 0.035719


INFO:root:Policy updated.
 66%|██████▌   | 26327/40000 [32:01<21:57, 10.37it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029215

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  9.12536D+03    |proj g|=  1.00000D+00

At iterate    1    f=  8.74786D+03    |proj g|=  6.60933D-01

At iterate    2    f=  8.73772D+03    |proj g|=  6.35662D-01

At iterate    3    f=  8.72642D+03    |proj g|=  5.40211D-01

At iterate    4    f=  8.72641D+03    |proj g|=  1.72322D-01

At iterate    5    f=  8.72641D+03    |proj g|=  4.08208D-02

At iterate    6    f=  8.72641D+03    |proj g|=  2.16510D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.537 0.113 0.421 0.338]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207014
Iteration 20/100 | Max Change (Delta): 0.152302
Iteration 30/100 | Max Change (Delta): 0.121545
Iteration 40/100 | Max Change (Delta): 0.098751
Iteration 50/100 | Max Change (Delta): 0.080634
Iteration 60/100 | Max Change (Delta): 0.065923
Iteration 70/100 | Max Change (Delta): 0.053913
Iteration 80/100 | Max Change (Delta): 0.044094
Iteration 90/100 | Max Change (Delta): 0.036065


INFO:root:Policy updated.
 69%|██████▉   | 27625/40000 [34:55<22:05,  9.33it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029497

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  9.92858D+03    |proj g|=  1.00000D+00

At iterate    1    f=  9.51702D+03    |proj g|=  6.60340D-01

At iterate    2    f=  9.50632D+03    |proj g|=  6.50753D-01

At iterate    3    f=  9.49432D+03    |proj g|=  5.29354D-01

At iterate    4    f=  9.49431D+03    |proj g|=  1.77108D-01

At iterate    5    f=  9.49431D+03    |proj g|=  5.57563D-02

At iterate    6    f=  9.49431D+03    |proj g|=  2.60682D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.526 0.109 0.423 0.354]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.205927
Iteration 20/100 | Max Change (Delta): 0.151442
Iteration 30/100 | Max Change (Delta): 0.120848
Iteration 40/100 | Max Change (Delta): 0.098182
Iteration 50/100 | Max Change (Delta): 0.080170
Iteration 60/100 | Max Change (Delta): 0.065543
Iteration 70/100 | Max Change (Delta): 0.053602
Iteration 80/100 | Max Change (Delta): 0.043840
Iteration 90/100 | Max Change (Delta): 0.035857


INFO:root:Policy updated.
 72%|███████▏  | 28973/40000 [38:18<22:03,  8.33it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029328

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.07188D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.02730D+04    |proj g|=  6.60004D-01

At iterate    2    f=  1.02621D+04    |proj g|=  6.48299D-01

At iterate    3    f=  1.02498D+04    |proj g|=  5.21317D-01

At iterate    4    f=  1.02498D+04    |proj g|=  1.62503D-01

At iterate    5    f=  1.02498D+04    |proj g|=  3.89481D-02

At iterate    6    f=  1.02498D+04    |proj g|=  2.14124D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.518 0.113 0.42  0.363]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.205733
Iteration 20/100 | Max Change (Delta): 0.151337
Iteration 30/100 | Max Change (Delta): 0.120762
Iteration 40/100 | Max Change (Delta): 0.098117
Iteration 50/100 | Max Change (Delta): 0.080117
Iteration 60/100 | Max Change (Delta): 0.065500
Iteration 70/100 | Max Change (Delta): 0.053567
Iteration 80/100 | Max Change (Delta): 0.043812
Iteration 90/100 | Max Change (Delta): 0.035833


INFO:root:Policy updated.
 75%|███████▌  | 30179/40000 [42:15<23:05,  7.09it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029308

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.15111D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.10412D+04    |proj g|=  6.63793D-01

At iterate    2    f=  1.10294D+04    |proj g|=  6.49412D-01

At iterate    3    f=  1.10159D+04    |proj g|=  5.08540D-01

At iterate    4    f=  1.10159D+04    |proj g|=  1.52915D-01

At iterate    5    f=  1.10159D+04    |proj g|=  2.22874D-02

At iterate    6    f=  1.10159D+04    |proj g|=  1.36965D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.506 0.102 0.423 0.37 ]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208264
Iteration 20/100 | Max Change (Delta): 0.153148
Iteration 30/100 | Max Change (Delta): 0.122179
Iteration 40/100 | Max Change (Delta): 0.099250
Iteration 50/100 | Max Change (Delta): 0.081037
Iteration 60/100 | Max Change (Delta): 0.066251
Iteration 70/100 | Max Change (Delta): 0.054181
Iteration 80/100 | Max Change (Delta): 0.044313
Iteration 90/100 | Max Change (Delta): 0.036244


INFO:root:Policy updated.
 78%|███████▊  | 31283/40000 [46:46<24:31,  5.92it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029644

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.23072D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.18060D+04    |proj g|=  6.63066D-01

At iterate    2    f=  1.17937D+04    |proj g|=  6.48998D-01

At iterate    3    f=  1.17798D+04    |proj g|=  5.12309D-01

At iterate    4    f=  1.17798D+04    |proj g|=  1.82758D-01

At iterate    5    f=  1.17798D+04    |proj g|=  2.97565D-02

At iterate    6    f=  1.17798D+04    |proj g|=  1.89991D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

 81%|████████▏ | 32514/40000 [50:58<11:44, 10.63it/s]


KeyboardInterrupt: 

In [None]:
# Snapshot the finished (or interrupted) run: both history tables go to CSV and the
# simulator itself is saved, all tagged with `current_time`.
degradation_df, simulation_df = (
    pd.DataFrame(records)
    for records in (simulator.degradation_history, simulator.history)
)

degradation_df.to_csv(f'data/degradation_data_{current_time}.csv', index=False)
simulation_df.to_csv(f'data/simulation_data_{current_time}.csv', index=False)

simulator.save(f'models/simulator_{current_time}')

### Convergence of $\hat\theta$

In [None]:
# Optional: restore a previously saved run instead of using the live `simulator`.
# simulator = Simulator.load('models/simulator_0914')

history = pd.DataFrame(simulator.history)
degradation_history = pd.DataFrame(simulator.degradation_history)

theta_updates = simulator.theta_updates

# Reference curve drawn next to the errors: 0.20 * 0.95**i for the i-th theta update.
epsilons = [0.20 * (0.95 ** i) for i in range(len(theta_updates))]

# x-coordinate of each update: the latest calendar time recorded for the customer
# at which theta_hat was refreshed.
times = [
    history.loc[history.customer_id == update['customer_idx'], 'calendar_time'].max()
    for update in theta_updates
]

# L2 and L-infinity norms of the theta estimation error at each update.
L2_errors = [np.linalg.norm(update['theta_hat'] - THETA_TRUE) for update in theta_updates]
Linf_errors = [np.linalg.norm(update['theta_hat'] - THETA_TRUE, ord=np.inf) for update in theta_updates]

plt.figure(figsize=(12, 6))
# Raw strings keep the LaTeX backslashes intact without relying on invalid escape sequences.
plt.plot(times, L2_errors, label=r'$L_2$ Norm Error', marker='o')
plt.plot(times, Linf_errors, label=r'$L_\infty$ Norm Error', marker='x')
plt.plot(times, epsilons, label='Exploration Rate (ε)', linestyle='--', color='gray')
# plt.yscale('log')
# The x values are calendar times, not customer counts.
plt.xlabel('Calendar Time', fontsize=14)
plt.ylabel('Error Norm', fontsize=14)

plt.title(r'Convergence of $\|\hat{\theta} - \theta\|$', fontsize=18)
plt.legend(fontsize=12)
plt.grid(True)
# Dedicated file name: 'figures/utility_convergence.pdf' is written by the u_hat plot,
# which would otherwise overwrite this figure.
plt.savefig('figures/theta_convergence.pdf')
plt.show()

### Convergence of $\hat u$

In [None]:
utility_updates = simulator.utility_updates

# L2 and L-infinity norms of the utility estimation error at each update.
L2_errors = [np.linalg.norm(update['u_hat'] - UTILITY_TRUE) for update in utility_updates]
Linf_errors = [np.linalg.norm(update['u_hat'] - UTILITY_TRUE, ord=np.inf) for update in utility_updates]

plt.figure(figsize=(12, 6))
# No x values are passed, so each curve is drawn against the update's position in the list.
# Raw strings keep the LaTeX backslashes intact without relying on invalid escape sequences.
plt.plot(L2_errors, label=r'$L_2$ Norm Error', marker='o')
plt.plot(Linf_errors, label=r'$L_\infty$ Norm Error', marker='x')
# plt.yscale('log')
plt.xlabel('Utility Update Index', fontsize=14)
plt.ylabel('Error Norm', fontsize=14)

plt.title(r'Convergence of $\|\hat u - u\|$', fontsize=18)
plt.legend(fontsize=12)
plt.grid(True)
plt.savefig('figures/utility_convergence.pdf')
plt.show()

### Revenue of Online Learner

In [None]:
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Net profit per record is profit + loss; presumably `loss` is stored as a
# non-positive amount -- TODO confirm against the simulator.
simulation_df['net_profit'] = simulation_df['profit'] + simulation_df['loss']
simulation_df['cumulative_net_profit'] = simulation_df['net_profit'].cumsum()

# Explicit figure/axes handles (the figure used to be bound to a name suggesting an Axes).
fig, ax = plt.subplots(figsize=(10, 6))

# Cumulative net profit against calendar time for the online learner.
ax.plot(simulation_df['calendar_time'], simulation_df['cumulative_net_profit'], label='Cumulative Net Profit')
ax.set_xlabel('Calendar Time')
ax.set_ylabel('Cumulative Net Profit')
ax.set_title('Cumulative Net Profit Over Time')
ax.legend()
ax.grid(True)
fig.savefig('figures/cumulative_net_profit_online.pdf')
plt.show()

## Training policy under perfect information

### Revenue of Optimal Policy

In [None]:
class PerfectDegradationLearner:
    """Degradation "learner" that estimates nothing and answers from fixed inputs.

    It stores the theta vector and hazard model handed to the constructor and
    serves every query directly from them.
    """

    def __init__(self, d, theta_true, hazard_model):
        """Store the dimension `d`, the theta vector and the hazard model as given."""
        # The hazard model backs both baseline queries below.
        self.hazard_model = hazard_model
        self.theta_true = theta_true
        self.d = d

    def get_theta(self):
        """Return the stored theta (the very object passed to the constructor)."""
        return self.theta_true

    def cum_baseline(self, t):
        """Return `hazard_model.Lambda_0(t)`."""
        model = self.hazard_model
        return model.Lambda_0(t)

    def inverse_cum_baseline(self, u):
        """Return `hazard_model.Lambda_0_inverse(u)`."""
        model = self.hazard_model
        return model.Lambda_0_inverse(u)
    
# Learner that hands back THETA_TRUE and delegates baseline queries to the hazard model.
perfect_degradation_learner = PerfectDegradationLearner(
    d=D,
    theta_true=THETA_TRUE,
    hazard_model=usage_exp_hazard_model,
)

# Reuse the simulator's training hyperparameters and MDP parameters, but plan with the
# ground-truth utility vector and the learner above.
hyperparams = simulator.training_hyperparams

perfect_dpagent = DiscretizedDPAgent(
    N=hyperparams['N'],  # grid sizes, taken from the simulator's hyperparameters
    max_cumulative_context=hyperparams['max_cumulative_context'],
    u_hat=UTILITY_TRUE,
    degradation_learner=perfect_degradation_learner,
    customer_generator=customer_gen,
    params=simulator.mdp_params,
)

# The sample count and sweep count are hard-coded here rather than read from
# `hyperparams` (which also carries a 'num_value_iterations' entry).
perfect_dpagent._precompute_dynamics(100000)
perfect_dpagent.run_value_iteration(150)

perfect_policy = perfect_dpagent.get_policy('greedy')

In [None]:
# Snapshot the online run's history BEFORE the exploit run touches the simulator.
simulation_df = pd.DataFrame(simulator.history)
# simulator.degradation_learner = perfect_degradation_learner

# Records produced by running `perfect_policy` in pure-exploit mode.
samples = pd.DataFrame(simulator.run_full_exploit(100000, perfect_policy, {'tau': 0.01}))

# Same three derived columns on both tables: per-record net profit, its running
# total, and the running total divided by the record's calendar time.
for frame in (simulation_df, samples):
    frame['net_profit'] = frame['profit'] + frame['loss']
    frame['cumulative_net_profit'] = frame['net_profit'].cumsum()
    frame['netprofit_per_time'] = frame['cumulative_net_profit'] / frame['calendar_time']

In [None]:
def calculate_rolling_rate(df, time_col, value_col, window_size):
    """
    Calculates the rate of a value over a trailing time window on irregular time series data.

    For each row with time ``t`` the values of all rows whose time lies in
    ``[t - window_size, t]`` (up to and including the row itself in time order)
    are summed and divided by ``window_size``.

    The input frame is not modified and does not need to be sorted. The result
    is returned in the input's row order on the input's index, so
    ``df['rate'] = calculate_rolling_rate(df, ...)`` puts every rate on the row
    it was computed for.

    Args:
        df (pd.DataFrame): The input dataframe.
        time_col (str): The name of the column with time data.
        value_col (str): The name of the column with values to aggregate (e.g., 'net_profit').
        window_size (int | float): The duration of the rolling time window; must be positive.

    Returns:
        pd.Series: The rolling rate for each row of ``df``, indexed like ``df``.

    Raises:
        ValueError: If ``window_size`` is not strictly positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size!r}")

    times = df[time_col].to_numpy()
    values = df[value_col].to_numpy()

    # Work in time order, but remember where every row came from so the result
    # can be handed back in the caller's row order.
    order = np.argsort(times, kind='stable')
    sorted_times = times[order]
    sorted_values = values[order]

    # First position whose time is >= t_i - window_size, i.e. the start of each window.
    start_indices = np.searchsorted(sorted_times, sorted_times - window_size, side='left')

    # Zero-padded cumulative sum: padded[k] is the sum of the first k sorted values,
    # so the sum over sorted positions [j, i] is padded[i + 1] - padded[j].
    padded_cumsum = np.concatenate(([0], np.cumsum(sorted_values)))
    window_sums = padded_cumsum[1:] - padded_cumsum[start_indices]

    # The rate is the sum within the window divided by the window's duration.
    sorted_rates = window_sums / window_size

    # Scatter the rates back to the rows they belong to.
    rates = np.empty_like(sorted_rates)
    rates[order] = sorted_rates

    return pd.Series(rates, index=df.index)


# --- 2. Calculate net profit and the rolling rate for each DataFrame ---

window_duration = 20000  # length of the rolling window, in the units of 'calendar_time'

# Only the online-learning table is processed here; put `samples` in the tuple to
# give it a 'profit_rate' column as well.
for df in (simulation_df,):
    df['net_profit'] = df['profit'] + df['loss']
    # Rolling net profit per unit of calendar time.
    df['profit_rate'] = calculate_rolling_rate(
        df,
        time_col='calendar_time',
        value_col='net_profit',
        window_size=window_duration,
    )

In [None]:
# --- 3. Plot the new rolling profit rate ---

# Upper bound of the plotted range. With only the online run plotted it is that run's
# last calendar time; for a side-by-side comparison use
# min(simulation_df['calendar_time'].max(), samples['calendar_time'].max()) instead.
max_time = simulation_df['calendar_time'].max()

# Keep rows from calendar time `window_duration` onwards, up to `max_time`.
calendar_times = simulation_df['calendar_time']
in_plot_range = (calendar_times >= window_duration) & (calendar_times <= max_time)
simulations_plot = simulation_df[in_plot_range]

rate_fig, rate_ax = plt.subplots(figsize=(10, 6))

# To overlay the perfect-information run, filter `samples` the same way and plot it with
# label=f'Optimal Policy (Rolling {window_duration} unit avg)'.
rate_ax.plot(
    simulations_plot['calendar_time'],
    simulations_plot['profit_rate'],
    label=f'Online Learning (Rolling {window_duration} unit avg)',
)

rate_ax.set_xlabel('Calendar Time')
rate_ax.set_ylabel('Profit Rate (Profit / Time Unit)')
rate_ax.set_title(f'Rolling Profit Rate Over Time (Window = {window_duration} time units)')
rate_ax.legend()
rate_ax.grid(True)
plt.show()