In [1]:
# Standard library
import logging
import os
import pickle
from datetime import datetime

# Third-party
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import torch
from pytz import timezone
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Project-local
from policy import DPAgent
from new_new_policy import DiscretizedDPAgent
from simulation import Simulator, CustomerGenerator
from hazard_models import ExponentialHazard
from utility_learner import ProjectedVolumeLearner, diam
from degradation_learner import DegradationLearner
from utils import unit_ball_rejection_sample, correct_signs
# Configure the root logger at INFO level. The run output shows INFO records
# from both the simulation ("INFO:root:...") and gurobipy at this level.
logging.basicConfig(level=logging.INFO)

# Print small floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)

In [2]:
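# --- 2. Define Sampling Functions ---
# NOTE: context_sampler reads the global D, which is assigned in the
# "1. Simulation Configuration" cell further down; run that cell before
# any sampler is called.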
# def context_sampler() -> np.ndarray:
#     """Samples a customer's context vector from a uniform distribution."""
#     return np.random.uniform(low=0.0, high=1.0, size=D)

def context_sampler() -> np.ndarray:
    """Draw one customer context vector with non-negative entries.

    Calls ``unit_ball_rejection_sample(D)`` and returns the element-wise
    absolute value of what it gets back, so every component of the result is
    >= 0. (The raw draw is presumably uniform over the D-dimensional unit
    ball -- confirm in ``utils``.)
    """
    raw_draw = unit_ball_rejection_sample(D)
    return np.abs(raw_draw)

def rental_sampler() -> float:
    """Draw a customer's desired rental duration.

    The value is one sample from NumPy's global random stream, exponentially
    distributed with scale (mean) 10.0.
    """
    mean_duration = 10.0
    return np.random.exponential(mean_duration)

def interarrival_sampler() -> float:
    """Draw the waiting time until the next customer arrives.

    The value is one sample from NumPy's global random stream, exponentially
    distributed with scale (mean) 5.0.
    """
    mean_gap = 5.0
    return np.random.exponential(mean_gap)

In [3]:
# --- 1. Simulation Configuration ---
# Problem size and horizon.
D = 4                  # dimension shared by all context / parameter vectors
NUM_CUSTOMERS = 40000  # T: total number of customers to simulate
LAMBDA_VAL = 0.001     # baseline hazard constant

# Reproducibility: seeding is currently switched off, so each run draws a
# different random stream. Uncomment the next line to fix the NumPy seed.
# np.random.seed(41)

# Ground-truth parameter vectors (each has D entries).
THETA_TRUE = np.array([0.5, 0.2, 0.4, 0.3])  # degradation
UTILITY_TRUE = np.array(
    [0.372450167, 0.10850869, 0.33930126, 0.71356037]
)  # customers' willingness to pay (a context_sampler() draw was used here before)

# --- Machine's pricing vector 'r' ---
# Fallback pricing vector, used when u_hat is not fed to calculate_price.
PRICING_R = np.zeros(D)

In [4]:
# Exponential hazard model built from the baseline hazard constant; it is
# handed to the Simulator as `usage_hazard_model`.
usage_exp_hazard_model = ExponentialHazard(lambda_val=LAMBDA_VAL)
# spontaneous_exp_hazard_model = None # ExponentialHazard(lambda_val=0.01)

# Customer source: receives the context dimension and the three sampler
# callables (context, rental duration, inter-arrival time) defined earlier.
customer_gen = CustomerGenerator(
    d=D,
    context_sampler=context_sampler,
    rental_sampler=rental_sampler,
    interarrival_sampler=interarrival_sampler
)

# Keyword options forwarded to the learner as `centroid_params`. The dict is
# intentionally empty (presumably so ProjectedVolumeLearner falls back to its
# own defaults -- confirm in utility_learner); the disabled entries record
# settings that were tried previously.
centroid_params = {
    # 'num_samples': 2000,
    # 'thin': None,
    # 'burn_in': 500 * D ** 2,
    # 'tol': 1e-4,
    # 'rho_target': 0.01
}


def termination_rule(diameter):
    """Custom stopping test handed to ``ProjectedVolumeLearner``.

    Written as a named function rather than a lambda bound to a name: it shows
    up with a real name in tracebacks and, unlike a lambda, can be pickled by
    reference.

    Args:
        diameter: Value supplied by the learner (presumably the diameter of
            its current uncertainty set, as logged during the run -- confirm
            in ``utility_learner``).

    Returns:
        True when ``diameter`` is strictly below 0.0005, otherwise False.
    """
    return diameter < 0.0005


# Utility learner configured with the simulation horizon, the context
# dimension, and the custom termination rule defined just above.
projected_volume_learner = ProjectedVolumeLearner(
    T=NUM_CUSTOMERS,
    d=D,
    centroid_params=centroid_params,
    incentive_constant=1.1,
    termination_rule=termination_rule,
)

# Parameters handed to the Simulator as `mdp_params`.
mdp_params = {
    'duration_lambda': 10.0,     # same value as the scale used in rental_sampler
    'interarrival_lambda': 5.0,  # same value as the scale used in interarrival_sampler
    'replacement_cost': 1.5,     # cost to replace the machine
    'failure_cost': 0.75,        # additional penalty for an in-service failure
    'holding_cost_rate': 0.02,   # cost per unit of idle time
    'gamma': 0.99,               # discount factor
    'learning_rate': 0.001,      # learning rate for the Adam optimizer
    'target_update_freq': 10,    # iterations between target-network updates
}

# Settings handed to the Simulator as `training_hyperparams`.
training_hyperparams = {
    # --- FQI ---
    'num_iterations': 1,      # training iterations per policy update
    'dataset_size': 50_000,   # transitions generated for the offline dataset
    'batch_size': 256,        # batch size for training

    # --- Discrete DP ---
    # Grid sizes, ordered [cum_context, context, revenue, duration].
    # An earlier configuration used [80, 20, 60, 150] (ordered
    # [cum_context, context, duration, active_time]) together with
    # 'max_active_time': 150.0.
    'N': [100, 50, 100, 100],
    'max_cumulative_context': 8.0,
    'num_value_iterations': 100,
}

# Exploration policy name and its keyword arguments.
policy_type = 'decaying_epsilon_greedy'
policy_kwargs = {'current_epsilon': 0.10, 'decay_rate': 0.95, 'step': 0}

# Instantiate the Simulator from the pieces configured above: problem size,
# ground-truth vectors, environment components, and policy/training settings.
simulator = Simulator(
    # Problem size
    d=D,
    T=NUM_CUSTOMERS,
    
    # Ground-truth vectors and the fallback pricing vector
    theta_true=THETA_TRUE,
    utility_true=UTILITY_TRUE,
    pricing_r=PRICING_R,
    
    # Environment components and the utility learner
    usage_hazard_model=usage_exp_hazard_model,
    customer_generator=customer_gen,
    projected_volume_learner=projected_volume_learner,  # the learner configured above (custom termination rule), not a default one
    
    # Policy / training configuration
    mdp_params=mdp_params,
    discrete_dp=True,  # NOTE(review): presumably selects the discretized DP agent (cf. "Discretization setup" in the run log) -- confirm
    policy_type=policy_type,
    training_hyperparams=training_hyperparams,
    policy_kwargs=policy_kwargs,
    policy_update_threshold=100,  # NOTE(review): looks like the number of breakdowns between policy updates -- confirm in simulation.py
    time_normalize=True,
)

In [5]:
# # Optional shortcut (disabled): skips utility exploration by seeding the learner with the true utility vector (UTILITY_TRUE)
# simulator.projected_volume_learner.centroids.append(UTILITY_TRUE)
# simulator.projected_volume_learner.is_terminated = True
# simulator.seen_breakdowns = 2

# degradation_learner = DegradationLearner(d=simulator.d)
# degradation_learner.theta = np.ones(D) * 0.1
# degradation_learner.cum_baseline = lambda x: LAMBDA_VAL * x
# degradation_learner.inverse_cum_baseline = lambda y: y / LAMBDA_VAL
# simulator.degradation_learner = degradation_learner

# # dp_agent = DPAgent(
# #     d=simulator.d,
# #     u_hat=UTILITY_TRUE,
# #     time_normalize=simulator.time_normalize,
# #     degradation_learner=simulator.degradation_learner,
# #     customer_generator=simulator.customer_generator,
# #     params=simulator.mdp_params
# # )
# # dp_agent.train(**simulator.training_hyperparams)

# dp_agent = DiscretizedDPAgent(
#     N=training_hyperparams['N'], # grid sizes [cum_context, context, revenue, duration]
#     max_cumulative_context=training_hyperparams['max_cumulative_context'],
#     # max_active_time=training_hyperparams['max_active_time'],
#     u_hat=UTILITY_TRUE,
#     degradation_learner=degradation_learner,
#     customer_generator=customer_gen,
#     params=mdp_params,
# )
# # dp_agent._precompute_dynamics(num_samples=50000)
# dp_agent.run_value_iteration(100)

# simulator.dp_agent = dp_agent
# simulator.optimal_policy = dp_agent.get_policy(simulator.policy_type)
# simulator.breakdowns_since_last_update = 0 # Reset the counter


In [6]:
# Timestamp (America/Los_Angeles) taken before the run starts; it tags every
# artifact written by this cell.
pacific_tz = timezone('America/Los_Angeles')
current_time = datetime.now(pacific_tz).strftime("%Y%m%d_%H%M%S")

# Create the output folders *before* the long simulation. On a fresh checkout
# they may not exist, and a missing-directory error when writing the CSVs
# would otherwise surface only after the whole run has finished.
for output_dir in ('data', 'models'):
    os.makedirs(output_dir, exist_ok=True)

# simulator.projected_volume_learner.is_terminated = True
simulation_data = simulator.run(num_customers=NUM_CUSTOMERS)

# Tabulate the histories collected by the simulator during the run.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Persist the run: two CSVs under data/ and the simulator under models/.
degradation_df.to_csv(f'data/degradation_data_{current_time}.csv', index=False)
simulation_df.to_csv(f'data/simulation_data_{current_time}.csv', index=False)
simulator.save(f'models/simulator_{current_time}')

INFO:root:Starting simulation for 40000 customers...
  0%|          | 0/40000 [00:00<?, ?it/s]

Set parameter Username


INFO:gurobipy:Set parameter Username


Set parameter LicenseID to value 2651514


INFO:gurobipy:Set parameter LicenseID to value 2651514


Academic license - for non-commercial use only - expires 2026-04-14


INFO:gurobipy:Academic license - for non-commercial use only - expires 2026-04-14
INFO:root:Customer 1: Diameter: 0.9933
  0%|          | 1/40000 [00:03<34:42:00,  3.12s/it]INFO:root:Customer 2: Diameter: 0.6804
  0%|          | 2/40000 [00:06<36:41:05,  3.30s/it]INFO:root:Customer 3: Diameter: 0.6768
  0%|          | 3/40000 [00:10<39:27:08,  3.55s/it]INFO:root:Customer 3: Diameter: 0.6691
  0%|          | 4/40000 [00:14<42:22:26,  3.81s/it]INFO:root:Customer 4: Diameter: 0.6095
  0%|          | 5/40000 [00:19<45:28:42,  4.09s/it]INFO:root:Customer 4: Diameter: 0.2057
  0%|          | 6/40000 [00:24<49:02:21,  4.41s/it]INFO:root:Customer 5: Diameter: 0.6168
  0%|          | 7/40000 [00:29<52:23:52,  4.72s/it]INFO:root:Customer 6: Diameter: 0.3673
  0%|          | 8/40000 [00:35<55:35:37,  5.00s/it]INFO:root:Customer 7: Diameter: 0.2814
  0%|          | 9/40000 [00:41<58:31:22,  5.27s/it]INFO:root:Customer 8: Diameter: 0.3588
  0%|          | 10/40000 [00:47<61:32:56,  5.54s/it]INFO:ro

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.17805D+00    |proj g|=  1.00000D+00

At iterate    1    f=  8.19697D-01    |proj g|=  3.05142D-01

At iterate    2    f=  7.75176D-01    |proj g|=  2.61161D-01

At iterate    3    f=  6.73739D-01    |proj g|=  5.46612D-02

At iterate    4    f=  6.71556D-01    |proj g|=  2.55628D-02

At iterate    5    f=  6.70907D-01    |proj g|=  1.72562D-03

At iterate    6    f=  6.70904D-01    |proj g|=  5.66492D-05

At iterate    7    f=  6.70904D-01    |proj g|=  1.27777D-07

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function 

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.094873
Iteration 20/100 | Max Change (Delta): 0.064984
Iteration 30/100 | Max Change (Delta): 0.052534
Iteration 40/100 | Max Change (Delta): 0.042941
Iteration 50/100 | Max Change (Delta): 0.035121
Iteration 60/100 | Max Change (Delta): 0.028725
Iteration 70/100 | Max Change (Delta): 0.023495
Iteration 80/100 | Max Change (Delta): 0.019216
Iteration 90/100 | Max Change (Delta): 0.015717


INFO:root:Policy updated.
  8%|▊         | 3348/40000 [09:01<14:59, 40.75it/s]  INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.012855

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  5.98117D+02    |proj g|=  1.00000D+00

At iterate    1    f=  5.73233D+02    |proj g|=  5.93776D-01

At iterate    2    f=  5.72819D+02    |proj g|=  5.35317D-01

At iterate    3    f=  5.72459D+02    |proj g|=  1.01426D-01


INFO:root:Theta updated. New theta_hat: [0.605 0.358 0.402 0.255]



At iterate    4    f=  5.72458D+02    |proj g|=  1.63755D-02

At iterate    5    f=  5.72458D+02    |proj g|=  5.24267D-03

At iterate    6    f=  5.72458D+02    |proj g|=  3.91915D-04

At iterate    7    f=  5.72458D+02    |proj g|=  1.49063D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      7      9     10     0     0   1.491D-05   5.725D+02
  F =   572.45831284813653     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 

  8%|▊         | 3348/40000 [09:20<14:59, 40.75it/s]

Iteration 20/100 | Max Change (Delta): 0.133317
Iteration 30/100 | Max Change (Delta): 0.106869
Iteration 40/100 | Max Change (Delta): 0.087054
Iteration 50/100 | Max Change (Delta): 0.071145
Iteration 60/100 | Max Change (Delta): 0.058180
Iteration 70/100 | Max Change (Delta): 0.047584
Iteration 80/100 | Max Change (Delta): 0.038919
Iteration 90/100 | Max Change (Delta): 0.031832


INFO:root:Policy updated.
 15%|█▌        | 6196/40000 [10:23<15:03, 37.40it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.026036

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.18286D+03    |proj g|=  1.00000D+00

At iterate    1    f=  1.12658D+03    |proj g|=  6.43439D-01

At iterate    2    f=  1.12586D+03    |proj g|=  5.85652D-01

At iterate    3    f=  1.12491D+03    |proj g|=  4.38949D-01

At iterate    4    f=  1.12491D+03    |proj g|=  7.42393D-02

At iterate    5    f=  1.12491D+03    |proj g|=  4.04273D-02

At iterate    6    f=  1.12491D+03    |proj g|=  2.87952D-03


INFO:root:Theta updated. New theta_hat: [0.571 0.217 0.342 0.352]



At iterate    7    f=  1.12491D+03    |proj g|=  4.65887D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      7      9     10     0     0   4.659D-04   1.125D+03
  F =   1124.9070312257215     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 15%|█▌        | 6196/40000 [10:40<15:03, 37.40it/s]

Iteration 20/100 | Max Change (Delta): 0.145039
Iteration 30/100 | Max Change (Delta): 0.115918
Iteration 40/100 | Max Change (Delta): 0.094289
Iteration 50/100 | Max Change (Delta): 0.077020
Iteration 60/100 | Max Change (Delta): 0.062976
Iteration 70/100 | Max Change (Delta): 0.051504
Iteration 80/100 | Max Change (Delta): 0.042125
Iteration 90/100 | Max Change (Delta): 0.034454


INFO:root:Policy updated.
 20%|██        | 8019/40000 [11:42<17:17, 30.84it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028180

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.79670D+03    |proj g|=  1.00000D+00

At iterate    1    f=  1.71697D+03    |proj g|=  6.71021D-01

At iterate    2    f=  1.71519D+03    |proj g|=  6.25941D-01

At iterate    3    f=  1.71317D+03    |proj g|=  4.57882D-01

At iterate    4    f=  1.71316D+03    |proj g|=  9.84783D-02

At iterate    5    f=  1.71316D+03    |proj g|=  1.55085D-02

At iterate    6    f=  1.71316D+03    |proj g|=  1.11925D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.51  0.151 0.448 0.269]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.212691


 20%|██        | 8019/40000 [12:00<17:17, 30.84it/s]

Iteration 20/100 | Max Change (Delta): 0.156380
Iteration 30/100 | Max Change (Delta): 0.124497
Iteration 40/100 | Max Change (Delta): 0.101073
Iteration 50/100 | Max Change (Delta): 0.082503
Iteration 60/100 | Max Change (Delta): 0.067444
Iteration 70/100 | Max Change (Delta): 0.055155
Iteration 80/100 | Max Change (Delta): 0.045109
Iteration 90/100 | Max Change (Delta): 0.036895


INFO:root:Policy updated.
 24%|██▍       | 9610/40000 [13:02<19:05, 26.54it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.030177

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  2.44346D+03    |proj g|=  1.00000D+00

At iterate    1    f=  2.33411D+03    |proj g|=  6.65825D-01

At iterate    2    f=  2.33095D+03    |proj g|=  6.32881D-01

At iterate    3    f=  2.32707D+03    |proj g|=  5.87865D-01

At iterate    4    f=  2.32707D+03    |proj g|=  4.10972D-02

At iterate    5    f=  2.32707D+03    |proj g|=  3.67665D-03

At iterate    6    f=  2.32707D+03    |proj g|=  2.65415D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.585 0.161 0.429 0.216]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.210424


 24%|██▍       | 9610/40000 [13:20<19:05, 26.54it/s]

Iteration 20/100 | Max Change (Delta): 0.154944
Iteration 30/100 | Max Change (Delta): 0.123486
Iteration 40/100 | Max Change (Delta): 0.100312
Iteration 50/100 | Max Change (Delta): 0.081901
Iteration 60/100 | Max Change (Delta): 0.066956
Iteration 70/100 | Max Change (Delta): 0.054757
Iteration 80/100 | Max Change (Delta): 0.044785
Iteration 90/100 | Max Change (Delta): 0.036630


INFO:root:Policy updated.
 28%|██▊       | 11059/40000 [14:22<20:32, 23.48it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029960

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.08457D+03    |proj g|=  1.00000D+00

At iterate    1    f=  2.93155D+03    |proj g|=  6.48165D-01

At iterate    2    f=  2.92749D+03    |proj g|=  6.12047D-01

At iterate    3    f=  2.92292D+03    |proj g|=  5.86143D-01

At iterate    4    f=  2.92291D+03    |proj g|=  4.78076D-02


INFO:root:Theta updated. New theta_hat: [0.583 0.196 0.464 0.228]



At iterate    5    f=  2.92291D+03    |proj g|=  4.72664D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   4.727D-03   2.923D+03
  F =   2922.9143755642308     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 28%|██▊       | 11059/40000 [14:40<20:32, 23.48it/s]

Iteration 20/100 | Max Change (Delta): 0.146770
Iteration 30/100 | Max Change (Delta): 0.117239
Iteration 40/100 | Max Change (Delta): 0.095334
Iteration 50/100 | Max Change (Delta): 0.077871
Iteration 60/100 | Max Change (Delta): 0.063671
Iteration 70/100 | Max Change (Delta): 0.052073
Iteration 80/100 | Max Change (Delta): 0.042590
Iteration 90/100 | Max Change (Delta): 0.034834


INFO:root:Policy updated.
 30%|███       | 12190/40000 [15:44<22:55, 20.22it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028491

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.78977D+03    |proj g|=  1.00000D+00

At iterate    1    f=  3.61126D+03    |proj g|=  6.54247D-01

At iterate    2    f=  3.60454D+03    |proj g|=  6.26681D-01

At iterate    3    f=  3.59703D+03    |proj g|=  6.40288D-01

At iterate    4    f=  3.59703D+03    |proj g|=  8.02369D-02

At iterate    5    f=  3.59703D+03    |proj g|=  5.45492D-03


INFO:root:Theta updated. New theta_hat: [0.635 0.154 0.439 0.231]



At iterate    6    f=  3.59703D+03    |proj g|=  2.54283D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   2.543D-03   3.597D+03
  F =   3597.0260823864301     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 30%|███       | 12190/40000 [16:00<22:55, 20.22it/s]

Iteration 20/100 | Max Change (Delta): 0.148232
Iteration 30/100 | Max Change (Delta): 0.118428
Iteration 40/100 | Max Change (Delta): 0.096306
Iteration 50/100 | Max Change (Delta): 0.078664
Iteration 60/100 | Max Change (Delta): 0.064319
Iteration 70/100 | Max Change (Delta): 0.052603
Iteration 80/100 | Max Change (Delta): 0.043023
Iteration 90/100 | Max Change (Delta): 0.035189


INFO:root:Policy updated.
 34%|███▍      | 13647/40000 [17:05<22:33, 19.47it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028781

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  4.53212D+03    |proj g|=  1.00000D+00

At iterate    1    f=  4.32451D+03    |proj g|=  6.53495D-01

At iterate    2    f=  4.31766D+03    |proj g|=  6.32428D-01

At iterate    3    f=  4.30980D+03    |proj g|=  6.35317D-01

At iterate    4    f=  4.30979D+03    |proj g|=  9.10805D-02

At iterate    5    f=  4.30979D+03    |proj g|=  1.23319D-02


INFO:root:Theta updated. New theta_hat: [0.63  0.178 0.42  0.227]



At iterate    6    f=  4.30979D+03    |proj g|=  6.06709D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   6.067D-03   4.310D+03
  F =   4309.7925222906069     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 34%|███▍      | 13647/40000 [17:20<22:33, 19.47it/s]

Iteration 20/100 | Max Change (Delta): 0.148202
Iteration 30/100 | Max Change (Delta): 0.118350
Iteration 40/100 | Max Change (Delta): 0.096230
Iteration 50/100 | Max Change (Delta): 0.078598
Iteration 60/100 | Max Change (Delta): 0.064264
Iteration 70/100 | Max Change (Delta): 0.052557
Iteration 80/100 | Max Change (Delta): 0.042986
Iteration 90/100 | Max Change (Delta): 0.035158


INFO:root:Policy updated.
 38%|███▊      | 15236/40000 [18:27<21:12, 19.46it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028756

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  5.26793D+03    |proj g|=  1.00000D+00

At iterate    1    f=  5.02601D+03    |proj g|=  6.50997D-01

At iterate    2    f=  5.01946D+03    |proj g|=  6.24888D-01

At iterate    3    f=  5.01185D+03    |proj g|=  5.95603D-01

At iterate    4    f=  5.01184D+03    |proj g|=  1.14861D-01

At iterate    5    f=  5.01184D+03    |proj g|=  1.27158D-02


INFO:root:Theta updated. New theta_hat: [0.591 0.187 0.442 0.233]



At iterate    6    f=  5.01184D+03    |proj g|=  7.68470D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   7.685D-03   5.012D+03
  F =   5011.8427416829363     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 38%|███▊      | 15236/40000 [18:40<21:12, 19.46it/s]

Iteration 10/100 | Max Change (Delta): 0.200982
Iteration 20/100 | Max Change (Delta): 0.148322
Iteration 30/100 | Max Change (Delta): 0.118442
Iteration 40/100 | Max Change (Delta): 0.096294
Iteration 50/100 | Max Change (Delta): 0.078647
Iteration 60/100 | Max Change (Delta): 0.064303
Iteration 70/100 | Max Change (Delta): 0.052590
Iteration 80/100 | Max Change (Delta): 0.043012
Iteration 90/100 | Max Change (Delta): 0.035180


INFO:root:Policy updated.
 42%|████▏     | 16791/40000 [19:49<20:04, 19.26it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028774

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  6.01492D+03    |proj g|=  1.00000D+00

At iterate    1    f=  5.74805D+03    |proj g|=  6.55505D-01

At iterate    2    f=  5.74275D+03    |proj g|=  6.33509D-01


 42%|████▏     | 16791/40000 [20:00<20:04, 19.26it/s]


At iterate    3    f=  5.73666D+03    |proj g|=  5.54172D-01

At iterate    4    f=  5.73666D+03    |proj g|=  9.04516D-02

At iterate    5    f=  5.73666D+03    |proj g|=  1.04353D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   1.044D-02   5.737D+03
  F =   5736.6605727340975     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.552 0.201 0.42  0.26 ]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.204526
Iteration 20/100 | Max Change (Delta): 0.150595
Iteration 30/100 | Max Change (Delta): 0.120191
Iteration 40/100 | Max Change (Delta): 0.097663
Iteration 50/100 | Max Change (Delta): 0.079750
Iteration 60/100 | Max Change (Delta): 0.065202
Iteration 70/100 | Max Change (Delta): 0.053323
Iteration 80/100 | Max Change (Delta): 0.043612
Iteration 90/100 | Max Change (Delta): 0.035670


INFO:root:Policy updated.
 46%|████▌     | 18233/40000 [21:27<20:31, 17.67it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029175

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  6.76805D+03    |proj g|=  1.00000D+00

At iterate    1    f=  6.47015D+03    |proj g|=  6.54930D-01

At iterate    2    f=  6.46485D+03    |proj g|=  6.36171D-01

At iterate    3    f=  6.45867D+03    |proj g|=  5.50701D-01


 46%|████▌     | 18233/40000 [21:40<20:31, 17.67it/s]


At iterate    4    f=  6.45867D+03    |proj g|=  8.90552D-02

At iterate    5    f=  6.45867D+03    |proj g|=  4.56938D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   4.569D-03   6.459D+03
  F =   6458.6656219626675     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.549 0.228 0.412 0.243]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.204459
Iteration 20/100 | Max Change (Delta): 0.150527
Iteration 30/100 | Max Change (Delta): 0.120091
Iteration 40/100 | Max Change (Delta): 0.097573
Iteration 50/100 | Max Change (Delta): 0.079675
Iteration 60/100 | Max Change (Delta): 0.065140
Iteration 70/100 | Max Change (Delta): 0.053272
Iteration 80/100 | Max Change (Delta): 0.043571
Iteration 90/100 | Max Change (Delta): 0.035636


INFO:root:Policy updated.
 49%|████▊     | 19478/40000 [23:15<22:02, 15.51it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029147

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  7.53016D+03    |proj g|=  1.00000D+00

At iterate    1    f=  7.20933D+03    |proj g|=  6.58092D-01

At iterate    2    f=  7.20283D+03    |proj g|=  6.27267D-01


 49%|████▊     | 19478/40000 [23:30<22:02, 15.51it/s]


At iterate    3    f=  7.19524D+03    |proj g|=  5.33778D-01

At iterate    4    f=  7.19523D+03    |proj g|=  1.00892D-01

At iterate    5    f=  7.19523D+03    |proj g|=  9.14247D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   9.142D-03   7.195D+03
  F =   7195.2341662790777     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.532 0.21  0.451 0.229]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206053
Iteration 20/100 | Max Change (Delta): 0.151762
Iteration 30/100 | Max Change (Delta): 0.121035
Iteration 40/100 | Max Change (Delta): 0.098330
Iteration 50/100 | Max Change (Delta): 0.080290
Iteration 60/100 | Max Change (Delta): 0.065642
Iteration 70/100 | Max Change (Delta): 0.053683
Iteration 80/100 | Max Change (Delta): 0.043906
Iteration 90/100 | Max Change (Delta): 0.035911


INFO:root:Policy updated.
 52%|█████▏    | 20893/40000 [25:29<23:25, 13.59it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029372

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  8.29832D+03    |proj g|=  1.00000D+00

At iterate    1    f=  7.95800D+03    |proj g|=  6.60963D-01


 52%|█████▏    | 20893/40000 [25:40<23:25, 13.59it/s]


At iterate    2    f=  7.95138D+03    |proj g|=  6.34247D-01

At iterate    3    f=  7.94377D+03    |proj g|=  5.31352D-01

At iterate    4    f=  7.94376D+03    |proj g|=  9.48748D-02

At iterate    5    f=  7.94376D+03    |proj g|=  1.83972D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   1.840D-02   7.944D+03
  F =   7943.7629819343902     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.529 0.214 0.434 0.232]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207160
Iteration 20/100 | Max Change (Delta): 0.152514
Iteration 30/100 | Max Change (Delta): 0.121539
Iteration 40/100 | Max Change (Delta): 0.098716
Iteration 50/100 | Max Change (Delta): 0.080595
Iteration 60/100 | Max Change (Delta): 0.065887
Iteration 70/100 | Max Change (Delta): 0.053883
Iteration 80/100 | Max Change (Delta): 0.044070
Iteration 90/100 | Max Change (Delta): 0.036045


INFO:root:Policy updated.
 56%|█████▌    | 22275/40000 [27:09<21:39, 13.64it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029481

Value iteration finished (max iterations reached).


 56%|█████▌    | 22275/40000 [27:20<21:39, 13.64it/s]

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  9.08287D+03    |proj g|=  1.00000D+00

At iterate    1    f=  8.70514D+03    |proj g|=  6.57317D-01

At iterate    2    f=  8.69885D+03    |proj g|=  6.32780D-01

At iterate    3    f=  8.69153D+03    |proj g|=  5.22235D-01

At iterate    4    f=  8.69153D+03    |proj g|=  1.19985D-01

At iterate    5    f=  8.69153D+03    |proj g|=  4.16043D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   

INFO:root:Theta updated. New theta_hat: [0.52  0.209 0.429 0.259]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206441
Iteration 20/100 | Max Change (Delta): 0.151888
Iteration 30/100 | Max Change (Delta): 0.121092
Iteration 40/100 | Max Change (Delta): 0.098353
Iteration 50/100 | Max Change (Delta): 0.080303
Iteration 60/100 | Max Change (Delta): 0.065650
Iteration 70/100 | Max Change (Delta): 0.053689
Iteration 80/100 | Max Change (Delta): 0.043911
Iteration 90/100 | Max Change (Delta): 0.035915


INFO:root:Policy updated.
 59%|█████▉    | 23762/40000 [29:33<21:49, 12.40it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029375

Value iteration finished (max iterations reached).


 59%|█████▉    | 23762/40000 [29:51<21:49, 12.40it/s]

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  9.87017D+03    |proj g|=  1.00000D+00

At iterate    1    f=  9.45409D+03    |proj g|=  6.54495D-01

At iterate    2    f=  9.44773D+03    |proj g|=  6.30410D-01

At iterate    3    f=  9.44049D+03    |proj g|=  5.19304D-01

At iterate    4    f=  9.44048D+03    |proj g|=  1.19204D-01

At iterate    5    f=  9.44048D+03    |proj g|=  4.52400D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   

INFO:root:Theta updated. New theta_hat: [0.517 0.226 0.427 0.258]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.204068
Iteration 20/100 | Max Change (Delta): 0.150289
Iteration 30/100 | Max Change (Delta): 0.119808
Iteration 40/100 | Max Change (Delta): 0.097330
Iteration 50/100 | Max Change (Delta): 0.079474
Iteration 60/100 | Max Change (Delta): 0.064974
Iteration 70/100 | Max Change (Delta): 0.053136
Iteration 80/100 | Max Change (Delta): 0.043459
Iteration 90/100 | Max Change (Delta): 0.035545


INFO:root:Policy updated.
 63%|██████▎   | 25109/40000 [32:40<24:07, 10.28it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029073

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.06686D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.02113D+04    |proj g|=  6.49063D-01

At iterate    2    f=  1.02035D+04    |proj g|=  6.17196D-01

At iterate    3    f=  1.01950D+04    |proj g|=  5.18037D-01

At iterate    4    f=  1.01950D+04    |proj g|=  1.47044D-01

At iterate    5    f=  1.01950D+04    |proj g|=  6.00806D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.516 0.232 0.457 0.246]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.201203
Iteration 20/100 | Max Change (Delta): 0.148177
Iteration 30/100 | Max Change (Delta): 0.118323
Iteration 40/100 | Max Change (Delta): 0.096169
Iteration 50/100 | Max Change (Delta): 0.078537
Iteration 60/100 | Max Change (Delta): 0.064211
Iteration 70/100 | Max Change (Delta): 0.052514
Iteration 80/100 | Max Change (Delta): 0.042950
Iteration 90/100 | Max Change (Delta): 0.035129


INFO:root:Policy updated.
 67%|██████▋   | 26683/40000 [36:21<24:43,  8.98it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028732

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.14733D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.10006D+04    |proj g|=  6.55181D-01

At iterate    2    f=  1.09918D+04    |proj g|=  6.29897D-01

At iterate    3    f=  1.09819D+04    |proj g|=  5.37977D-01

At iterate    4    f=  1.09819D+04    |proj g|=  1.82372D-01

At iterate    5    f=  1.09819D+04    |proj g|=  7.55922D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.535 0.226 0.43  0.237]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.204740
Iteration 20/100 | Max Change (Delta): 0.150771
Iteration 30/100 | Max Change (Delta): 0.120238
Iteration 40/100 | Max Change (Delta): 0.097679
Iteration 50/100 | Max Change (Delta): 0.079758
Iteration 60/100 | Max Change (Delta): 0.065207
Iteration 70/100 | Max Change (Delta): 0.053327
Iteration 80/100 | Max Change (Delta): 0.043615
Iteration 90/100 | Max Change (Delta): 0.035673


INFO:root:Policy updated.
 70%|███████   | 28002/40000 [40:56<27:42,  7.22it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029177

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.22705D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.17716D+04    |proj g|=  6.56036D-01

At iterate    2    f=  1.17628D+04    |proj g|=  6.32738D-01

At iterate    3    f=  1.17530D+04    |proj g|=  5.33875D-01

At iterate    4    f=  1.17530D+04    |proj g|=  1.72167D-01

At iterate    5    f=  1.17530D+04    |proj g|=  7.02955D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.531 0.228 0.423 0.243]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206144
Iteration 20/100 | Max Change (Delta): 0.151705
Iteration 30/100 | Max Change (Delta): 0.121021
Iteration 40/100 | Max Change (Delta): 0.098311
Iteration 50/100 | Max Change (Delta): 0.080272
Iteration 60/100 | Max Change (Delta): 0.065626
Iteration 70/100 | Max Change (Delta): 0.053670
Iteration 80/100 | Max Change (Delta): 0.043896
Iteration 90/100 | Max Change (Delta): 0.035902


INFO:root:Policy updated.
 73%|███████▎  | 29281/40000 [46:51<31:41,  5.64it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029365

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.30923D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.25782D+04    |proj g|=  6.61811D-01

At iterate    2    f=  1.25670D+04    |proj g|=  6.37809D-01

At iterate    3    f=  1.25546D+04    |proj g|=  5.48655D-01

At iterate    4    f=  1.25546D+04    |proj g|=  1.97401D-01

At iterate    5    f=  1.25546D+04    |proj g|=  7.55783D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.546 0.205 0.418 0.239]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207811
Iteration 20/100 | Max Change (Delta): 0.153028
Iteration 30/100 | Max Change (Delta): 0.121991
Iteration 40/100 | Max Change (Delta): 0.099088
Iteration 50/100 | Max Change (Delta): 0.080902
Iteration 60/100 | Max Change (Delta): 0.066140
Iteration 70/100 | Max Change (Delta): 0.054090
Iteration 80/100 | Max Change (Delta): 0.044239
Iteration 90/100 | Max Change (Delta): 0.036183


INFO:root:Policy updated.
 77%|███████▋  | 30675/40000 [52:07<29:51,  5.20it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029594

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.39167D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.33815D+04    |proj g|=  6.63488D-01

At iterate    2    f=  1.33707D+04    |proj g|=  6.40730D-01

At iterate    3    f=  1.33588D+04    |proj g|=  5.38943D-01

At iterate    4    f=  1.33587D+04    |proj g|=  2.01337D-01

At iterate    5    f=  1.33587D+04    |proj g|=  8.00674D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.536 0.203 0.411 0.247]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208745
Iteration 20/100 | Max Change (Delta): 0.153651
Iteration 30/100 | Max Change (Delta): 0.122408
Iteration 40/100 | Max Change (Delta): 0.099406
Iteration 50/100 | Max Change (Delta): 0.081153
Iteration 60/100 | Max Change (Delta): 0.066343
Iteration 70/100 | Max Change (Delta): 0.054255
Iteration 80/100 | Max Change (Delta): 0.044374
Iteration 90/100 | Max Change (Delta): 0.036293


INFO:root:Policy updated.
 80%|███████▉  | 31944/40000 [56:56<27:08,  4.95it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029685

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.47330D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.41660D+04    |proj g|=  6.61523D-01

At iterate    2    f=  1.41552D+04    |proj g|=  6.35819D-01

At iterate    3    f=  1.41431D+04    |proj g|=  5.28106D-01

At iterate    4    f=  1.41431D+04    |proj g|=  2.25837D-01

At iterate    5    f=  1.41431D+04    |proj g|=  9.44321D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.526 0.199 0.422 0.255]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208087
Iteration 20/100 | Max Change (Delta): 0.153139
Iteration 30/100 | Max Change (Delta): 0.122038
Iteration 40/100 | Max Change (Delta): 0.099114
Iteration 50/100 | Max Change (Delta): 0.080919
Iteration 60/100 | Max Change (Delta): 0.066153
Iteration 70/100 | Max Change (Delta): 0.054100
Iteration 80/100 | Max Change (Delta): 0.044247
Iteration 90/100 | Max Change (Delta): 0.036190


INFO:root:Policy updated.
 83%|████████▎ | 33106/40000 [5:32:43<7:38:41,  3.99s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029600

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.55529D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.49534D+04    |proj g|=  6.60250D-01

At iterate    2    f=  1.49431D+04    |proj g|=  6.36568D-01

At iterate    3    f=  1.49318D+04    |proj g|=  5.20593D-01

At iterate    4    f=  1.49318D+04    |proj g|=  1.81161D-01

At iterate    5    f=  1.49318D+04    |proj g|=  6.82978D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.518 0.212 0.416 0.259]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207906
Iteration 20/100 | Max Change (Delta): 0.153003
Iteration 30/100 | Max Change (Delta): 0.121918
Iteration 40/100 | Max Change (Delta): 0.099015
Iteration 50/100 | Max Change (Delta): 0.080837
Iteration 60/100 | Max Change (Delta): 0.066085
Iteration 70/100 | Max Change (Delta): 0.054045
Iteration 80/100 | Max Change (Delta): 0.044202
Iteration 90/100 | Max Change (Delta): 0.036153


INFO:root:Policy updated.
 86%|████████▌ | 34299/40000 [10:14:46<10:54:02,  6.88s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029569

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.63775D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.57361D+04    |proj g|=  6.56290D-01

At iterate    2    f=  1.57256D+04    |proj g|=  6.32096D-01

At iterate    3    f=  1.57143D+04    |proj g|=  5.16416D-01

At iterate    4    f=  1.57143D+04    |proj g|=  1.72614D-01

At iterate    5    f=  1.57143D+04    |proj g|=  6.01048D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.514 0.22  0.423 0.26 ]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206643
Iteration 20/100 | Max Change (Delta): 0.151968
Iteration 30/100 | Max Change (Delta): 0.121188
Iteration 40/100 | Max Change (Delta): 0.098421
Iteration 50/100 | Max Change (Delta): 0.080354
Iteration 60/100 | Max Change (Delta): 0.065691
Iteration 70/100 | Max Change (Delta): 0.053723
Iteration 80/100 | Max Change (Delta): 0.043939
Iteration 90/100 | Max Change (Delta): 0.035937


INFO:root:Policy updated.
 89%|████████▉ | 35617/40000 [11:43:42<7:18:47,  6.01s/it] INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029393

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.72120D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.65488D+04    |proj g|=  6.58172D-01

At iterate    2    f=  1.65383D+04    |proj g|=  6.32236D-01

At iterate    3    f=  1.65269D+04    |proj g|=  5.03299D-01

At iterate    4    f=  1.65269D+04    |proj g|=  1.80645D-01

At iterate    5    f=  1.65269D+04    |proj g|=  5.80194D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.502 0.21  0.427 0.269]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208833
Iteration 20/100 | Max Change (Delta): 0.153599
Iteration 30/100 | Max Change (Delta): 0.122481
Iteration 40/100 | Max Change (Delta): 0.099461
Iteration 50/100 | Max Change (Delta): 0.081202
Iteration 60/100 | Max Change (Delta): 0.066383
Iteration 70/100 | Max Change (Delta): 0.054288
Iteration 80/100 | Max Change (Delta): 0.044401
Iteration 90/100 | Max Change (Delta): 0.036316


INFO:root:Policy updated.
 93%|█████████▎| 37032/40000 [11:52:08<3:27:14,  4.19s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029703

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.80506D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.73693D+04    |proj g|=  6.60326D-01

At iterate    2    f=  1.73579D+04    |proj g|=  6.35827D-01

At iterate    3    f=  1.73455D+04    |proj g|=  5.08732D-01

At iterate    4    f=  1.73455D+04    |proj g|=  2.10046D-01

At iterate    5    f=  1.73455D+04    |proj g|=  7.49051D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.507 0.204 0.419 0.268]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.209538
Iteration 20/100 | Max Change (Delta): 0.154105
Iteration 30/100 | Max Change (Delta): 0.122792
Iteration 40/100 | Max Change (Delta): 0.099701
Iteration 50/100 | Max Change (Delta): 0.081393
Iteration 60/100 | Max Change (Delta): 0.066537
Iteration 70/100 | Max Change (Delta): 0.054414
Iteration 80/100 | Max Change (Delta): 0.044504
Iteration 90/100 | Max Change (Delta): 0.036400


INFO:root:Policy updated.
 96%|█████████▌| 38289/40000 [11:58:30<1:27:20,  3.06s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029771

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.88865D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.81742D+04    |proj g|=  6.60570D-01

At iterate    2    f=  1.81611D+04    |proj g|=  6.35753D-01

At iterate    3    f=  1.81469D+04    |proj g|=  5.17870D-01

At iterate    4    f=  1.81469D+04    |proj g|=  2.40027D-01

At iterate    5    f=  1.81469D+04    |proj g|=  8.52514D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.516 0.2   0.421 0.262]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208715
Iteration 20/100 | Max Change (Delta): 0.153586
Iteration 30/100 | Max Change (Delta): 0.122396
Iteration 40/100 | Max Change (Delta): 0.099400
Iteration 50/100 | Max Change (Delta): 0.081152
Iteration 60/100 | Max Change (Delta): 0.066342
Iteration 70/100 | Max Change (Delta): 0.054255
Iteration 80/100 | Max Change (Delta): 0.044374
Iteration 90/100 | Max Change (Delta): 0.036293


INFO:root:Policy updated.
100%|██████████| 40000/40000 [12:04:56<00:00,  1.09s/it]  
INFO:root:Simulation finished.


Iteration 100/100 | Max Change (Delta): 0.029685

Value iteration finished (max iterations reached).
Policy saved to models/simulator_20250915_214415.discrete_policy.pkl


INFO:root:Simulation state saved to models/simulator_20250915_214415.


In [None]:
# Materialise the simulator's two logs as DataFrames so they can be exported.
simulation_df = pd.DataFrame(simulator.history)
degradation_df = pd.DataFrame(simulator.degradation_history)

# Persist both logs as CSV, plus the simulator itself, all tagged with the
# run's `current_time` stamp (defined in an earlier cell).
degradation_df.to_csv(
    path_or_buf=f'data/degradation_data_{current_time}.csv',
    index=False,
)
simulation_df.to_csv(
    path_or_buf=f'data/simulation_data_{current_time}.csv',
    index=False,
)
simulator.save(f'models/simulator_{current_time}')

In [None]:
# NOTE(review): this cell repeats the export cell directly above it statement
# for statement; running both rewrites the same CSV files and simulator snapshot.
# Consider deleting one of the two cells.
degradation_df, simulation_df = (
    pd.DataFrame(simulator.degradation_history),
    pd.DataFrame(simulator.history),
)

# Write each log without the DataFrame index column.
degradation_csv_options = {'index': False}
degradation_df.to_csv(f'data/degradation_data_{current_time}.csv', **degradation_csv_options)
simulation_df.to_csv(f'data/simulation_data_{current_time}.csv', **degradation_csv_options)

# Snapshot the simulator under the same run stamp.
simulator.save(f'models/simulator_{current_time}')

### Convergence of $\hat\theta$

In [None]:
# simulator = Simulator.load('models/simulator_0914')

history = pd.DataFrame(simulator.history)
# Not used by this cell; kept so the frame stays available for inspection.
degradation_history = pd.DataFrame(simulator.degradation_history)

# Reference curve drawn next to the errors: 0.20 decayed by 0.95 per theta update.
# NOTE(review): presumably mirrors the simulator's exploration schedule -- confirm
# that these constants match the values used during the run.
epsilons = [0.20 * (0.95 ** i) for i in range(len(simulator.theta_updates))]

# Calendar time of each theta update: the latest `calendar_time` logged for the
# customer at which the update happened. A single groupby replaces one full
# boolean scan of `history` per update; customers absent from the log map to NaN,
# which is what `.max()` on an empty selection yields as well.
last_time_by_customer = history.groupby('customer_id')['calendar_time'].max()
times = [
    last_time_by_customer.get(update['customer_idx'], np.nan)
    for update in simulator.theta_updates
]

# L2 and L-inf norms of the estimation error after each theta update.
L2_errors = [np.linalg.norm(update['theta_hat'] - THETA_TRUE) for update in simulator.theta_updates]
Linf_errors = [np.linalg.norm(update['theta_hat'] - THETA_TRUE, ord=np.inf) for update in simulator.theta_updates]

plt.figure(figsize=(12, 6))
plt.plot(times, L2_errors, label='$L_2$ Norm Error', marker='o')
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
plt.plot(times, Linf_errors, label=r'$L_\infty$ Norm Error', marker='x')
plt.plot(times, epsilons, label='Exploration Rate (ε)', linestyle='--', color='gray')
# plt.yscale('log')
# The x values are calendar times, not customer counts.
plt.xlabel('Calendar Time', fontsize=14)
plt.ylabel('Error Norm', fontsize=14)

plt.title(r'Convergence of $\|\hat{\theta} - \theta\|$', fontsize=18)
plt.legend(fontsize=12)
plt.grid(True)
# Dedicated file name: the u_hat convergence cell writes 'figures/utility_convergence.pdf',
# so saving there would let that cell overwrite this figure.
plt.savefig('figures/theta_convergence.pdf')
plt.show()

### Convergence of $\hat u$

In [None]:
# L2 and L-inf norms of the estimation error after each utility update.
L2_errors = [np.linalg.norm(update['u_hat'] - UTILITY_TRUE) for update in simulator.utility_updates]
Linf_errors = [np.linalg.norm(update['u_hat'] - UTILITY_TRUE, ord=np.inf) for update in simulator.utility_updates]

plt.figure(figsize=(12, 6))
# No x values are passed, so the horizontal axis is the position of each update
# in `simulator.utility_updates`.
plt.plot(L2_errors, label='$L_2$ Norm Error', marker='o')
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
plt.plot(Linf_errors, label=r'$L_\infty$ Norm Error', marker='x')
# plt.yscale('log')
# NOTE(review): this label is only accurate if exactly one utility update is
# recorded per customer -- confirm, otherwise relabel as an update index.
plt.xlabel('Number of Customers Processed', fontsize=14)
plt.ylabel('Error Norm', fontsize=14)

plt.title(r'Convergence of $\|\hat u - u\|$', fontsize=18)
plt.legend(fontsize=12)
plt.grid(True)
plt.savefig('figures/utility_convergence.pdf')
plt.show()

### Revenue of Online Learner

In [None]:
degradation_df = pd.DataFrame(simulator.degradation_history)

# Build the simulation log and derive per-row and running net profit in one chain.
# Net profit is `profit + loss`.
# NOTE(review): this presumes `loss` is logged as a non-positive number -- confirm.
simulation_df = pd.DataFrame(simulator.history).assign(
    net_profit=lambda frame: frame['profit'] + frame['loss'],
    cumulative_net_profit=lambda frame: frame['net_profit'].cumsum(),
)

ax = plt.figure(figsize=(10,6))

# Running net profit against calendar time.
plt.plot(
    simulation_df['calendar_time'],
    simulation_df['cumulative_net_profit'],
    label='Cumulative Net Profit',
)
plt.title('Cumulative Net Profit Over Time')
plt.xlabel('Calendar Time')
plt.ylabel('Cumulative Net Profit')
plt.legend()
plt.grid()
plt.savefig('figures/cumulative_net_profit_online.pdf')
plt.show()

## Training policy under perfect information

### Revenue of Optimal Policy

In [None]:
class PerfectDegradationLearner:
    """Degradation "learner" that performs no learning: it hands back a fixed theta.

    Exposes `get_theta`, `cum_baseline` and `inverse_cum_baseline`, the latter
    two delegating to the supplied hazard model.

    Args:
        d: Value stored on the instance as `self.d`; not read by any method here.
        theta_true: Parameter vector returned verbatim by `get_theta`.
        hazard_model: Object providing `Lambda_0(t)` and `Lambda_0_inverse(u)`.
    """

    def __init__(self, d, theta_true, hazard_model):
        # The hazard model backs both baseline methods below.
        self.hazard_model = hazard_model
        self.theta_true = theta_true
        self.d = d

    def get_theta(self):
        """Return the stored theta (the same object that was passed in, not a copy)."""
        theta = self.theta_true
        return theta

    def cum_baseline(self, t):
        """Return `hazard_model.Lambda_0(t)`."""
        model = self.hazard_model
        return model.Lambda_0(t)

    def inverse_cum_baseline(self, u):
        """Return `hazard_model.Lambda_0_inverse(u)`."""
        model = self.hazard_model
        return model.Lambda_0_inverse(u)
    
# Learner that serves the ground-truth theta and the simulation's hazard model.
perfect_degradation_learner = PerfectDegradationLearner(
    d=D,
    theta_true=THETA_TRUE,
    hazard_model=usage_exp_hazard_model,
)

# Tunables for the perfect-information benchmark, named so they are easy to find.
PERFECT_NUM_DYNAMICS_SAMPLES = 100000  # argument passed to `_precompute_dynamics`
# NOTE(review): this is a fixed 150 rather than
# simulator.training_hyperparams['num_value_iterations'] (the online runs logged
# above stop at 100 iterations) -- confirm the larger budget is intentional.
PERFECT_NUM_VALUE_ITERATIONS = 150

# Same grid and MDP parameters as the online learner, but fed the true utility
# vector and the true-theta learner instead of estimates.
perfect_dpagent = DiscretizedDPAgent(
    # NOTE(review): the original comment described N as grid sizes
    # [cum_context, context, duration, active_time]; confirm that ordering
    # against DiscretizedDPAgent.
    N=simulator.training_hyperparams['N'],
    max_cumulative_context=simulator.training_hyperparams['max_cumulative_context'],
    u_hat=UTILITY_TRUE,
    degradation_learner=perfect_degradation_learner,
    customer_generator=customer_gen,
    params=simulator.mdp_params,
)

# `_precompute_dynamics` is a private method of the agent; it is invoked
# explicitly here before value iteration.
perfect_dpagent._precompute_dynamics(PERFECT_NUM_DYNAMICS_SAMPLES)
perfect_dpagent.run_value_iteration(PERFECT_NUM_VALUE_ITERATIONS)
perfect_policy = perfect_dpagent.get_policy('greedy')

In [None]:
# Snapshot the online learner's log before rolling out the benchmark policy,
# then derive per-row net profit, its running total, and the running total
# divided by calendar time.
simulation_df = pd.DataFrame(simulator.history).assign(
    net_profit=lambda frame: frame['profit'] + frame['loss'],
    cumulative_net_profit=lambda frame: frame['net_profit'].cumsum(),
    netprofit_per_time=lambda frame: frame['cumulative_net_profit'] / frame['calendar_time'],
)

# Roll out the perfect-information policy on the same simulator. The simulator's
# own degradation learner is left in place (the swap to the perfect learner is
# deliberately not applied here).
samples = pd.DataFrame(
    simulator.run_full_exploit(100000, perfect_policy, {'tau': 0.01})
).assign(
    net_profit=lambda frame: frame['profit'] + frame['loss'],
    cumulative_net_profit=lambda frame: frame['net_profit'].cumsum(),
    netprofit_per_time=lambda frame: frame['cumulative_net_profit'] / frame['calendar_time'],
)

In [None]:
def calculate_rolling_rate(df, time_col, value_col, window_size):
    """
    Calculates the rate of a value over a trailing time window on irregular time series data.

    For every row with time `t`, the values of all rows whose time lies in
    `[t - window_size, t]` (up to and including that row in time order) are
    summed and divided by `window_size`. Rows earlier than `window_size` after
    the first observation are still divided by the full `window_size`, so their
    rate is computed over a truncated window.

    The input does not need to be sorted: rows are ordered by time internally and
    the result is mapped back, so the returned Series has the same index and row
    order as `df` and can be assigned straight back as a new column.

    Args:
        df (pd.DataFrame): The input dataframe (not modified).
        time_col (str): The name of the column with time data.
        value_col (str): The name of the column with values to aggregate (e.g., 'net_profit').
        window_size (int | float): The duration of the rolling time window; must be positive.

    Returns:
        pd.Series: The rolling rate for each row, indexed like `df`.

    Raises:
        ValueError: If `window_size` is not strictly positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size!r}")

    raw_times = df[time_col].to_numpy()
    raw_values = df[value_col].to_numpy()

    # Stable time ordering; `order[k]` is the original position of the k-th earliest row.
    order = np.argsort(raw_times, kind='stable')
    times = raw_times[order]
    values = raw_values[order]

    # For each end time t_i, the first sorted position whose time is >= t_i - window.
    start_indices = np.searchsorted(times, times - window_size, side='left')

    # Prefix sums: the window sum ending at i is cumsum[i] - cumsum[start - 1],
    # with the "start - 1" lookup handled by a cumsum shifted right by one.
    value_cumsum = np.cumsum(values)
    shifted_cumsum = np.concatenate(([0], value_cumsum[:-1]))
    window_sums = value_cumsum - shifted_cumsum[start_indices]

    rate_sorted = window_sums / window_size

    # Undo the sort so each rate lands on the row it was computed for.
    rates = np.empty_like(rate_sorted)
    rates[order] = rate_sorted

    return pd.Series(rates, index=df.index)


# --- 2. Calculate net profit and the rolling rate for each DataFrame ---

# Length of the trailing window, in calendar-time units.
window_duration = 20000

# Only the online-learning log is processed; add `samples` to the tuple to give
# the benchmark rollout the same columns.
for df in (simulation_df,):
    df['net_profit'] = df['profit'].add(df['loss'])
    # Trailing-window profit rate, computed by the helper defined above.
    df['profit_rate'] = calculate_rolling_rate(
        df,
        time_col='calendar_time',
        value_col='net_profit',
        window_size=window_duration,
    )

In [None]:
# --- 3. Plot the new rolling profit rate ---

# Upper bound of the plotted range: the last calendar time in the online log.
# (The comparison against the benchmark rollout `samples` is currently disabled,
# so no common end time has to be computed.)
max_time = simulation_df['calendar_time'].max()

# Keep only rows from `window_duration` onwards (inclusive), up to `max_time`.
simulations_plot = simulation_df[
    simulation_df['calendar_time'].between(window_duration, max_time)
]

plt.figure(figsize=(10, 6))

plt.plot(
    simulations_plot['calendar_time'],
    simulations_plot['profit_rate'],
    label=f'Online Learning (Rolling {window_duration} unit avg)',
)

plt.title(f'Rolling Profit Rate Over Time (Window = {window_duration} time units)')
plt.xlabel('Calendar Time')
plt.ylabel('Profit Rate (Profit / Time Unit)')
plt.legend()
plt.grid(True)
plt.show()