In [1]:
import logging
import pickle
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from pytz import timezone
from scipy.optimize import minimize
from scipy.stats import gaussian_kde

from policy import DPAgent
from new_new_policy import DiscretizedDPAgent
from simulation import Simulator, CustomerGenerator
from hazard_models import ExponentialHazard
from utility_learner import ProjectedVolumeLearner, diam
from degradation_learner import DegradationLearner
from utils import unit_ball_rejection_sample, correct_signs

logging.basicConfig(level=logging.INFO)

np.set_printoptions(suppress=True)

In [2]:
# --- 2. Define Sampling Functions ---
# def context_sampler() -> np.ndarray:
#     """Samples a customer's context vector from a uniform distribution."""
#     return np.random.uniform(low=0.0, high=1.0, size=D)

def context_sampler() -> np.ndarray:
    """Draw one customer context vector with non-negative components.

    A sample is obtained from ``unit_ball_rejection_sample(D)`` and its
    element-wise absolute value is returned, so every component is >= 0.
    ``D`` is the module-level dimension constant and is looked up when the
    function is called, not when it is defined.
    """
    raw_draw = unit_ball_rejection_sample(D)
    return np.absolute(raw_draw)

def rental_sampler() -> float:
    """Draw one desired rental duration.

    The value comes from NumPy's global random state, exponentially
    distributed with scale 10.0.
    """
    duration_scale = 10.0
    return np.random.exponential(duration_scale)

def interarrival_sampler() -> float:
    """Draw the waiting time until the next customer arrives.

    The value comes from NumPy's global random state, exponentially
    distributed with scale 5.0.
    """
    interarrival_scale = 5.0
    return np.random.exponential(interarrival_scale)

In [3]:
# --- 1. Simulation Configuration ---
D = 4                     # dimension of every context / parameter vector
LAMBDA_VAL = 1e-3         # constant handed to the exponential baseline hazard
NUM_CUSTOMERS = 40_000    # number of simulated customers, i.e. the horizon T

# The seed is intentionally left disabled; uncomment to make a run repeatable.
# np.random.seed(41)

# Ground-truth vectors (each of length D) given to the Simulator.
THETA_TRUE = np.array([0.5, 0.2, 0.4, 0.3])    # degradation parameter
UTILITY_TRUE = np.array(
    [0.372450167, 0.10850869, 0.33930126, 0.71356037]
)                                              # customer's willingness to pay

# --- Machine's pricing vector 'r' ---
# Fallback used when no u_hat is fed to calculate_price.
PRICING_R = np.zeros(D)

In [4]:
# Hazard model built from the configured baseline constant.
usage_exp_hazard_model = ExponentialHazard(lambda_val=LAMBDA_VAL)
# spontaneous_exp_hazard_model = None # ExponentialHazard(lambda_val=0.01)

# Generator that produces customers from the three sampling functions.
customer_gen = CustomerGenerator(
    d=D,
    context_sampler=context_sampler,
    rental_sampler=rental_sampler,
    interarrival_sampler=interarrival_sampler
)

# Intentionally empty: every override below is disabled.
# NOTE(review): presumably the learner then falls back to its own defaults - confirm.
centroid_params = {
    # 'num_samples': 2000,
    # 'thin': None,
    # 'burn_in': 500 * D ** 2,
    # 'tol': 1e-4,
    # 'rho_target': 0.01
}


def termination_rule(diameter: float) -> bool:
    """Custom termination rule handed to the ProjectedVolumeLearner.

    Args:
        diameter: The diameter value supplied by the learner.

    Returns:
        True when ``diameter`` is strictly below 0.0005, otherwise False.
    """
    # A named function (instead of a lambda bound to a name) shows up with a
    # real name in tracebacks and can be pickled by reference.
    return diameter < 0.0005


projected_volume_learner = ProjectedVolumeLearner(
    T=NUM_CUSTOMERS,
    d=D,
    centroid_params=centroid_params,
    incentive_constant=1.1,
    termination_rule=termination_rule,
)

# Parameters of the maintenance MDP handed to the Simulator.
mdp_params = dict(
    duration_lambda=10.0,      # same value as the scale used in rental_sampler()
    interarrival_lambda=5.0,   # same value as the scale used in interarrival_sampler()
    replacement_cost=1.5,      # cost to replace the machine
    failure_cost=0.75,         # additional penalty for an in-service failure
    holding_cost_rate=0.02,    # cost per unit of idle time
    gamma=0.99,                # discount factor
    learning_rate=1e-3,        # learning rate for the Adam optimizer
    target_update_freq=10,     # target-network update period (in iterations)
)

training_hyperparams = dict(
    # --- FQI ---
    num_iterations=1,          # training iterations per policy update
    dataset_size=50_000,       # transitions generated for the offline dataset
    batch_size=256,            # training batch size

    # --- Discrete DP ---
    # Grid sizes, in order: [cum_context, context, revenue, duration].
    # Earlier setting: 'N': [80, 20, 60, 150]
    N=[100, 50, 100, 100],
    max_cumulative_context=8.0,
    # 'max_active_time': 150.0,
    num_value_iterations=100,
)

# Exploration policy and its keyword arguments.
policy_type = 'decaying_epsilon_greedy'
policy_kwargs = dict(
    current_epsilon=0.1,
    decay_rate=0.95,
    step=0,
)

# Build the Simulator from the configuration constants, the hazard model,
# the customer generator, the learner and the parameter dicts defined earlier
# in this notebook.
simulator = Simulator(
    d=D,
    T=NUM_CUSTOMERS,
    
    # Ground-truth vectors and the fallback pricing vector.
    theta_true=THETA_TRUE,
    utility_true=UTILITY_TRUE,
    pricing_r=PRICING_R,
    
    usage_hazard_model=usage_exp_hazard_model,
    customer_generator=customer_gen,
    projected_volume_learner=projected_volume_learner,  # the learner configured above (custom termination rule)
    
    mdp_params=mdp_params,
    # NOTE(review): presumably selects the discretized DP agent rather than FQI - confirm in Simulator.
    discrete_dp=True,
    policy_type=policy_type,
    training_hyperparams=training_hyperparams,
    policy_kwargs=policy_kwargs,
    # NOTE(review): unit of this threshold is not visible from this notebook - confirm in Simulator.
    policy_update_threshold=100,
    time_normalize=True,
)

In [5]:
# # Lets you skip utility exploration with perfect u starting point
# simulator.projected_volume_learner.centroids.append(UTILITY_TRUE)
# simulator.projected_volume_learner.is_terminated = True
# simulator.seen_breakdowns = 2

# degradation_learner = DegradationLearner(d=simulator.d)
# degradation_learner.theta = np.ones(D) * 0.1
# degradation_learner.cum_baseline = lambda x: LAMBDA_VAL * x
# degradation_learner.inverse_cum_baseline = lambda y: y / LAMBDA_VAL
# simulator.degradation_learner = degradation_learner

# # dp_agent = DPAgent(
# #     d=simulator.d,
# #     u_hat=UTILITY_TRUE,
# #     time_normalize=simulator.time_normalize,
# #     degradation_learner=simulator.degradation_learner,
# #     customer_generator=simulator.customer_generator,
# #     params=simulator.mdp_params
# # )
# # dp_agent.train(**simulator.training_hyperparams)

# dp_agent = DiscretizedDPAgent(
#     N=training_hyperparams['N'], # grid sizes [cum_context, context, duration, active_time]
#     max_cumulative_context=training_hyperparams['max_cumulative_context'],
#     # max_active_time=training_hyperparams['max_active_time'],
#     u_hat=UTILITY_TRUE,
#     degradation_learner=degradation_learner,
#     customer_generator=customer_gen,
#     params=mdp_params,
# )
# # dp_agent._precompute_dynamics(num_samples=50000)
# dp_agent.run_value_iteration(100)

# simulator.dp_agent = dp_agent
# simulator.optimal_policy = dp_agent.get_policy(simulator.policy_type)
# simulator.breakdowns_since_last_update = 0 # Reset the counter


In [6]:
# Timestamp (US Pacific time) used to tag every artifact written by this run.
pacific_tz = timezone('America/Los_Angeles')
current_time = datetime.now(pacific_tz).strftime("%Y%m%d_%H%M%S")

# Create the output folders *before* the long simulation so that a missing
# directory cannot make the save calls below fail once the run has finished.
Path('data').mkdir(parents=True, exist_ok=True)
Path('models').mkdir(parents=True, exist_ok=True)

# simulator.projected_volume_learner.is_terminated = True
simulation_data = simulator.run(num_customers=NUM_CUSTOMERS)

# Collect the histories recorded on the simulator into DataFrames.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Persist the run: two CSV files plus the simulator itself.
degradation_df.to_csv(f'data/degradation_data_{current_time}.csv', index=False)
simulation_df.to_csv(f'data/simulation_data_{current_time}.csv', index=False)
simulator.save(f'models/simulator_{current_time}')

INFO:root:Starting simulation for 40000 customers...
  0%|          | 0/40000 [00:00<?, ?it/s]

Set parameter Username


INFO:gurobipy:Set parameter Username


Set parameter LicenseID to value 2651514


INFO:gurobipy:Set parameter LicenseID to value 2651514


Academic license - for non-commercial use only - expires 2026-04-14


INFO:gurobipy:Academic license - for non-commercial use only - expires 2026-04-14
INFO:root:Customer 1: Diameter: 0.9998
  0%|          | 1/40000 [00:03<33:42:08,  3.03s/it]INFO:root:Customer 2: Diameter: 0.7090
  0%|          | 2/40000 [00:06<36:51:02,  3.32s/it]INFO:root:Customer 3: Diameter: 0.6879
  0%|          | 3/40000 [00:10<39:22:00,  3.54s/it]INFO:root:Customer 4: Diameter: 0.5048
  0%|          | 4/40000 [00:14<41:50:18,  3.77s/it]INFO:root:Customer 5: Diameter: 0.3429
  0%|          | 5/40000 [00:18<44:49:22,  4.03s/it]INFO:root:Customer 6: Diameter: 0.5139
  0%|          | 6/40000 [00:23<47:28:36,  4.27s/it]INFO:root:Customer 6: Diameter: 0.4965
  0%|          | 7/40000 [00:28<50:44:21,  4.57s/it]INFO:root:Customer 6: Diameter: 0.5335
  0%|          | 8/40000 [00:34<53:59:42,  4.86s/it]INFO:root:Customer 7: Diameter: 0.3964
  0%|          | 9/40000 [00:40<57:31:26,  5.18s/it]INFO:root:Customer 8: Diameter: 0.2517
  0%|          | 10/40000 [00:46<60:58:11,  5.49s/it]INFO:ro

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.79176D+00    |proj g|=  1.00000D+00

At iterate    1    f=  1.36639D-01    |proj g|=  0.00000D+00

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      1      2      4     0     4   0.000D+00   1.366D-01
  F =  0.13663854428759059     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - C

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.092451
Iteration 20/100 | Max Change (Delta): 0.063444
Iteration 30/100 | Max Change (Delta): 0.051349
Iteration 40/100 | Max Change (Delta): 0.041974
Iteration 50/100 | Max Change (Delta): 0.034329
Iteration 60/100 | Max Change (Delta): 0.028078
Iteration 70/100 | Max Change (Delta): 0.022965
Iteration 80/100 | Max Change (Delta): 0.018784
Iteration 90/100 | Max Change (Delta): 0.015363


INFO:root:Policy updated.
  8%|▊         | 3345/40000 [10:28<16:07, 37.90it/s]  

Iteration 100/100 | Max Change (Delta): 0.012566

Value iteration finished (max iterations reached).


 17%|█▋        | 6746/40000 [10:28<05:55, 93.60it/s]INFO:root:Updating optimal policy...


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  6.11127D+02    |proj g|=  1.00000D+00

At iterate    1    f=  5.93596D+02    |proj g|=  6.42302D-01

At iterate    2    f=  5.93420D+02    |proj g|=  6.21785D-01

At iterate    3    f=  5.93259D+02    |proj g|=  5.92914D-02


INFO:root:Theta updated. New theta_hat: [0.467 0.24  0.316 0.427]



At iterate    4    f=  5.93259D+02    |proj g|=  3.27114D-03

At iterate    5    f=  5.93259D+02    |proj g|=  1.45545D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   1.455D-03   5.933D+02
  F =   593.25918184620377     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectatio

 17%|█▋        | 6746/40000 [10:40<05:55, 93.60it/s]

Iteration 10/100 | Max Change (Delta): 0.201361
Iteration 20/100 | Max Change (Delta): 0.148161
Iteration 30/100 | Max Change (Delta): 0.118183
Iteration 40/100 | Max Change (Delta): 0.096060
Iteration 50/100 | Max Change (Delta): 0.078445
Iteration 60/100 | Max Change (Delta): 0.064134
Iteration 70/100 | Max Change (Delta): 0.052450
Iteration 80/100 | Max Change (Delta): 0.042898
Iteration 90/100 | Max Change (Delta): 0.035086


INFO:root:Policy updated.
 19%|█▊        | 7411/40000 [11:47<14:12, 38.24it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028697

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.15125D+03    |proj g|=  1.00000D+00

At iterate    1    f=  1.09960D+03    |proj g|=  6.47105D-01

At iterate    2    f=  1.09893D+03    |proj g|=  6.20503D-01

At iterate    3    f=  1.09816D+03    |proj g|=  3.12972D-01


INFO:root:Theta updated. New theta_hat: [0.374 0.176 0.445 0.461]



At iterate    4    f=  1.09816D+03    |proj g|=  1.13041D-02

At iterate    5    f=  1.09816D+03    |proj g|=  3.49919D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   3.499D-03   1.098D+03
  F =   1098.1589465468173     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectatio

INFO:root:Policy updated.
 22%|██▏       | 8792/40000 [13:06<18:24, 28.25it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028616

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.76539D+03    |proj g|=  1.00000D+00

At iterate    1    f=  1.68201D+03    |proj g|=  6.51175D-01

At iterate    2    f=  1.67955D+03    |proj g|=  6.31164D-01

At iterate    3    f=  1.67662D+03    |proj g|=  4.87289D-01

At iterate    4    f=  1.67662D+03    |proj g|=  8.41722D-02

At iterate    5    f=  1.67662D+03    |proj g|=  4.14975D-02

At iterate    6    f=  1.67662D+03    |proj g|=  1.04309D-02


INFO:root:Theta updated. New theta_hat: [0.484 0.064 0.43  0.475]



At iterate    7    f=  1.67662D+03    |proj g|=  2.89253D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      7      9     10     0     0   2.893D-03   1.677D+03
  F =   1676.6168378640043     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 22%|██▏       | 8792/40000 [13:20<18:24, 28.25it/s]

Iteration 20/100 | Max Change (Delta): 0.147200
Iteration 30/100 | Max Change (Delta): 0.117564
Iteration 40/100 | Max Change (Delta): 0.095618
Iteration 50/100 | Max Change (Delta): 0.078102
Iteration 60/100 | Max Change (Delta): 0.063859
Iteration 70/100 | Max Change (Delta): 0.052226
Iteration 80/100 | Max Change (Delta): 0.042715
Iteration 90/100 | Max Change (Delta): 0.034936


INFO:root:Policy updated.
 26%|██▌       | 10292/40000 [14:26<20:17, 24.40it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028575

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  2.41746D+03    |proj g|=  1.00000D+00

At iterate    1    f=  2.31374D+03    |proj g|=  6.65370D-01

At iterate    2    f=  2.31059D+03    |proj g|=  6.49553D-01

At iterate    3    f=  2.30695D+03    |proj g|=  5.39246D-01

At iterate    4    f=  2.30694D+03    |proj g|=  8.23225D-02

At iterate    5    f=  2.30694D+03    |proj g|=  3.09407D-02

At iterate    6    f=  2.30694D+03    |proj g|=  1.29250D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.538 0.082 0.381 0.397]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207881


 26%|██▌       | 10292/40000 [14:40<20:17, 24.40it/s]

Iteration 20/100 | Max Change (Delta): 0.152811
Iteration 30/100 | Max Change (Delta): 0.121898
Iteration 40/100 | Max Change (Delta): 0.099029
Iteration 50/100 | Max Change (Delta): 0.080859
Iteration 60/100 | Max Change (Delta): 0.066107
Iteration 70/100 | Max Change (Delta): 0.054063
Iteration 80/100 | Max Change (Delta): 0.044217
Iteration 90/100 | Max Change (Delta): 0.036165


INFO:root:Policy updated.
 29%|██▉       | 11750/40000 [15:46<21:18, 22.10it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029580

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.08723D+03    |proj g|=  1.00000D+00

At iterate    1    f=  2.95874D+03    |proj g|=  6.72801D-01

At iterate    2    f=  2.95629D+03    |proj g|=  6.42200D-01

At iterate    3    f=  2.95344D+03    |proj g|=  4.28396D-01

At iterate    4    f=  2.95343D+03    |proj g|=  6.08122D-02

At iterate    5    f=  2.95343D+03    |proj g|=  9.05055D-03


INFO:root:Theta updated. New theta_hat: [0.461 0.128 0.427 0.339]



At iterate    6    f=  2.95343D+03    |proj g|=  5.07267D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   5.073D-03   2.953D+03
  F =   2953.4339763402259     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 29%|██▉       | 11750/40000 [16:00<21:18, 22.10it/s]

Iteration 20/100 | Max Change (Delta): 0.158270
Iteration 30/100 | Max Change (Delta): 0.125899
Iteration 40/100 | Max Change (Delta): 0.102161
Iteration 50/100 | Max Change (Delta): 0.083375
Iteration 60/100 | Max Change (Delta): 0.068152
Iteration 70/100 | Max Change (Delta): 0.055732
Iteration 80/100 | Max Change (Delta): 0.045582
Iteration 90/100 | Max Change (Delta): 0.037281


INFO:root:Policy updated.
 33%|███▎      | 13256/40000 [17:07<21:17, 20.93it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.030492

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.76515D+03    |proj g|=  1.00000D+00

At iterate    1    f=  3.61195D+03    |proj g|=  6.73871D-01

At iterate    2    f=  3.60734D+03    |proj g|=  6.47987D-01

At iterate    3    f=  3.60223D+03    |proj g|=  5.48437D-01

At iterate    4    f=  3.60222D+03    |proj g|=  1.02333D-01

At iterate    5    f=  3.60222D+03    |proj g|=  2.84976D-02


INFO:root:Theta updated. New theta_hat: [0.548 0.113 0.395 0.302]



At iterate    6    f=  3.60222D+03    |proj g|=  1.19458D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   1.195D-02   3.602D+03
  F =   3602.2205503000878     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 33%|███▎      | 13256/40000 [17:20<21:17, 20.93it/s]

Iteration 10/100 | Max Change (Delta): 0.213771
Iteration 20/100 | Max Change (Delta): 0.157206
Iteration 30/100 | Max Change (Delta): 0.125090
Iteration 40/100 | Max Change (Delta): 0.101554
Iteration 50/100 | Max Change (Delta): 0.082893
Iteration 60/100 | Max Change (Delta): 0.067762
Iteration 70/100 | Max Change (Delta): 0.055415
Iteration 80/100 | Max Change (Delta): 0.045322
Iteration 90/100 | Max Change (Delta): 0.037069


INFO:root:Policy updated.
 36%|███▌      | 14370/40000 [18:29<23:11, 18.41it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.030319

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  4.45891D+03    |proj g|=  1.00000D+00

At iterate    1    f=  4.27551D+03    |proj g|=  6.75154D-01

At iterate    2    f=  4.27072D+03    |proj g|=  6.51881D-01

At iterate    3    f=  4.26520D+03    |proj g|=  5.31506D-01

At iterate    4    f=  4.26519D+03    |proj g|=  1.19851D-01

At iterate    5    f=  4.26519D+03    |proj g|=  2.98160D-02


INFO:root:Theta updated. New theta_hat: [0.53  0.12  0.396 0.309]



At iterate    6    f=  4.26519D+03    |proj g|=  1.38893D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   1.389D-02   4.265D+03
  F =   4265.1945805700079     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 36%|███▌      | 14370/40000 [18:40<23:11, 18.41it/s]

Iteration 10/100 | Max Change (Delta): 0.215069
Iteration 20/100 | Max Change (Delta): 0.158063
Iteration 30/100 | Max Change (Delta): 0.125748
Iteration 40/100 | Max Change (Delta): 0.102070
Iteration 50/100 | Max Change (Delta): 0.083309
Iteration 60/100 | Max Change (Delta): 0.068100
Iteration 70/100 | Max Change (Delta): 0.055691
Iteration 80/100 | Max Change (Delta): 0.045548
Iteration 90/100 | Max Change (Delta): 0.037253


INFO:root:Policy updated.
 39%|███▉      | 15544/40000 [19:53<24:02, 16.95it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.030470

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  5.17740D+03    |proj g|=  1.00000D+00

At iterate    1    f=  4.97363D+03    |proj g|=  6.80313D-01

At iterate    2    f=  4.96788D+03    |proj g|=  6.53680D-01

At iterate    3    f=  4.96088D+03    |proj g|=  5.21377D-01

At iterate    4    f=  4.96088D+03    |proj g|=  1.59637D-01

At iterate    5    f=  4.96088D+03    |proj g|=  6.33375D-02

At iterate    6    f=  4.96088D+03    |proj g|=  1.72175D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.52  0.09  0.404 0.323]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.217210


 39%|███▉      | 15544/40000 [20:10<24:02, 16.95it/s]

Iteration 20/100 | Max Change (Delta): 0.159577
Iteration 30/100 | Max Change (Delta): 0.126898
Iteration 40/100 | Max Change (Delta): 0.102978
Iteration 50/100 | Max Change (Delta): 0.084041
Iteration 60/100 | Max Change (Delta): 0.068695
Iteration 70/100 | Max Change (Delta): 0.056177
Iteration 80/100 | Max Change (Delta): 0.045945
Iteration 90/100 | Max Change (Delta): 0.037578


INFO:root:Policy updated.
 42%|████▏     | 16827/40000 [21:14<23:15, 16.60it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.030735

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  5.90905D+03    |proj g|=  1.00000D+00

At iterate    1    f=  5.66890D+03    |proj g|=  6.72228D-01

At iterate    2    f=  5.66323D+03    |proj g|=  6.62461D-01

At iterate    3    f=  5.65659D+03    |proj g|=  5.22768D-01

At iterate    4    f=  5.65659D+03    |proj g|=  1.20515D-01

At iterate    5    f=  5.65659D+03    |proj g|=  4.05486D-02

At iterate    6    f=  5.65659D+03    |proj g|=  1.84628D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.522 0.117 0.365 0.36 ]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 42%|████▏     | 16827/40000 [21:30<23:15, 16.60it/s]

Iteration 10/100 | Max Change (Delta): 0.213527
Iteration 20/100 | Max Change (Delta): 0.156847
Iteration 30/100 | Max Change (Delta): 0.124901
Iteration 40/100 | Max Change (Delta): 0.101387
Iteration 50/100 | Max Change (Delta): 0.082761
Iteration 60/100 | Max Change (Delta): 0.067654
Iteration 70/100 | Max Change (Delta): 0.055327
Iteration 80/100 | Max Change (Delta): 0.045250
Iteration 90/100 | Max Change (Delta): 0.037010


INFO:root:Policy updated.
 45%|████▌     | 18153/40000 [22:42<22:34, 16.13it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.030271

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  6.63948D+03    |proj g|=  1.00000D+00

At iterate    1    f=  6.35601D+03    |proj g|=  6.62759D-01

At iterate    2    f=  6.34997D+03    |proj g|=  6.36147D-01

At iterate    3    f=  6.34297D+03    |proj g|=  5.05468D-01

At iterate    4    f=  6.34297D+03    |proj g|=  1.53195D-01

At iterate    5    f=  6.34297D+03    |proj g|=  5.30017D-02

At iterate    6    f=  6.34297D+03    |proj g|=  1.61868D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.505 0.127 0.421 0.347]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 45%|████▌     | 18153/40000 [23:00<22:34, 16.13it/s]

Iteration 10/100 | Max Change (Delta): 0.208671
Iteration 20/100 | Max Change (Delta): 0.153355
Iteration 30/100 | Max Change (Delta): 0.122301
Iteration 40/100 | Max Change (Delta): 0.099328
Iteration 50/100 | Max Change (Delta): 0.081095
Iteration 60/100 | Max Change (Delta): 0.066297
Iteration 70/100 | Max Change (Delta): 0.054218
Iteration 80/100 | Max Change (Delta): 0.044344
Iteration 90/100 | Max Change (Delta): 0.036268


INFO:root:Policy updated.
 49%|████▊     | 19406/40000 [24:13<22:19, 15.38it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029664

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  7.39325D+03    |proj g|=  1.00000D+00


 49%|████▊     | 19406/40000 [24:30<22:19, 15.38it/s]


At iterate    1    f=  7.06736D+03    |proj g|=  6.55482D-01

At iterate    2    f=  7.06091D+03    |proj g|=  6.32621D-01

At iterate    3    f=  7.05366D+03    |proj g|=  5.23118D-01

At iterate    4    f=  7.05366D+03    |proj g|=  1.37018D-01

At iterate    5    f=  7.05366D+03    |proj g|=  5.19120D-02

At iterate    6    f=  7.05366D+03    |proj g|=  9.10268D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   9.103D-03   7.054D+03
  F =   7053.6569552668252     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.522 0.148 0.411 0.349]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.203830
Iteration 20/100 | Max Change (Delta): 0.150054
Iteration 30/100 | Max Change (Delta): 0.119753
Iteration 40/100 | Max Change (Delta): 0.097315
Iteration 50/100 | Max Change (Delta): 0.079467
Iteration 60/100 | Max Change (Delta): 0.064970
Iteration 70/100 | Max Change (Delta): 0.053134
Iteration 80/100 | Max Change (Delta): 0.043457
Iteration 90/100 | Max Change (Delta): 0.035544


INFO:root:Policy updated.
 52%|█████▏    | 20694/40000 [26:29<24:53, 12.92it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029071

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  8.16518D+03    |proj g|=  1.00000D+00

At iterate    1    f=  7.81721D+03    |proj g|=  6.57945D-01

At iterate    2    f=  7.80887D+03    |proj g|=  6.36787D-01

At iterate    3    f=  7.79946D+03    |proj g|=  5.41853D-01

At iterate    4    f=  7.79945D+03    |proj g|=  2.04654D-01

At iterate    5    f=  7.79945D+03    |proj g|=  7.22415D-02

At iterate    6    f=  7.79945D+03    |proj g|=  3.05400D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.54  0.128 0.404 0.346]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206435
Iteration 20/100 | Max Change (Delta): 0.151903
Iteration 30/100 | Max Change (Delta): 0.121186
Iteration 40/100 | Max Change (Delta): 0.098457
Iteration 50/100 | Max Change (Delta): 0.080394
Iteration 60/100 | Max Change (Delta): 0.065727
Iteration 70/100 | Max Change (Delta): 0.053752
Iteration 80/100 | Max Change (Delta): 0.043963
Iteration 90/100 | Max Change (Delta): 0.035957


INFO:root:Policy updated.
 55%|█████▌    | 22046/40000 [29:25<28:02, 10.67it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029409

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  8.93680D+03    |proj g|=  1.00000D+00

At iterate    1    f=  8.55738D+03    |proj g|=  6.56533D-01

At iterate    2    f=  8.54990D+03    |proj g|=  6.39792D-01

At iterate    3    f=  8.54149D+03    |proj g|=  5.32309D-01

At iterate    4    f=  8.54148D+03    |proj g|=  1.85073D-01

At iterate    5    f=  8.54148D+03    |proj g|=  6.76324D-02

At iterate    6    f=  8.54148D+03    |proj g|=  3.00477D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.53  0.154 0.39  0.342]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206319
Iteration 20/100 | Max Change (Delta): 0.151816
Iteration 30/100 | Max Change (Delta): 0.121122
Iteration 40/100 | Max Change (Delta): 0.098401
Iteration 50/100 | Max Change (Delta): 0.080346
Iteration 60/100 | Max Change (Delta): 0.065687
Iteration 70/100 | Max Change (Delta): 0.053720
Iteration 80/100 | Max Change (Delta): 0.043936
Iteration 90/100 | Max Change (Delta): 0.035935


INFO:root:Policy updated.
 58%|█████▊    | 23265/40000 [32:38<31:17,  8.91it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029392

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  9.72969D+03    |proj g|=  1.00000D+00

At iterate    1    f=  9.31691D+03    |proj g|=  6.55916D-01

At iterate    2    f=  9.30855D+03    |proj g|=  6.41145D-01

At iterate    3    f=  9.29895D+03    |proj g|=  5.47149D-01

At iterate    4    f=  9.29895D+03    |proj g|=  2.05954D-01

At iterate    5    f=  9.29895D+03    |proj g|=  7.18507D-02

At iterate    6    f=  9.29895D+03    |proj g|=  2.58666D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.544 0.158 0.387 0.334]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.204681
Iteration 20/100 | Max Change (Delta): 0.150651
Iteration 30/100 | Max Change (Delta): 0.120243
Iteration 40/100 | Max Change (Delta): 0.097702
Iteration 50/100 | Max Change (Delta): 0.079781
Iteration 60/100 | Max Change (Delta): 0.065226
Iteration 70/100 | Max Change (Delta): 0.053343
Iteration 80/100 | Max Change (Delta): 0.043628
Iteration 90/100 | Max Change (Delta): 0.035684


INFO:root:Policy updated.
 62%|██████▏   | 24656/40000 [36:14<32:11,  7.94it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029186

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.05198D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.00764D+04    |proj g|=  6.56984D-01

At iterate    2    f=  1.00672D+04    |proj g|=  6.42594D-01

At iterate    3    f=  1.00566D+04    |proj g|=  5.44851D-01

At iterate    4    f=  1.00566D+04    |proj g|=  2.14848D-01

At iterate    5    f=  1.00566D+04    |proj g|=  7.24045D-02

At iterate    6    f=  1.00566D+04    |proj g|=  5.82004D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.542 0.15  0.385 0.34 ]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206228
Iteration 20/100 | Max Change (Delta): 0.151764
Iteration 30/100 | Max Change (Delta): 0.121099
Iteration 40/100 | Max Change (Delta): 0.098387
Iteration 50/100 | Max Change (Delta): 0.080337
Iteration 60/100 | Max Change (Delta): 0.065680
Iteration 70/100 | Max Change (Delta): 0.053714
Iteration 80/100 | Max Change (Delta): 0.043931
Iteration 90/100 | Max Change (Delta): 0.035931


INFO:root:Policy updated.
 65%|██████▍   | 25834/40000 [40:08<34:26,  6.86it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029388

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.13265D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.08593D+04    |proj g|=  6.59071D-01

At iterate    2    f=  1.08503D+04    |proj g|=  6.44410D-01

At iterate    3    f=  1.08402D+04    |proj g|=  5.30219D-01

At iterate    4    f=  1.08402D+04    |proj g|=  1.69911D-01

At iterate    5    f=  1.08402D+04    |proj g|=  5.37000D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.528 0.159 0.383 0.336]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207129
Iteration 20/100 | Max Change (Delta): 0.152329
Iteration 30/100 | Max Change (Delta): 0.121530
Iteration 40/100 | Max Change (Delta): 0.098715
Iteration 50/100 | Max Change (Delta): 0.080599
Iteration 60/100 | Max Change (Delta): 0.065893
Iteration 70/100 | Max Change (Delta): 0.053888
Iteration 80/100 | Max Change (Delta): 0.044074
Iteration 90/100 | Max Change (Delta): 0.036048


INFO:root:Policy updated.
 68%|██████▊   | 27033/40000 [42:01<28:21,  7.62it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029484

Value iteration finished (max iterations reached).


 68%|██████▊   | 27033/40000 [42:21<28:21,  7.62it/s]

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.21396D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.16498D+04    |proj g|=  6.61386D-01

At iterate    2    f=  1.16398D+04    |proj g|=  6.48307D-01

At iterate    3    f=  1.16284D+04    |proj g|=  5.31280D-01

At iterate    4    f=  1.16284D+04    |proj g|=  2.03529D-01

At iterate    5    f=  1.16284D+04    |proj g|=  6.06168D-02

At iterate    6    f=  1.16284D+04    |proj g|=  6.56928D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nac

INFO:root:Theta updated. New theta_hat: [0.529 0.149 0.379 0.338]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208828
Iteration 20/100 | Max Change (Delta): 0.153480
Iteration 30/100 | Max Change (Delta): 0.122364
Iteration 40/100 | Max Change (Delta): 0.099370
Iteration 50/100 | Max Change (Delta): 0.081128
Iteration 60/100 | Max Change (Delta): 0.066323
Iteration 70/100 | Max Change (Delta): 0.054239
Iteration 80/100 | Max Change (Delta): 0.044361
Iteration 90/100 | Max Change (Delta): 0.036283


INFO:root:Policy updated.
 71%|███████▏  | 28506/40000 [47:32<31:08,  6.15it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029676

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.29513D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.24215D+04    |proj g|=  6.59177D-01

At iterate    2    f=  1.24111D+04    |proj g|=  6.45806D-01

At iterate    3    f=  1.23990D+04    |proj g|=  5.34709D-01

At iterate    4    f=  1.23990D+04    |proj g|=  2.40465D-01

At iterate    5    f=  1.23990D+04    |proj g|=  7.47484D-02

At iterate    6    f=  1.23990D+04    |proj g|=  6.19268D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.532 0.153 0.381 0.338]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207918
Iteration 20/100 | Max Change (Delta): 0.152818
Iteration 30/100 | Max Change (Delta): 0.121883
Iteration 40/100 | Max Change (Delta): 0.098995
Iteration 50/100 | Max Change (Delta): 0.080824
Iteration 60/100 | Max Change (Delta): 0.066076
Iteration 70/100 | Max Change (Delta): 0.054037
Iteration 80/100 | Max Change (Delta): 0.044196
Iteration 90/100 | Max Change (Delta): 0.036148


INFO:root:Policy updated.
 75%|███████▍  | 29874/40000 [54:24<34:36,  4.88it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029565

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.37812D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.32062D+04    |proj g|=  6.54183D-01

At iterate    2    f=  1.31954D+04    |proj g|=  6.39695D-01

At iterate    3    f=  1.31833D+04    |proj g|=  5.31616D-01

At iterate    4    f=  1.31833D+04    |proj g|=  2.18531D-01

At iterate    5    f=  1.31833D+04    |proj g|=  6.31995D-02

At iterate    6    f=  1.31833D+04    |proj g|=  6.58594D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.529 0.16  0.387 0.347]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206026
Iteration 20/100 | Max Change (Delta): 0.151642
Iteration 30/100 | Max Change (Delta): 0.120942
Iteration 40/100 | Max Change (Delta): 0.098264
Iteration 50/100 | Max Change (Delta): 0.080235
Iteration 60/100 | Max Change (Delta): 0.065596
Iteration 70/100 | Max Change (Delta): 0.053645
Iteration 80/100 | Max Change (Delta): 0.043875
Iteration 90/100 | Max Change (Delta): 0.035885


INFO:root:Policy updated.
 79%|███████▊  | 31432/40000 [4:40:10<7:13:56,  3.04s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029351

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.46030D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.40045D+04    |proj g|=  6.56265D-01

At iterate    2    f=  1.39923D+04    |proj g|=  6.51637D-01

At iterate    3    f=  1.39786D+04    |proj g|=  5.29809D-01

At iterate    4    f=  1.39786D+04    |proj g|=  2.40429D-01

At iterate    5    f=  1.39786D+04    |proj g|=  7.51575D-02

At iterate    6    f=  1.39786D+04    |proj g|=  6.00094D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.527 0.146 0.385 0.358]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206016
Iteration 20/100 | Max Change (Delta): 0.151548
Iteration 30/100 | Max Change (Delta): 0.120897
Iteration 40/100 | Max Change (Delta): 0.098213
Iteration 50/100 | Max Change (Delta): 0.080191
Iteration 60/100 | Max Change (Delta): 0.065560
Iteration 70/100 | Max Change (Delta): 0.053616
Iteration 80/100 | Max Change (Delta): 0.043851
Iteration 90/100 | Max Change (Delta): 0.035866


INFO:root:Policy updated.
 82%|████████▏ | 32787/40000 [8:39:32<10:31:45,  5.26s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029335

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.54288D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.47971D+04    |proj g|=  6.56021D-01

At iterate    2    f=  1.47843D+04    |proj g|=  6.38074D-01

At iterate    3    f=  1.47702D+04    |proj g|=  5.25371D-01

At iterate    4    f=  1.47701D+04    |proj g|=  2.79649D-01

At iterate    5    f=  1.47701D+04    |proj g|=  8.91302D-02

At iterate    6    f=  1.47701D+04    |proj g|=  5.74086D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.523 0.147 0.396 0.351]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.205758
Iteration 20/100 | Max Change (Delta): 0.151427
Iteration 30/100 | Max Change (Delta): 0.120826
Iteration 40/100 | Max Change (Delta): 0.098170
Iteration 50/100 | Max Change (Delta): 0.080160
Iteration 60/100 | Max Change (Delta): 0.065535
Iteration 70/100 | Max Change (Delta): 0.053596
Iteration 80/100 | Max Change (Delta): 0.043835
Iteration 90/100 | Max Change (Delta): 0.035852


INFO:root:Policy updated.
 88%|████████▊ | 35301/40000 [11:08:07<5:25:51,  4.16s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029324

Value iteration finished (max iterations reached).


 88%|████████▊ | 35301/40000 [11:08:25<5:25:51,  4.16s/it]

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.62510D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.55929D+04    |proj g|=  6.57885D-01

At iterate    2    f=  1.55804D+04    |proj g|=  6.40938D-01

At iterate    3    f=  1.55662D+04    |proj g|=  5.21860D-01

At iterate    4    f=  1.55662D+04    |proj g|=  2.78874D-01

At iterate    5    f=  1.55662D+04    |proj g|=  9.51832D-02

At iterate    6    f=  1.55662D+04    |proj g|=  6.01130D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nac

INFO:root:Theta updated. New theta_hat: [0.519 0.151 0.391 0.349]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206248
Iteration 20/100 | Max Change (Delta): 0.151680
Iteration 30/100 | Max Change (Delta): 0.121027
Iteration 40/100 | Max Change (Delta): 0.098315
Iteration 50/100 | Max Change (Delta): 0.080273
Iteration 60/100 | Max Change (Delta): 0.065627
Iteration 70/100 | Max Change (Delta): 0.053670
Iteration 80/100 | Max Change (Delta): 0.043896
Iteration 90/100 | Max Change (Delta): 0.035902


INFO:root:Policy updated.
 88%|████████▊ | 35321/40000 [11:46:26<6:20:42,  4.88s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029365

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.70684D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.63911D+04    |proj g|=  6.60332D-01

At iterate    2    f=  1.63778D+04    |proj g|=  6.42027D-01

At iterate    3    f=  1.63631D+04    |proj g|=  5.17119D-01

At iterate    4    f=  1.63631D+04    |proj g|=  2.78475D-01

At iterate    5    f=  1.63631D+04    |proj g|=  9.43090D-02

At iterate    6    f=  1.63631D+04    |proj g|=  8.32719D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.515 0.149 0.393 0.345]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207998
Iteration 20/100 | Max Change (Delta): 0.152944
Iteration 30/100 | Max Change (Delta): 0.121987
Iteration 40/100 | Max Change (Delta): 0.099093
Iteration 50/100 | Max Change (Delta): 0.080906
Iteration 60/100 | Max Change (Delta): 0.066144
Iteration 70/100 | Max Change (Delta): 0.054093
Iteration 80/100 | Max Change (Delta): 0.044242
Iteration 90/100 | Max Change (Delta): 0.036185


INFO:root:Policy updated.
 91%|█████████ | 36441/40000 [11:52:46<3:17:18,  3.33s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029596

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.78882D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.71745D+04    |proj g|=  6.58666D-01

At iterate    2    f=  1.71611D+04    |proj g|=  6.42582D-01

At iterate    3    f=  1.71466D+04    |proj g|=  5.16311D-01

At iterate    4    f=  1.71466D+04    |proj g|=  2.66022D-01

At iterate    5    f=  1.71466D+04    |proj g|=  8.18624D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.514 0.157 0.388 0.349]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206940
Iteration 20/100 | Max Change (Delta): 0.152118
Iteration 30/100 | Max Change (Delta): 0.121357
Iteration 40/100 | Max Change (Delta): 0.098569
Iteration 50/100 | Max Change (Delta): 0.080478
Iteration 60/100 | Max Change (Delta): 0.065794
Iteration 70/100 | Max Change (Delta): 0.053807
Iteration 80/100 | Max Change (Delta): 0.044007
Iteration 90/100 | Max Change (Delta): 0.035993


INFO:root:Policy updated.
 94%|█████████▍| 37589/40000 [11:58:59<1:33:23,  2.32s/it]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029439

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.87212D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.79682D+04    |proj g|=  6.55931D-01

At iterate    2    f=  1.79539D+04    |proj g|=  6.40373D-01

At iterate    3    f=  1.79384D+04    |proj g|=  5.23029D-01

At iterate    4    f=  1.79384D+04    |proj g|=  2.89521D-01

At iterate    5    f=  1.79384D+04    |proj g|=  8.84559D-02

At iterate    6    f=  1.79384D+04    |proj g|=  5.59787D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.52  0.158 0.388 0.353]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.204966
Iteration 20/100 | Max Change (Delta): 0.150819
Iteration 30/100 | Max Change (Delta): 0.120365
Iteration 40/100 | Max Change (Delta): 0.097797
Iteration 50/100 | Max Change (Delta): 0.079856
Iteration 60/100 | Max Change (Delta): 0.065287
Iteration 70/100 | Max Change (Delta): 0.053393
Iteration 80/100 | Max Change (Delta): 0.043669
Iteration 90/100 | Max Change (Delta): 0.035717


INFO:root:Policy updated.
 97%|█████████▋| 38756/40000 [12:05:26<34:41,  1.67s/it]  INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029213

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.95525D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.87618D+04    |proj g|=  6.54588D-01

At iterate    2    f=  1.87470D+04    |proj g|=  6.50130D-01

At iterate    3    f=  1.87309D+04    |proj g|=  5.20574D-01

At iterate    4    f=  1.87309D+04    |proj g|=  3.06262D-01

At iterate    5    f=  1.87309D+04    |proj g|=  9.24603D-02

At iterate    6    f=  1.87309D+04    |proj g|=  7.04357D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.518 0.157 0.389 0.358]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.205200
Iteration 20/100 | Max Change (Delta): 0.151027
Iteration 30/100 | Max Change (Delta): 0.120486
Iteration 40/100 | Max Change (Delta): 0.097900
Iteration 50/100 | Max Change (Delta): 0.079940
Iteration 60/100 | Max Change (Delta): 0.065356
Iteration 70/100 | Max Change (Delta): 0.053449
Iteration 80/100 | Max Change (Delta): 0.043715
Iteration 90/100 | Max Change (Delta): 0.035754


INFO:root:Policy updated.
100%|██████████| 40000/40000 [12:06:48<00:00,  1.09s/it]
INFO:root:Simulation finished.


Iteration 100/100 | Max Change (Delta): 0.029243

Value iteration finished (max iterations reached).


INFO:root:Simulation state saved to models/simulator_20250915_214357.


Policy saved to models/simulator_20250915_214357.discrete_policy.pkl


In [None]:
# Persist the finished run: both history tables as CSV plus the simulator state.
# `current_time` is the run tag defined in an earlier cell.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Destination path -> table to write there (dicts preserve insertion order).
csv_exports = {
    f'data/degradation_data_{current_time}.csv': degradation_df,
    f'data/simulation_data_{current_time}.csv': simulation_df,
}
for csv_path, frame in csv_exports.items():
    frame.to_csv(csv_path, index=False)

simulator.save(f'models/simulator_{current_time}')

### Convergence of $\hat\theta$

In [None]:
# simulator = Simulator.load('models/simulator_0914')

history = pd.DataFrame(simulator.history)
degradation_history = pd.DataFrame(simulator.degradation_history)

# Reference exploration-rate curve: 0.20 decayed by a factor of 0.95 per theta update.
# NOTE(review): the constants are hard-coded here -- confirm they match the
# schedule the simulator actually used during training.
epsilons = [0.20 * (0.95 ** i) for i in range(len(simulator.theta_updates))]

# x-axis: the latest calendar time recorded for the customer at which each
# theta update was logged.
times = []
for update in simulator.theta_updates:
    from_customer = history['customer_id'] == update['customer_idx']
    times.append(history.loc[from_customer, 'calendar_time'].max())

# L2 and L-inf norms of the degradation-parameter (theta) estimation error
L2_errors = [np.linalg.norm(update['theta_hat'] - THETA_TRUE) for update in simulator.theta_updates]
Linf_errors = [np.linalg.norm(update['theta_hat'] - THETA_TRUE, ord=np.inf) for update in simulator.theta_updates]

plt.figure(figsize=(12, 6))
# Raw strings keep the LaTeX backslashes literal (no invalid-escape warnings).
plt.plot(times, L2_errors, label=r'$L_2$ Norm Error', marker='o')
plt.plot(times, Linf_errors, label=r'$L_\infty$ Norm Error', marker='x')
plt.plot(times, epsilons, label='Exploration Rate (ε)', linestyle='--', color='gray')
# plt.yscale('log')
# The x values are calendar times, so label the axis accordingly.
plt.xlabel('Calendar Time', fontsize=14)
plt.ylabel('Error Norm', fontsize=14)

plt.title(r'Convergence of $\|\hat{\theta} - \theta\|$', fontsize=18)
plt.legend(fontsize=12)
plt.grid(True)
# Dedicated file name: the u_hat convergence cell writes
# 'figures/utility_convergence.pdf', which previously overwrote this figure.
plt.savefig('figures/theta_convergence.pdf')
plt.show()

### Convergence of $\hat u$

In [None]:
# L2 and L-inf norms of the utility-vector (u_hat) estimation error, one point
# per entry of simulator.utility_updates.
L2_errors = [np.linalg.norm(update['u_hat'] - UTILITY_TRUE) for update in simulator.utility_updates]
Linf_errors = [np.linalg.norm(update['u_hat'] - UTILITY_TRUE, ord=np.inf) for update in simulator.utility_updates]

plt.figure(figsize=(12, 6))
# Raw strings keep the LaTeX backslashes literal (no invalid-escape warnings).
plt.plot(L2_errors, label=r'$L_2$ Norm Error', marker='o')
plt.plot(Linf_errors, label=r'$L_\infty$ Norm Error', marker='x')
# plt.yscale('log')
# NOTE(review): the x-axis is the position in simulator.utility_updates; the
# label below is only accurate if one update is logged per customer -- confirm.
plt.xlabel('Number of Customers Processed', fontsize=14)
plt.ylabel('Error Norm', fontsize=14)

plt.title(r'Convergence of $\|\hat u - u\|$', fontsize=18)
plt.legend(fontsize=12)
plt.grid(True)
plt.savefig('figures/utility_convergence.pdf')
plt.show()

### Revenue of Online Learner

In [None]:
# Rebuild the history tables from the simulator so this cell does not depend on
# columns added by other cells.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Net profit per row is profit + loss, then accumulated in row order.
# NOTE(review): adding `loss` presumably means it is stored as a non-positive
# number -- confirm against the simulator.
per_row_net = simulation_df['profit'] + simulation_df['loss']
simulation_df['net_profit'] = per_row_net
simulation_df['cumulative_net_profit'] = simulation_df['net_profit'].cumsum()

# NOTE: despite its name, `ax` is bound to the Figure returned by plt.figure.
ax = plt.figure(figsize=(10,6))

# Cumulative net profit against calendar time
elapsed = simulation_df['calendar_time']
running_total = simulation_df['cumulative_net_profit']
plt.plot(elapsed, running_total, label='Cumulative Net Profit')
plt.title('Cumulative Net Profit Over Time')
plt.xlabel('Calendar Time')
plt.ylabel('Cumulative Net Profit')
plt.grid()
plt.legend()
plt.savefig('figures/cumulative_net_profit_online.pdf')
plt.show()

## Training policy under perfect information

### Revenue of Optimal Policy

In [None]:
class PerfectDegradationLearner:
    """Degradation learner that returns a fixed, known theta instead of estimating it.

    It exposes `get_theta`, `cum_baseline` and `inverse_cum_baseline`; the two
    baseline methods forward to the supplied hazard model.
    """

    def __init__(self, d, theta_true, hazard_model):
        """Store the dimension, the known parameter vector and the hazard model.

        Args:
            d: Dimension of the context / parameter vector (stored, not used here).
            theta_true: Parameter vector handed back unchanged by `get_theta`.
            hazard_model: Object providing `Lambda_0(t)` and `Lambda_0_inverse(u)`.
        """
        self.hazard_model = hazard_model  # used by both baseline methods below
        self.theta_true = theta_true
        self.d = d

    def get_theta(self):
        """Return the stored theta (the same object, not a copy)."""
        return self.theta_true

    def cum_baseline(self, t):
        """Return the hazard model's `Lambda_0` evaluated at `t`."""
        baseline = self.hazard_model.Lambda_0
        return baseline(t)

    def inverse_cum_baseline(self, u):
        """Return the hazard model's `Lambda_0_inverse` evaluated at `u`."""
        inverse_baseline = self.hazard_model.Lambda_0_inverse
        return inverse_baseline(u)
    
# Learner that hands the ground-truth theta straight to the agent.
perfect_degradation_learner = PerfectDegradationLearner(
    d=D,
    theta_true=THETA_TRUE,
    hazard_model=usage_exp_hazard_model,
)

# Reuse the grid settings of the online run, but plan with the true utility
# vector and the perfect learner.
# NOTE(review): the original comment described N as
# [cum_context, context, duration, active_time]; confirm the ordering against
# DiscretizedDPAgent.
# Previously tried: N_perfect = [100, 40, 80, 150]; passing max_active_time.
hyperparams = simulator.training_hyperparams
perfect_dpagent = DiscretizedDPAgent(
    N=hyperparams['N'],
    max_cumulative_context=hyperparams['max_cumulative_context'],
    u_hat=UTILITY_TRUE,
    degradation_learner=perfect_degradation_learner,
    customer_generator=customer_gen,
    params=simulator.mdp_params,
)

# Pre-compute dynamics with 100000 as the argument, run 150 value-iteration
# sweeps (instead of hyperparams['num_value_iterations']), then extract the
# greedy policy.
perfect_dpagent._precompute_dynamics(100000)
perfect_dpagent.run_value_iteration(150)
perfect_policy = perfect_dpagent.get_policy('greedy')

In [None]:
# Snapshot the online-learning history BEFORE the exploit run, in case the run
# touches simulator.history.
simulation_df = pd.DataFrame(simulator.history)
# simulator.degradation_learner = perfect_degradation_learner

# Roll out the perfect-information policy; arguments are passed through as-is.
samples = pd.DataFrame(
    simulator.run_full_exploit(100000, perfect_policy, {'tau': 0.01})
)

# Same derived columns for both runs: per-row net profit, its running total,
# and the running total divided by calendar time.
for frame in (simulation_df, samples):
    frame['net_profit'] = frame['profit'] + frame['loss']
    frame['cumulative_net_profit'] = frame['net_profit'].cumsum()
    frame['netprofit_per_time'] = frame['cumulative_net_profit'] / frame['calendar_time']

In [None]:
def calculate_rolling_rate(df, time_col, value_col, window_size):
    """
    Calculates the rate of a value over a rolling time window on irregular time series data.

    For a row with time `t`, the window is the closed interval
    `[t - window_size, t]`. Rows that share the same time are processed in
    their original relative order, so each tied row only sees the tied rows
    that precede it. The window sum is always divided by the full
    `window_size`, so rows with `t < min(time) + window_size` report a rate
    over a partially filled window.

    Args:
        df (pd.DataFrame): The input dataframe. It does not need to be sorted
            by time and may carry any index.
        time_col (str): The name of the column with time data.
        value_col (str): The name of the column with values to aggregate (e.g., 'net_profit').
        window_size (int | float): The duration of the rolling time window; must be positive.

    Returns:
        pd.Series: The rolling rate for each row, indexed like `df` so that
        `df[col] = calculate_rolling_rate(df, ...)` puts every rate on the
        row it was computed for.

    Raises:
        ValueError: If `window_size` is not strictly positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size!r}")

    raw_times = df[time_col].to_numpy()
    raw_values = df[value_col].to_numpy()

    # Work in time order without touching the caller's frame or index.
    # A stable sort keeps tied timestamps in their original relative order.
    order = np.argsort(raw_times, kind='stable')
    times = raw_times[order]
    values = raw_values[order]

    # For each end time t_i, locate the first row with time >= t_i - window_size.
    start_indices = np.searchsorted(times, times - window_size, side='left')

    # Prefix sums give the sum over any slice [j, i] as cumsum[i] - cumsum[j - 1].
    value_cumsum = np.cumsum(values)
    # cumsum_before[j] == cumsum[j - 1], with cumsum_before[0] == 0.
    cumsum_before = np.concatenate(([0], value_cumsum[:-1]))
    window_sums = value_cumsum - cumsum_before[start_indices]

    # Scatter the results back to the caller's row order so the returned Series
    # lines up with `df.index` (index-aligned assignment would otherwise put
    # rates on the wrong rows for unsorted or non-default-indexed input).
    rates = np.empty(len(raw_times), dtype=float)
    rates[order] = window_sums / window_size

    return pd.Series(rates, index=df.index)


# --- 2. Net profit and rolling profit rate for each DataFrame of interest ---

window_duration = 20000  # length of the rolling window, in calendar-time units

# Frames to annotate; use [samples] here to rate the perfect-information run.
frames_to_rate = [simulation_df]

for df in frames_to_rate:
    df['net_profit'] = df['profit'] + df['loss']
    # Rolling rate of net profit over the trailing `window_duration` time units
    df['profit_rate'] = calculate_rolling_rate(
        df,
        time_col='calendar_time',
        value_col='net_profit',
        window_size=window_duration,
    )

In [None]:
# --- 3. Plot the rolling profit rate ---

# Only rows at or after `window_duration` are plotted; the upper bound is the
# last observed time of the online run.
# To compare against the perfect-information run, cap max_time at
# min(simulation_df max, samples max) and build `samples_plot` the same way.
max_time = simulation_df['calendar_time'].max()
in_plot_range = simulation_df['calendar_time'].between(window_duration, max_time)
simulations_plot = simulation_df[in_plot_range]

plt.figure(figsize=(10, 6))

# plt.plot(samples_plot['calendar_time'], samples_plot['profit_rate'], label=f'Optimal Policy (Rolling {window_duration} unit avg)')
plt.plot(
    simulations_plot['calendar_time'],
    simulations_plot['profit_rate'],
    label=f'Online Learning (Rolling {window_duration} unit avg)',
)

plt.title(f'Rolling Profit Rate Over Time (Window = {window_duration} time units)')
plt.xlabel('Calendar Time')
plt.ylabel('Profit Rate (Profit / Time Unit)')
plt.grid(True)
plt.legend()
plt.show()