In [1]:
import logging
import os
import pickle
from datetime import datetime

import numpy as np
import pandas as pd
from scipy.optimize import minimize
import torch
from pytz import timezone
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

from policy import DPAgent
from new_new_policy import DiscretizedDPAgent
from simulation import Simulator, CustomerGenerator
from hazard_models import ExponentialHazard
from utility_learner import ProjectedVolumeLearner, diam
from degradation_learner import DegradationLearner
from utils import unit_ball_rejection_sample, correct_signs

logging.basicConfig(level=logging.INFO)

np.set_printoptions(suppress=True)

In [2]:
# --- 2. Define Sampling Functions ---
# def context_sampler() -> np.ndarray:
#     """Samples a customer's context vector from a uniform distribution."""
#     return np.random.uniform(low=0.0, high=1.0, size=D)

def context_sampler() -> np.ndarray:
    """Draw one customer context vector of dimension ``D``.

    A point is taken from ``unit_ball_rejection_sample(D)`` and every
    component is replaced by its absolute value, so the returned vector has
    only non-negative entries (i.e. it is folded into the non-negative
    orthant rather than covering the whole ball).

    Returns:
        np.ndarray: the element-wise absolute value of the sampled point.
    """
    # presumably a uniform draw from the D-dimensional unit ball — see utils
    raw_point = unit_ball_rejection_sample(D)
    return np.absolute(raw_point)

def rental_sampler(scale: float = 10.0) -> float:
    """Sample a customer's desired rental duration from an exponential distribution.

    Args:
        scale: Mean of the exponential distribution (NumPy's ``scale``
            parameter, i.e. 1 / rate). Defaults to 10.0, the value that was
            previously hard-coded, so zero-argument callers are unaffected.

    Returns:
        float: one draw from NumPy's global random generator.
    """
    return np.random.exponential(scale=scale)

def interarrival_sampler(scale: float = 5.0) -> float:
    """Sample the time until the next customer arrives.

    Args:
        scale: Mean of the exponential distribution (NumPy's ``scale``
            parameter, i.e. 1 / rate). Defaults to 5.0, the value that was
            previously hard-coded, so zero-argument callers are unaffected.

    Returns:
        float: one draw from NumPy's global random generator.
    """
    return np.random.exponential(scale=scale)

In [3]:
# --- 1. Simulation Configuration ---
D = 4                                  # Dimension of context vectors
LAMBDA_VAL = 0.001                     # Baseline hazard constant
NUM_CUSTOMERS = 40000                  # Total number of customers to simulate, i.e. T

# Seed NumPy's global generator so that Restart Kernel -> Run All reproduces
# the same run (the samplers in this notebook draw from np.random).
# NOTE(review): only NumPy's global RNG is seeded here; if the project modules
# draw from other generators (torch, numba-compiled code, `random`), seed
# those as well — not verifiable from this notebook.
SEED = 41
np.random.seed(SEED)

# Ground truth vectors
THETA_TRUE = np.array([0.5, 0.2, 0.4, 0.3])    # For degradation
UTILITY_TRUE = np.array([0.372450167, 0.10850869, 0.33930126, 0.71356037])  # For customer's willingness to pay

# Both ground-truth vectors must live in the D-dimensional context space.
assert THETA_TRUE.shape == UTILITY_TRUE.shape == (D,), "ground-truth vectors must have length D"

# --- Machine's Pricing Vector 'r' ---
# This is a fallback pricing vector, when we don't feed u_hat to calculate_price
PRICING_R = np.zeros(D)

In [4]:
# Exponential hazard model parameterised by LAMBDA_VAL; it is handed to the
# Simulator below as its `usage_hazard_model`.
usage_exp_hazard_model = ExponentialHazard(lambda_val=LAMBDA_VAL)
# spontaneous_exp_hazard_model = None # ExponentialHazard(lambda_val=0.01)

# Customer generator for D-dimensional contexts, wired to the three sampler
# callbacks defined earlier in this notebook (context, rental duration,
# inter-arrival time).
customer_gen = CustomerGenerator(
    d=D,
    context_sampler=context_sampler,
    rental_sampler=rental_sampler,
    interarrival_sampler=interarrival_sampler
)

# Keyword overrides passed to ProjectedVolumeLearner as `centroid_params`.
# Every candidate entry is commented out, so an empty dict is passed
# (presumably the learner then uses its own defaults — verify in utility_learner).
centroid_params = {
    # 'num_samples': 2000,
    # 'thin': None,
    # 'burn_in': 500 * D ** 2,
    # 'tol': 1e-4,
    # 'rho_target': 0.01
}

def termination_rule(diameter: float) -> bool:
    """Example custom termination rule for the utility learner.

    Written as a named function instead of a lambda bound to a name (PEP 8);
    a module-level function can also be pickled by reference, which a lambda
    cannot.

    Args:
        diameter: The diameter value supplied by the learner.

    Returns:
        bool: True once ``diameter`` is strictly below 0.0005.
    """
    return diameter < 0.0005

# Utility learner over D-dimensional contexts with horizon T = NUM_CUSTOMERS.
# It receives the (currently empty) centroid overrides and the custom
# termination rule defined above.
# NOTE(review): the meaning of `incentive_constant=1.1` is defined inside
# ProjectedVolumeLearner and is not visible from this notebook.
projected_volume_learner = ProjectedVolumeLearner(
    T=NUM_CUSTOMERS,
    d=D,
    centroid_params=centroid_params,
    incentive_constant=1.1,
    termination_rule=termination_rule,
)

# Parameters handed to the Simulator as `mdp_params`.
# NOTE(review): duration_lambda / interarrival_lambda carry the same numbers
# as the exponential scales used by rental_sampler / interarrival_sampler;
# confirm how the agent interprets them and keep the two places in sync.
mdp_params = dict(
    duration_lambda=10.0,
    interarrival_lambda=5.0,
    replacement_cost=1.5,       # Cost to replace the machine
    failure_cost=0.75,          # Additional penalty for in-service failure
    holding_cost_rate=0.02,     # Cost per unit of idle time
    gamma=0.99,                 # Discount factor
    learning_rate=1e-3,         # Learning rate for the Adam optimizer
    target_update_freq=10,      # How often to update the target network (in iterations)
)

# Hyperparameters handed to the Simulator as `training_hyperparams`;
# the first group targets FQI, the second the discrete DP agent.
training_hyperparams = dict(
    # For FQI
    num_iterations=1,            # Number of training iterations per policy update
    dataset_size=50000,          # Number of transitions to generate for the offline dataset
    batch_size=256,              # Batch size for training

    # For discrete DP
    # N=[80, 20, 60, 150],       # earlier grid: [cum_context, context, duration, active_time]
    N=[100, 50, 100, 100],       # grid sizes [cum_context, context, revenue, duration]
    max_cumulative_context=8.0,
    # max_active_time=150.0,
    num_value_iterations=100,
)

# Exploration policy name and its keyword arguments, both forwarded to the
# Simulator (`policy_type` / `policy_kwargs`).
policy_type = 'decaying_epsilon_greedy'
policy_kwargs = dict(
    current_epsilon=0.10,
    decay_rate=0.95,
    step=0,
)

# Build the Simulator from the configuration assembled in this notebook:
# problem size, ground-truth vectors, hazard model, customer generator,
# utility learner, and the MDP / training / policy settings.
simulator = Simulator(
    d=D,
    T=NUM_CUSTOMERS,

    theta_true=THETA_TRUE,
    utility_true=UTILITY_TRUE,
    pricing_r=PRICING_R,

    usage_hazard_model=usage_exp_hazard_model,
    customer_generator=customer_gen,
    projected_volume_learner=projected_volume_learner,  # Learner instance configured above (custom termination rule)

    mdp_params=mdp_params,
    discrete_dp=True,  # presumably selects the discretized DP agent instead of FQI — confirm in Simulator
    policy_type=policy_type,
    training_hyperparams=training_hyperparams,
    policy_kwargs=policy_kwargs,
    policy_update_threshold=100,  # NOTE(review): what is counted against this threshold is defined in Simulator
    time_normalize=True,
)

In [5]:
# # Lets you skip utility exploration with perfect u starting point
# simulator.projected_volume_learner.centroids.append(UTILITY_TRUE)
# simulator.projected_volume_learner.is_terminated = True
# simulator.seen_breakdowns = 2

# degradation_learner = DegradationLearner(d=simulator.d)
# degradation_learner.theta = np.ones(D) * 0.1
# degradation_learner.cum_baseline = lambda x: LAMBDA_VAL * x
# degradation_learner.inverse_cum_baseline = lambda y: y / LAMBDA_VAL
# simulator.degradation_learner = degradation_learner

# # dp_agent = DPAgent(
# #     d=simulator.d,
# #     u_hat=UTILITY_TRUE,
# #     time_normalize=simulator.time_normalize,
# #     degradation_learner=simulator.degradation_learner,
# #     customer_generator=simulator.customer_generator,
# #     params=simulator.mdp_params
# # )
# # dp_agent.train(**simulator.training_hyperparams)

# dp_agent = DiscretizedDPAgent(
#     N=training_hyperparams['N'], # grid sizes [cum_context, context, duration, active_time]
#     max_cumulative_context=training_hyperparams['max_cumulative_context'],
#     # max_active_time=training_hyperparams['max_active_time'],
#     u_hat=UTILITY_TRUE,
#     degradation_learner=degradation_learner,
#     customer_generator=customer_gen,
#     params=mdp_params,
# )
# # dp_agent._precompute_dynamics(num_samples=50000)
# dp_agent.run_value_iteration(100)

# simulator.dp_agent = dp_agent
# simulator.optimal_policy = dp_agent.get_policy(simulator.policy_type)
# simulator.breakdowns_since_last_update = 0 # Reset the counter


In [6]:
# Timestamp (US Pacific time) used to tag every artifact written by this run.
pacific_tz = timezone('America/Los_Angeles')
current_time = datetime.now(pacific_tz).strftime("%Y%m%d_%H%M%S")

# Create the output folders before the long-running simulation starts, so a
# missing directory cannot make the writes below fail after all the work is done.
os.makedirs('data', exist_ok=True)
os.makedirs('models', exist_ok=True)

# simulator.projected_volume_learner.is_terminated = True
simulation_data = simulator.run(num_customers=NUM_CUSTOMERS)

# Tabulate the histories recorded on the simulator during the run.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Persist both tables and the simulator itself under the shared timestamp.
degradation_df.to_csv(f'data/degradation_data_{current_time}.csv', index=False)
simulation_df.to_csv(f'data/simulation_data_{current_time}.csv', index=False)
simulator.save(f'models/simulator_{current_time}')

INFO:root:Starting simulation for 40000 customers...
  0%|          | 0/40000 [00:00<?, ?it/s]

Set parameter Username


INFO:gurobipy:Set parameter Username


Set parameter LicenseID to value 2651514


INFO:gurobipy:Set parameter LicenseID to value 2651514


Academic license - for non-commercial use only - expires 2026-04-14


INFO:gurobipy:Academic license - for non-commercial use only - expires 2026-04-14
INFO:root:Customer 1: Diameter: 0.9884
  0%|          | 1/40000 [00:03<39:58:35,  3.60s/it]INFO:root:Customer 2: Diameter: 0.7264
  0%|          | 2/40000 [00:07<42:39:17,  3.84s/it]INFO:root:Customer 3: Diameter: 0.5623
  0%|          | 3/40000 [00:11<44:58:54,  4.05s/it]INFO:root:Customer 4: Diameter: 0.4410
  0%|          | 4/40000 [00:15<44:59:58,  4.05s/it]INFO:root:Customer 5: Diameter: 0.2949
  0%|          | 5/40000 [00:20<45:37:11,  4.11s/it]INFO:root:Customer 6: Diameter: 0.5421
  0%|          | 6/40000 [00:24<47:20:08,  4.26s/it]INFO:root:Customer 7: Diameter: 0.5359
  0%|          | 7/40000 [00:29<49:41:11,  4.47s/it]INFO:root:Customer 7: Diameter: 0.1833
  0%|          | 8/40000 [00:34<52:19:53,  4.71s/it]INFO:root:Customer 8: Diameter: 0.3026
  0%|          | 9/40000 [00:40<55:06:05,  4.96s/it]INFO:root:Customer 9: Diameter: 0.1540
  0%|          | 10/40000 [00:46<58:14:48,  5.24s/it]INFO:ro

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.79176D+00    |proj g|=  1.00000D+00

At iterate    1    f=  4.87954D-01    |proj g|=  2.24639D-01

At iterate    2    f=  4.28651D-01    |proj g|=  7.23188D-02

At iterate    3    f=  4.20600D-01    |proj g|=  4.55658D-02

At iterate    4    f=  4.17852D-01    |proj g|=  1.87538D-03

At iterate    5    f=  4.17847D-01    |proj g|=  5.89068D-05

At iterate    6    f=  4.17847D-01    |proj g|=  7.53869D-08

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nac

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.124811
Iteration 20/100 | Max Change (Delta): 0.091322
Iteration 30/100 | Max Change (Delta): 0.074003
Iteration 40/100 | Max Change (Delta): 0.060483
Iteration 50/100 | Max Change (Delta): 0.049466
Iteration 60/100 | Max Change (Delta): 0.040458
Iteration 70/100 | Max Change (Delta): 0.033091
Iteration 80/100 | Max Change (Delta): 0.027065
Iteration 90/100 | Max Change (Delta): 0.022137


INFO:root:Policy updated.
  8%|▊         | 3059/40000 [08:37<14:49, 41.55it/s]  INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.018106

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  5.25403D+02    |proj g|=  1.00000D+00

At iterate    1    f=  4.99364D+02    |proj g|=  6.51226D-01

At iterate    2    f=  4.97066D+02    |proj g|=  5.94982D-01

At iterate    3    f=  4.94664D+02    |proj g|=  6.91517D-01

At iterate    4    f=  4.94656D+02    |proj g|=  2.10880D-02

At iterate    5    f=  4.94656D+02    |proj g|=  1.34970D-02


INFO:root:Theta updated. New theta_hat: [0.594 0.003 0.679 0.186]



At iterate    6    f=  4.94656D+02    |proj g|=  2.51589D-03

At iterate    7    f=  4.94656D+02    |proj g|=  1.17061D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      7      9     10     0     0   1.171D-04   4.947D+02
  F =   494.65604724625774     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectatio

  8%|▊         | 3059/40000 [08:50<14:49, 41.55it/s]

Iteration 20/100 | Max Change (Delta): 0.147295
Iteration 30/100 | Max Change (Delta): 0.117762
Iteration 40/100 | Max Change (Delta): 0.095843
Iteration 50/100 | Max Change (Delta): 0.078303
Iteration 60/100 | Max Change (Delta): 0.064029
Iteration 70/100 | Max Change (Delta): 0.052367
Iteration 80/100 | Max Change (Delta): 0.042831
Iteration 90/100 | Max Change (Delta): 0.035032


INFO:root:Policy updated.
  9%|▊         | 3470/40000 [09:47<29:12, 20.85it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028653

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.08147D+03    |proj g|=  1.00000D+00

At iterate    1    f=  1.01976D+03    |proj g|=  6.68852D-01

At iterate    2    f=  1.01714D+03    |proj g|=  6.21329D-01

At iterate    3    f=  1.01475D+03    |proj g|=  3.42478D-01

At iterate    4    f=  1.01475D+03    |proj g|=  4.43742D-02

At iterate    5    f=  1.01475D+03    |proj g|=  2.26721D-02

At iterate    6    f=  1.01475D+03    |proj g|=  9.59665D-03


INFO:root:Theta updated. New theta_hat: [0.477 0.087 0.545 0.339]



At iterate    7    f=  1.01475D+03    |proj g|=  1.11681D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      7      9     10     0     0   1.117D-04   1.015D+03
  F =   1014.7490165259341     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

INFO:root:Policy updated.
 12%|█▏        | 4934/40000 [11:02<28:55, 20.20it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028787

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.71059D+03    |proj g|=  1.00000D+00

At iterate    1    f=  1.61842D+03    |proj g|=  6.61870D-01

At iterate    2    f=  1.61472D+03    |proj g|=  6.16032D-01

At iterate    3    f=  1.61096D+03    |proj g|=  4.87128D-01

At iterate    4    f=  1.61095D+03    |proj g|=  1.00587D-01


INFO:root:Theta updated. New theta_hat: [0.488 0.113 0.583 0.275]



At iterate    5    f=  1.61095D+03    |proj g|=  4.81623D-03

At iterate    6    f=  1.61095D+03    |proj g|=  3.21619D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   3.216D-03   1.611D+03
  F =   1610.9513729314449     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectatio

 12%|█▏        | 4934/40000 [11:20<28:55, 20.20it/s]

Iteration 20/100 | Max Change (Delta): 0.148009
Iteration 30/100 | Max Change (Delta): 0.118274
Iteration 40/100 | Max Change (Delta): 0.096180
Iteration 50/100 | Max Change (Delta): 0.078560
Iteration 60/100 | Max Change (Delta): 0.064233
Iteration 70/100 | Max Change (Delta): 0.052533
Iteration 80/100 | Max Change (Delta): 0.042966
Iteration 90/100 | Max Change (Delta): 0.035142


INFO:root:Policy updated.
 16%|█▌        | 6300/40000 [12:16<28:46, 19.52it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028743

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  2.37945D+03    |proj g|=  1.00000D+00

At iterate    1    f=  2.26073D+03    |proj g|=  6.58640D-01

At iterate    2    f=  2.25829D+03    |proj g|=  6.18172D-01

At iterate    3    f=  2.25590D+03    |proj g|=  4.66831D-01

At iterate    4    f=  2.25589D+03    |proj g|=  7.00145D-02

At iterate    5    f=  2.25589D+03    |proj g|=  8.35303D-03

At iterate    6    f=  2.25589D+03    |proj g|=  6.03989D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.487 0.177 0.469 0.293]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.204396


 16%|█▌        | 6300/40000 [12:30<28:46, 19.52it/s]

Iteration 20/100 | Max Change (Delta): 0.150351
Iteration 30/100 | Max Change (Delta): 0.119978
Iteration 40/100 | Max Change (Delta): 0.097476
Iteration 50/100 | Max Change (Delta): 0.079593
Iteration 60/100 | Max Change (Delta): 0.065072
Iteration 70/100 | Max Change (Delta): 0.053216
Iteration 80/100 | Max Change (Delta): 0.043525
Iteration 90/100 | Max Change (Delta): 0.035599


INFO:root:Policy updated.
 19%|█▉        | 7694/40000 [13:27<27:35, 19.52it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029116

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.06729D+03    |proj g|=  1.00000D+00

At iterate    1    f=  2.91017D+03    |proj g|=  6.50908D-01

At iterate    2    f=  2.90793D+03    |proj g|=  6.18497D-01

At iterate    3    f=  2.90558D+03    |proj g|=  4.27837D-01

At iterate    4    f=  2.90557D+03    |proj g|=  6.69728D-02

At iterate    5    f=  2.90557D+03    |proj g|=  9.34105D-03


INFO:root:Theta updated. New theta_hat: [0.466 0.221 0.478 0.28 ]



At iterate    6    f=  2.90557D+03    |proj g|=  6.94324D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   6.943D-03   2.906D+03
  F =   2905.5749385698755     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 19%|█▉        | 7694/40000 [13:40<27:35, 19.52it/s]

Iteration 10/100 | Max Change (Delta): 0.202313
Iteration 20/100 | Max Change (Delta): 0.148859
Iteration 30/100 | Max Change (Delta): 0.118836
Iteration 40/100 | Max Change (Delta): 0.096569
Iteration 50/100 | Max Change (Delta): 0.078859
Iteration 60/100 | Max Change (Delta): 0.064474
Iteration 70/100 | Max Change (Delta): 0.052728
Iteration 80/100 | Max Change (Delta): 0.043125
Iteration 90/100 | Max Change (Delta): 0.035272


INFO:root:Policy updated.
 23%|██▎       | 9298/40000 [14:41<25:10, 20.33it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028849

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  3.77268D+03    |proj g|=  1.00000D+00

At iterate    1    f=  3.57956D+03    |proj g|=  6.48360D-01

At iterate    2    f=  3.57760D+03    |proj g|=  6.18926D-01

At iterate    3    f=  3.57545D+03    |proj g|=  4.57224D-01

At iterate    4    f=  3.57545D+03    |proj g|=  6.25708D-02


INFO:root:Theta updated. New theta_hat: [0.457 0.239 0.46  0.291]



At iterate    5    f=  3.57545D+03    |proj g|=  7.67532D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   7.675D-03   3.575D+03
  F =   3575.4465352579673     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 23%|██▎       | 9298/40000 [15:00<25:10, 20.33it/s]

Iteration 30/100 | Max Change (Delta): 0.118666
Iteration 40/100 | Max Change (Delta): 0.096431
Iteration 50/100 | Max Change (Delta): 0.078746
Iteration 60/100 | Max Change (Delta): 0.064381
Iteration 70/100 | Max Change (Delta): 0.052652
Iteration 80/100 | Max Change (Delta): 0.043063
Iteration 90/100 | Max Change (Delta): 0.035221


INFO:root:Policy updated.
 26%|██▋       | 10600/40000 [15:50<24:42, 19.84it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028808

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  4.49199D+03    |proj g|=  1.00000D+00

At iterate    1    f=  4.27648D+03    |proj g|=  6.55631D-01

At iterate    2    f=  4.27396D+03    |proj g|=  6.22757D-01

At iterate    3    f=  4.27122D+03    |proj g|=  4.61622D-01

At iterate    4    f=  4.27122D+03    |proj g|=  9.46453D-02

At iterate    5    f=  4.27122D+03    |proj g|=  1.60444D-02


INFO:root:Theta updated. New theta_hat: [0.461 0.246 0.451 0.257]



At iterate    6    f=  4.27122D+03    |proj g|=  1.18641D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   1.186D-02   4.271D+03
  F =   4271.2221603780918     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 26%|██▋       | 10600/40000 [16:10<24:42, 19.84it/s]

Iteration 30/100 | Max Change (Delta): 0.121390
Iteration 40/100 | Max Change (Delta): 0.098581
Iteration 50/100 | Max Change (Delta): 0.080486
Iteration 60/100 | Max Change (Delta): 0.065798
Iteration 70/100 | Max Change (Delta): 0.053810
Iteration 80/100 | Max Change (Delta): 0.044010
Iteration 90/100 | Max Change (Delta): 0.035996


INFO:root:Policy updated.
 30%|██▉       | 11905/40000 [17:00<24:01, 19.49it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029441

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  5.21342D+03    |proj g|=  1.00000D+00

At iterate    1    f=  4.96253D+03    |proj g|=  6.52070D-01

At iterate    2    f=  4.95862D+03    |proj g|=  6.18127D-01

At iterate    3    f=  4.95453D+03    |proj g|=  4.83571D-01

At iterate    4    f=  4.95453D+03    |proj g|=  1.11629D-01

At iterate    5    f=  4.95453D+03    |proj g|=  1.04194D-02


INFO:root:Theta updated. New theta_hat: [0.482 0.218 0.461 0.266]



At iterate    6    f=  4.95453D+03    |proj g|=  8.21802D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      6      8      9     0     0   8.218D-03   4.955D+03
  F =   4954.5263789552337     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated 

 30%|██▉       | 11905/40000 [17:10<24:01, 19.49it/s]

Iteration 10/100 | Max Change (Delta): 0.204540
Iteration 20/100 | Max Change (Delta): 0.150501
Iteration 30/100 | Max Change (Delta): 0.120050
Iteration 40/100 | Max Change (Delta): 0.097525
Iteration 50/100 | Max Change (Delta): 0.079633
Iteration 60/100 | Max Change (Delta): 0.065104
Iteration 70/100 | Max Change (Delta): 0.053243
Iteration 80/100 | Max Change (Delta): 0.043546
Iteration 90/100 | Max Change (Delta): 0.035616


INFO:root:Policy updated.
 33%|███▎      | 13211/40000 [18:09<23:11, 19.25it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029131

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  5.95782D+03    |proj g|=  1.00000D+00

At iterate    1    f=  5.67294D+03    |proj g|=  6.51664D-01

At iterate    2    f=  5.66893D+03    |proj g|=  6.22780D-01

At iterate    3    f=  5.66475D+03    |proj g|=  4.85943D-01

At iterate    4    f=  5.66475D+03    |proj g|=  9.94106D-02

At iterate    5    f=  5.66475D+03    |proj g|=  1.17609D-02

At iterate    6    f=  5.66475D+03    |proj g|=  7.17510D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.485 0.209 0.441 0.297]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 33%|███▎      | 13211/40000 [18:20<23:11, 19.25it/s]

Iteration 10/100 | Max Change (Delta): 0.204757
Iteration 20/100 | Max Change (Delta): 0.150558
Iteration 30/100 | Max Change (Delta): 0.120122
Iteration 40/100 | Max Change (Delta): 0.097586
Iteration 50/100 | Max Change (Delta): 0.079681
Iteration 60/100 | Max Change (Delta): 0.065143
Iteration 70/100 | Max Change (Delta): 0.053275
Iteration 80/100 | Max Change (Delta): 0.043572
Iteration 90/100 | Max Change (Delta): 0.035638


INFO:root:Policy updated.
 36%|███▋      | 14594/40000 [19:21<21:55, 19.32it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029148

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  6.72583D+03    |proj g|=  1.00000D+00

At iterate    1    f=  6.40562D+03    |proj g|=  6.49831D-01

At iterate    2    f=  6.40141D+03    |proj g|=  6.21067D-01

At iterate    3    f=  6.39697D+03    |proj g|=  4.85434D-01

At iterate    4    f=  6.39696D+03    |proj g|=  1.11014D-01

At iterate    5    f=  6.39696D+03    |proj g|=  1.04431D-02

At iterate    6    f=  6.39696D+03    |proj g|=  7.48479D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.484 0.219 0.444 0.292]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.203153


 36%|███▋      | 14594/40000 [19:40<21:55, 19.32it/s]

Iteration 20/100 | Max Change (Delta): 0.149463
Iteration 30/100 | Max Change (Delta): 0.119278
Iteration 40/100 | Max Change (Delta): 0.096914
Iteration 50/100 | Max Change (Delta): 0.079136
Iteration 60/100 | Max Change (Delta): 0.064699
Iteration 70/100 | Max Change (Delta): 0.052912
Iteration 80/100 | Max Change (Delta): 0.043275
Iteration 90/100 | Max Change (Delta): 0.035395


INFO:root:Policy updated.
 40%|████      | 16101/40000 [20:33<20:07, 19.80it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.028950

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  7.49085D+03    |proj g|=  1.00000D+00

At iterate    1    f=  7.14725D+03    |proj g|=  6.52381D-01

At iterate    2    f=  7.14218D+03    |proj g|=  6.21825D-01

At iterate    3    f=  7.13681D+03    |proj g|=  4.86136D-01

At iterate    4    f=  7.13680D+03    |proj g|=  1.29091D-01

At iterate    5    f=  7.13680D+03    |proj g|=  1.87744D-02

At iterate    6    f=  7.13680D+03    |proj g|=  1.09555D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.485 0.202 0.449 0.297]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.203784


 40%|████      | 16101/40000 [20:50<20:07, 19.80it/s]

Iteration 20/100 | Max Change (Delta): 0.149921
Iteration 30/100 | Max Change (Delta): 0.119645
Iteration 40/100 | Max Change (Delta): 0.097209
Iteration 50/100 | Max Change (Delta): 0.079376
Iteration 60/100 | Max Change (Delta): 0.064895
Iteration 70/100 | Max Change (Delta): 0.053072
Iteration 80/100 | Max Change (Delta): 0.043407
Iteration 90/100 | Max Change (Delta): 0.035502


INFO:root:Policy updated.
 44%|████▍     | 17513/40000 [21:47<19:09, 19.55it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029037

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  8.27774D+03    |proj g|=  1.00000D+00

At iterate    1    f=  7.90870D+03    |proj g|=  6.54569D-01

At iterate    2    f=  7.90344D+03    |proj g|=  6.25642D-01

At iterate    3    f=  7.89788D+03    |proj g|=  4.80894D-01

At iterate    4    f=  7.89788D+03    |proj g|=  1.27873D-01

At iterate    5    f=  7.89788D+03    |proj g|=  1.88374D-02

At iterate    6    f=  7.89788D+03    |proj g|=  1.12225D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.48  0.205 0.442 0.294]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 44%|████▍     | 17513/40000 [22:00<19:09, 19.55it/s]

Iteration 10/100 | Max Change (Delta): 0.205708
Iteration 20/100 | Max Change (Delta): 0.151277
Iteration 30/100 | Max Change (Delta): 0.120664
Iteration 40/100 | Max Change (Delta): 0.098006
Iteration 50/100 | Max Change (Delta): 0.080020
Iteration 60/100 | Max Change (Delta): 0.065419
Iteration 70/100 | Max Change (Delta): 0.053500
Iteration 80/100 | Max Change (Delta): 0.043757
Iteration 90/100 | Max Change (Delta): 0.035789


INFO:root:Policy updated.
 47%|████▋     | 18947/40000 [23:00<17:55, 19.58it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029272

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  9.04899D+03    |proj g|=  1.00000D+00

At iterate    1    f=  8.66170D+03    |proj g|=  6.59668D-01

At iterate    2    f=  8.65433D+03    |proj g|=  6.27328D-01

At iterate    3    f=  8.64667D+03    |proj g|=  4.92681D-01

At iterate    4    f=  8.64666D+03    |proj g|=  1.84226D-01

At iterate    5    f=  8.64666D+03    |proj g|=  2.80739D-02

At iterate    6    f=  8.64666D+03    |proj g|=  1.59856D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.491 0.184 0.452 0.277]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...


 47%|████▋     | 18947/40000 [23:10<17:55, 19.58it/s]

Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208646
Iteration 20/100 | Max Change (Delta): 0.153458
Iteration 30/100 | Max Change (Delta): 0.122329
Iteration 40/100 | Max Change (Delta): 0.099336
Iteration 50/100 | Max Change (Delta): 0.081099
Iteration 60/100 | Max Change (Delta): 0.066298
Iteration 70/100 | Max Change (Delta): 0.054219
Iteration 80/100 | Max Change (Delta): 0.044344
Iteration 90/100 | Max Change (Delta): 0.036269


INFO:root:Policy updated.
 51%|█████     | 20220/40000 [24:16<17:40, 18.66it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029665

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  9.84647D+03    |proj g|=  1.00000D+00

At iterate    1    f=  9.43365D+03    |proj g|=  6.61871D-01

At iterate    2    f=  9.42644D+03    |proj g|=  6.30539D-01

At iterate    3    f=  9.41865D+03    |proj g|=  4.72016D-01

At iterate    4    f=  9.41865D+03    |proj g|=  1.73684D-01

At iterate    5    f=  9.41865D+03    |proj g|=  2.43577D-02

At iterate    6    f=  9.41865D+03    |proj g|=  1.40231D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.47  0.17  0.452 0.302]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 51%|█████     | 20220/40000 [24:30<17:40, 18.66it/s]

Iteration 10/100 | Max Change (Delta): 0.209559
Iteration 20/100 | Max Change (Delta): 0.154050
Iteration 30/100 | Max Change (Delta): 0.122853
Iteration 40/100 | Max Change (Delta): 0.099766
Iteration 50/100 | Max Change (Delta): 0.081451
Iteration 60/100 | Max Change (Delta): 0.066587
Iteration 70/100 | Max Change (Delta): 0.054455
Iteration 80/100 | Max Change (Delta): 0.044537
Iteration 90/100 | Max Change (Delta): 0.036427


INFO:root:Policy updated.
 54%|█████▍    | 21604/40000 [25:34<16:38, 18.42it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029794

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.06371D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.01941D+04    |proj g|=  6.61744D-01

At iterate    2    f=  1.01865D+04    |proj g|=  6.31408D-01

At iterate    3    f=  1.01782D+04    |proj g|=  4.79900D-01

At iterate    4    f=  1.01782D+04    |proj g|=  1.77686D-01

At iterate    5    f=  1.01782D+04    |proj g|=  2.64243D-02

At iterate    6    f=  1.01782D+04    |proj g|=  1.46998D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.478 0.178 0.449 0.292]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 54%|█████▍    | 21604/40000 [25:50<16:38, 18.42it/s]

Iteration 10/100 | Max Change (Delta): 0.209190
Iteration 20/100 | Max Change (Delta): 0.153806
Iteration 30/100 | Max Change (Delta): 0.122587
Iteration 40/100 | Max Change (Delta): 0.099533
Iteration 50/100 | Max Change (Delta): 0.081256
Iteration 60/100 | Max Change (Delta): 0.066426
Iteration 70/100 | Max Change (Delta): 0.054323
Iteration 80/100 | Max Change (Delta): 0.044430
Iteration 90/100 | Max Change (Delta): 0.036339


INFO:root:Policy updated.
 57%|█████▋    | 22774/40000 [26:51<16:28, 17.43it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029722

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.14302D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.09466D+04    |proj g|=  6.57738D-01

At iterate    2    f=  1.09379D+04    |proj g|=  6.25849D-01

At iterate    3    f=  1.09285D+04    |proj g|=  4.86807D-01

At iterate    4    f=  1.09285D+04    |proj g|=  2.07747D-01

At iterate    5    f=  1.09285D+04    |proj g|=  3.57772D-02

At iterate    6    f=  1.09285D+04    |proj g|=  1.86728D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.485 0.179 0.459 0.289]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 57%|█████▋    | 22774/40000 [27:10<16:28, 17.43it/s]

Iteration 10/100 | Max Change (Delta): 0.207380
Iteration 20/100 | Max Change (Delta): 0.152526
Iteration 30/100 | Max Change (Delta): 0.121678
Iteration 40/100 | Max Change (Delta): 0.098829
Iteration 50/100 | Max Change (Delta): 0.080691
Iteration 60/100 | Max Change (Delta): 0.065967
Iteration 70/100 | Max Change (Delta): 0.053948
Iteration 80/100 | Max Change (Delta): 0.044123
Iteration 90/100 | Max Change (Delta): 0.036088


INFO:root:Policy updated.
 60%|██████    | 24058/40000 [28:10<15:35, 17.04it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029517

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.22426D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.17341D+04    |proj g|=  6.58283D-01

At iterate    2    f=  1.17257D+04    |proj g|=  6.27019D-01

At iterate    3    f=  1.17167D+04    |proj g|=  4.79152D-01

At iterate    4    f=  1.17167D+04    |proj g|=  1.89588D-01

At iterate    5    f=  1.17167D+04    |proj g|=  2.36598D-02

At iterate    6    f=  1.17167D+04    |proj g|=  1.32352D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.478 0.189 0.454 0.287]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...


 60%|██████    | 24058/40000 [28:20<15:35, 17.04it/s]

Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207500
Iteration 20/100 | Max Change (Delta): 0.152558
Iteration 30/100 | Max Change (Delta): 0.121607
Iteration 40/100 | Max Change (Delta): 0.098750
Iteration 50/100 | Max Change (Delta): 0.080620
Iteration 60/100 | Max Change (Delta): 0.065908
Iteration 70/100 | Max Change (Delta): 0.053899
Iteration 80/100 | Max Change (Delta): 0.044083
Iteration 90/100 | Max Change (Delta): 0.036055


INFO:root:Policy updated.
 63%|██████▎   | 25355/40000 [29:27<14:22, 16.98it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029490

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.30500D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.25104D+04    |proj g|=  6.56823D-01

At iterate    2    f=  1.25012D+04    |proj g|=  6.25187D-01

At iterate    3    f=  1.24916D+04    |proj g|=  4.78816D-01

At iterate    4    f=  1.24916D+04    |proj g|=  2.01404D-01

At iterate    5    f=  1.24916D+04    |proj g|=  1.47591D-02

At iterate    6    f=  1.24916D+04    |proj g|=  8.41414D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.477 0.185 0.456 0.295]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...


 63%|██████▎   | 25355/40000 [29:40<14:22, 16.98it/s]

Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.207032
Iteration 20/100 | Max Change (Delta): 0.152255
Iteration 30/100 | Max Change (Delta): 0.121446
Iteration 40/100 | Max Change (Delta): 0.098642
Iteration 50/100 | Max Change (Delta): 0.080536
Iteration 60/100 | Max Change (Delta): 0.065841
Iteration 70/100 | Max Change (Delta): 0.053845
Iteration 80/100 | Max Change (Delta): 0.044038
Iteration 90/100 | Max Change (Delta): 0.036019


INFO:root:Policy updated.
 67%|██████▋   | 26715/40000 [30:47<13:00, 17.02it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029460

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.38596D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.32841D+04    |proj g|=  6.55386D-01

At iterate    2    f=  1.32756D+04    |proj g|=  6.25947D-01

At iterate    3    f=  1.32668D+04    |proj g|=  4.73205D-01

At iterate    4    f=  1.32668D+04    |proj g|=  2.01378D-01

At iterate    5    f=  1.32668D+04    |proj g|=  9.71557D-03

At iterate    6    f=  1.32668D+04    |proj g|=  5.91225D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final pro

INFO:root:Theta updated. New theta_hat: [0.472 0.201 0.449 0.296]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...


 67%|██████▋   | 26715/40000 [31:00<13:00, 17.02it/s]

Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.206052
Iteration 20/100 | Max Change (Delta): 0.151456
Iteration 30/100 | Max Change (Delta): 0.120827
Iteration 40/100 | Max Change (Delta): 0.098140
Iteration 50/100 | Max Change (Delta): 0.080129
Iteration 60/100 | Max Change (Delta): 0.065508
Iteration 70/100 | Max Change (Delta): 0.053573
Iteration 80/100 | Max Change (Delta): 0.043816
Iteration 90/100 | Max Change (Delta): 0.035837


INFO:root:Policy updated.
 70%|██████▉   | 27931/40000 [32:08<12:17, 16.36it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029311

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.46777D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.40853D+04    |proj g|=  6.57685D-01

At iterate    2    f=  1.40777D+04    |proj g|=  6.30962D-01

At iterate    3    f=  1.40701D+04    |proj g|=  4.64814D-01

At iterate    4    f=  1.40701D+04    |proj g|=  1.92860D-01

At iterate    5    f=  1.40701D+04    |proj g|=  9.65621D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.463 0.217 0.433 0.292]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...


 70%|██████▉   | 27931/40000 [32:20<12:17, 16.36it/s]

Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208978
Iteration 20/100 | Max Change (Delta): 0.153607
Iteration 30/100 | Max Change (Delta): 0.122481
Iteration 40/100 | Max Change (Delta): 0.099448
Iteration 50/100 | Max Change (Delta): 0.081187
Iteration 60/100 | Max Change (Delta): 0.066370
Iteration 70/100 | Max Change (Delta): 0.054277
Iteration 80/100 | Max Change (Delta): 0.044392
Iteration 90/100 | Max Change (Delta): 0.036308


INFO:root:Policy updated.
 73%|███████▎  | 29147/40000 [33:29<11:19, 15.98it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029697

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.55092D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.48848D+04    |proj g|=  6.57528D-01

At iterate    2    f=  1.48761D+04    |proj g|=  6.31148D-01

At iterate    3    f=  1.48674D+04    |proj g|=  4.73607D-01

At iterate    4    f=  1.48674D+04    |proj g|=  2.07696D-01


 73%|███████▎  | 29147/40000 [33:40<11:19, 15.98it/s]


At iterate    5    f=  1.48674D+04    |proj g|=  1.04729D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   1.047D-02   1.487D+04
  F =   14867.364300527506     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.472 0.208 0.43  0.297]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.208095
Iteration 20/100 | Max Change (Delta): 0.153008
Iteration 30/100 | Max Change (Delta): 0.122046
Iteration 40/100 | Max Change (Delta): 0.099111
Iteration 50/100 | Max Change (Delta): 0.080916
Iteration 60/100 | Max Change (Delta): 0.066150
Iteration 70/100 | Max Change (Delta): 0.054098
Iteration 80/100 | Max Change (Delta): 0.044245
Iteration 90/100 | Max Change (Delta): 0.036188


INFO:root:Policy updated.
 76%|███████▌  | 30440/40000 [34:52<10:03, 15.84it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029598

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.63412D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.56874D+04    |proj g|=  6.57075D-01

At iterate    2    f=  1.56786D+04    |proj g|=  6.31665D-01

At iterate    3    f=  1.56698D+04    |proj g|=  4.72381D-01

At iterate    4    f=  1.56698D+04    |proj g|=  2.12857D-01

At iterate    5    f=  1.56698D+04    |proj g|=  1.13230D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.471 0.213 0.428 0.294]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 76%|███████▌  | 30440/40000 [35:10<10:03, 15.84it/s]

Iteration 10/100 | Max Change (Delta): 0.208265
Iteration 20/100 | Max Change (Delta): 0.153053
Iteration 30/100 | Max Change (Delta): 0.122000
Iteration 40/100 | Max Change (Delta): 0.099052
Iteration 50/100 | Max Change (Delta): 0.080863
Iteration 60/100 | Max Change (Delta): 0.066105
Iteration 70/100 | Max Change (Delta): 0.054060
Iteration 80/100 | Max Change (Delta): 0.044214
Iteration 90/100 | Max Change (Delta): 0.036163


INFO:root:Policy updated.
 79%|███████▉  | 31734/40000 [36:15<08:44, 15.75it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029578

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.71740D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.65005D+04    |proj g|=  6.59066D-01

At iterate    2    f=  1.64913D+04    |proj g|=  6.34217D-01

At iterate    3    f=  1.64823D+04    |proj g|=  4.72416D-01

At iterate    4    f=  1.64822D+04    |proj g|=  2.10998D-01

At iterate    5    f=  1.64822D+04    |proj g|=  1.67539D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.471 0.211 0.422 0.292]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...


 79%|███████▉  | 31734/40000 [36:30<08:44, 15.75it/s]

Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.209289
Iteration 20/100 | Max Change (Delta): 0.153830
Iteration 30/100 | Max Change (Delta): 0.122586
Iteration 40/100 | Max Change (Delta): 0.099518
Iteration 50/100 | Max Change (Delta): 0.081240
Iteration 60/100 | Max Change (Delta): 0.066412
Iteration 70/100 | Max Change (Delta): 0.054311
Iteration 80/100 | Max Change (Delta): 0.044420
Iteration 90/100 | Max Change (Delta): 0.036331


INFO:root:Policy updated.
 83%|████████▎ | 33045/40000 [37:38<07:21, 15.77it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029715

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.80100D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.73131D+04    |proj g|=  6.61478D-01

At iterate    2    f=  1.73028D+04    |proj g|=  6.38594D-01

At iterate    3    f=  1.72925D+04    |proj g|=  4.84803D-01


 83%|████████▎ | 33045/40000 [37:50<07:21, 15.77it/s]


At iterate    4    f=  1.72925D+04    |proj g|=  2.07583D-01

At iterate    5    f=  1.72925D+04    |proj g|=  1.49969D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   1.500D-02   1.729D+04
  F =   17292.527614000246     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.483 0.207 0.412 0.286]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.210659
Iteration 20/100 | Max Change (Delta): 0.154757
Iteration 30/100 | Max Change (Delta): 0.123322
Iteration 40/100 | Max Change (Delta): 0.100120
Iteration 50/100 | Max Change (Delta): 0.081732
Iteration 60/100 | Max Change (Delta): 0.066814
Iteration 70/100 | Max Change (Delta): 0.054640
Iteration 80/100 | Max Change (Delta): 0.044689
Iteration 90/100 | Max Change (Delta): 0.036551


INFO:root:Policy updated.
 86%|████████▌ | 34319/40000 [39:02<06:04, 15.58it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029895

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.88551D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.81272D+04    |proj g|=  6.61474D-01

At iterate    2    f=  1.81166D+04    |proj g|=  6.39845D-01

At iterate    3    f=  1.81060D+04    |proj g|=  4.86508D-01

At iterate    4    f=  1.81060D+04    |proj g|=  2.08618D-01

At iterate    5    f=  1.81060D+04    |proj g|=  1.81363D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.485 0.208 0.407 0.286]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...


 86%|████████▌ | 34319/40000 [39:20<06:04, 15.58it/s]

Iteration 10/100 | Max Change (Delta): 0.210797
Iteration 20/100 | Max Change (Delta): 0.154915
Iteration 30/100 | Max Change (Delta): 0.123397
Iteration 40/100 | Max Change (Delta): 0.100172
Iteration 50/100 | Max Change (Delta): 0.081771
Iteration 60/100 | Max Change (Delta): 0.066845
Iteration 70/100 | Max Change (Delta): 0.054665
Iteration 80/100 | Max Change (Delta): 0.044709
Iteration 90/100 | Max Change (Delta): 0.036567


INFO:root:Policy updated.
 89%|████████▉ | 35550/40000 [40:24<04:48, 15.41it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029908

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  1.96818D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.89204D+04    |proj g|=  6.61396D-01

At iterate    2    f=  1.89088D+04    |proj g|=  6.38685D-01

At iterate    3    f=  1.88972D+04    |proj g|=  4.89113D-01

At iterate    4    f=  1.88972D+04    |proj g|=  2.07151D-01

At iterate    5    f=  1.88972D+04    |proj g|=  2.40580D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.487 0.206 0.411 0.283]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...


 89%|████████▉ | 35550/40000 [40:41<04:48, 15.41it/s]

Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.211019
Iteration 20/100 | Max Change (Delta): 0.154986
Iteration 30/100 | Max Change (Delta): 0.123476
Iteration 40/100 | Max Change (Delta): 0.100225
Iteration 50/100 | Max Change (Delta): 0.081812
Iteration 60/100 | Max Change (Delta): 0.066878
Iteration 70/100 | Max Change (Delta): 0.054692
Iteration 80/100 | Max Change (Delta): 0.044731
Iteration 90/100 | Max Change (Delta): 0.036585


INFO:root:Policy updated.
 92%|█████████▏| 36628/40000 [41:45<03:47, 14.81it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.029923

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  2.05129D+04    |proj g|=  1.00000D+00

At iterate    1    f=  1.97219D+04    |proj g|=  6.60992D-01

At iterate    2    f=  1.97103D+04    |proj g|=  6.38711D-01

At iterate    3    f=  1.96986D+04    |proj g|=  4.85517D-01

At iterate    4    f=  1.96985D+04    |proj g|=  2.24039D-01

At iterate    5    f=  1.96985D+04    |proj g|=  4.00360D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

INFO:root:Theta updated. New theta_hat: [0.484 0.204 0.409 0.29 ]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...


 92%|█████████▏| 36628/40000 [42:01<03:47, 14.81it/s]

Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.211663
Iteration 20/100 | Max Change (Delta): 0.155487
Iteration 30/100 | Max Change (Delta): 0.123882
Iteration 40/100 | Max Change (Delta): 0.100558
Iteration 50/100 | Max Change (Delta): 0.082085
Iteration 60/100 | Max Change (Delta): 0.067101
Iteration 70/100 | Max Change (Delta): 0.054874
Iteration 80/100 | Max Change (Delta): 0.044880
Iteration 90/100 | Max Change (Delta): 0.036707


INFO:root:Policy updated.
 95%|█████████▍| 37919/40000 [43:07<02:18, 15.07it/s]INFO:root:Updating optimal policy...


Iteration 100/100 | Max Change (Delta): 0.030023

Value iteration finished (max iterations reached).
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         4 variables are exactly at the bounds

At iterate    0    f=  2.13560D+04    |proj g|=  1.00000D+00

At iterate    1    f=  2.05432D+04    |proj g|=  6.62296D-01

At iterate    2    f=  2.05316D+04    |proj g|=  6.41001D-01

At iterate    3    f=  2.05199D+04    |proj g|=  4.82312D-01


 95%|█████████▍| 37919/40000 [43:21<02:18, 15.07it/s]


At iterate    4    f=  2.05199D+04    |proj g|=  2.17503D-01

At iterate    5    f=  2.05199D+04    |proj g|=  3.94097D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    4      5      7      8     0     0   3.941D-02   2.052D+04
  F =   20519.894347108559     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


INFO:root:Theta updated. New theta_hat: [0.481 0.204 0.405 0.292]


Discretization setup:
  - Cumulative Context (cc):    100 steps up to 8.00
  - Cust. Degradation (cx):     50 steps up to 1.00
  - Cust. Revenue (cu):         100 steps up to 1.00 (99.9th percentile)
  - Rental Duration (T):        100 steps up to 76.01 (99.95th percentile)
Pre-computing expectations from 100000 customer samples...
Starting Numba-accelerated pre-computation of arrival dynamics...
Pre-computation complete. ✅

Starting Value Iteration...
Iteration 10/100 | Max Change (Delta): 0.211901
Iteration 20/100 | Max Change (Delta): 0.155612
Iteration 30/100 | Max Change (Delta): 0.123952
Iteration 40/100 | Max Change (Delta): 0.100609
Iteration 50/100 | Max Change (Delta): 0.082124
Iteration 60/100 | Max Change (Delta): 0.067133
Iteration 70/100 | Max Change (Delta): 0.054900
Iteration 80/100 | Max Change (Delta): 0.044901
Iteration 90/100 | Max Change (Delta): 0.036725


INFO:root:Policy updated.
100%|██████████| 40000/40000 [44:33<00:00, 14.96it/s]
INFO:root:Simulation finished.


Iteration 100/100 | Max Change (Delta): 0.030037

Value iteration finished (max iterations reached).
Policy saved to models/simulator_20250916_095648.discrete_policy.pkl


INFO:root:Simulation state saved to models/simulator_20250916_095648.


In [None]:
# Snapshot the simulator's two logs as DataFrames.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Output locations, all tagged with `current_time` (defined in an earlier cell;
# presumably a run timestamp string — confirm).
degradation_csv_path = f'data/degradation_data_{current_time}.csv'
simulation_csv_path = f'data/simulation_data_{current_time}.csv'
simulator_state_path = f'models/simulator_{current_time}'

# Persist both tables (without the index column) and the simulator state.
degradation_df.to_csv(degradation_csv_path, index=False)
simulation_df.to_csv(simulation_csv_path, index=False)
simulator.save(simulator_state_path)

In [None]:
# NOTE(review): this cell repeats the preceding cell's export statement for
# statement. Running both rewrites the same CSV files and the same simulator
# checkpoint (as long as `current_time` is unchanged). Consider deleting one.

# Rebuild the DataFrames from the simulator's logs.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Write both tables (index column omitted) and save the simulator state, all
# tagged with `current_time` (defined in an earlier cell; presumably a run
# timestamp string — confirm).
degradation_df.to_csv(f'data/degradation_data_{current_time}.csv', index=False)
simulation_df.to_csv(f'data/simulation_data_{current_time}.csv', index=False)
simulator.save(f'models/simulator_{current_time}')

### Convergence of $\hat\theta$

In [None]:
# simulator = Simulator.load('models/simulator_0914')

history = pd.DataFrame(simulator.history)
degradation_history = pd.DataFrame(simulator.degradation_history)

# Exploration rate after each theta update.
# NOTE(review): the 0.20 start value and 0.95 decay are hard-coded here;
# presumably they mirror the simulator's exploration schedule — confirm.
epsilons = [0.20 * (0.95 ** i) for i in range(len(simulator.theta_updates))]

# Calendar time of each theta update: the latest calendar_time logged for the
# customer index stored with the update (NaN if that customer has no rows).
times = [
    history.loc[history['customer_id'] == update['customer_idx'], 'calendar_time'].max()
    for update in simulator.theta_updates
]

# L2 and L-infinity norms of the theta estimation error at each update.
theta_errors = [update['theta_hat'] - THETA_TRUE for update in simulator.theta_updates]
L2_errors = [np.linalg.norm(error) for error in theta_errors]
Linf_errors = [np.linalg.norm(error, ord=np.inf) for error in theta_errors]

plt.figure(figsize=(12, 6))
# Raw strings keep the LaTeX backslashes literal (no invalid-escape warnings).
plt.plot(times, L2_errors, label=r'$L_2$ Norm Error', marker='o')
plt.plot(times, Linf_errors, label=r'$L_\infty$ Norm Error', marker='x')
plt.plot(times, epsilons, label='Exploration Rate (ε)', linestyle='--', color='gray')
# plt.yscale('log')
# The x values are calendar times, not customer counts.
plt.xlabel('Calendar Time', fontsize=14)
plt.ylabel('Error Norm', fontsize=14)

plt.title(r'Convergence of $\|\hat{\theta} - \theta\|$', fontsize=18)
plt.legend(fontsize=12)
plt.grid(True)
# Dedicated file name: 'utility_convergence.pdf' is written by the u-hat plot
# and would overwrite this figure.
plt.savefig('figures/theta_convergence.pdf')
plt.show()

### Convergence of $\hat u$

In [None]:
# L2 and L-infinity norms of the utility estimation error at each update.
utility_errors = [update['u_hat'] - UTILITY_TRUE for update in simulator.utility_updates]
L2_errors = [np.linalg.norm(error) for error in utility_errors]
Linf_errors = [np.linalg.norm(error, ord=np.inf) for error in utility_errors]

plt.figure(figsize=(12, 6))
# Raw strings keep the LaTeX backslashes literal (no invalid-escape warnings).
plt.plot(L2_errors, label=r'$L_2$ Norm Error', marker='o')
plt.plot(Linf_errors, label=r'$L_\infty$ Norm Error', marker='x')
# plt.yscale('log')
# No x values are passed, so the axis is the position in utility_updates.
plt.xlabel('Utility Update Index', fontsize=14)
plt.ylabel('Error Norm', fontsize=14)

plt.title(r'Convergence of $\|\hat u - u\|$', fontsize=18)
plt.legend(fontsize=12)
plt.grid(True)
plt.savefig('figures/utility_convergence.pdf')
plt.show()

### Revenue of Online Learner

In [None]:
# Fresh DataFrame views of the simulator's logs.
degradation_df = pd.DataFrame(simulator.degradation_history)
simulation_df = pd.DataFrame(simulator.history)

# Net profit per row is profit + loss (loss is presumably recorded as a
# non-positive amount — TODO confirm); the running total follows row order.
net_profit = simulation_df['profit'] + simulation_df['loss']
simulation_df['net_profit'] = net_profit
simulation_df['cumulative_net_profit'] = net_profit.cumsum()

fig = plt.figure(figsize=(10, 6))

# Cumulative net profit of the online learner against calendar time.
plt.plot(
    simulation_df['calendar_time'],
    simulation_df['cumulative_net_profit'],
    label='Cumulative Net Profit',
)
plt.title('Cumulative Net Profit Over Time')
plt.xlabel('Calendar Time')
plt.ylabel('Cumulative Net Profit')
plt.grid()
plt.legend()
plt.savefig('figures/cumulative_net_profit_online.pdf')
plt.show()

## Training policy under perfect information

### Revenue of Optimal Policy

In [None]:
class PerfectDegradationLearner:
    """Oracle degradation learner that returns ground-truth quantities.

    Nothing is estimated: `get_theta` hands back the `theta_true` given at
    construction, and the baseline methods delegate to `hazard_model`.
    Presumably this exposes the subset of the degradation-learner interface
    that the DP agent calls — confirm against the agent's usage.

    Args:
        d: Context dimension; stored but not otherwise used by this class.
        theta_true: Ground-truth degradation parameter vector.
        hazard_model: Object providing `Lambda_0(t)` and `Lambda_0_inverse(u)`.
    """

    def __init__(self, d, theta_true, hazard_model):
        self.d = d
        self.theta_true = theta_true
        # Required: backs cum_baseline() and inverse_cum_baseline().
        self.hazard_model = hazard_model

    def get_theta(self):
        """Return the ground-truth theta (the same object that was passed in)."""
        return self.theta_true

    def cum_baseline(self, t):
        """Return `hazard_model.Lambda_0(t)`."""
        return self.hazard_model.Lambda_0(t)

    def inverse_cum_baseline(self, u):
        """Return `hazard_model.Lambda_0_inverse(u)`."""
        return self.hazard_model.Lambda_0_inverse(u)
    
# Oracle learner built from the ground-truth theta and the hazard model
# defined in an earlier cell.
perfect_degradation_learner = PerfectDegradationLearner(
    d=D,
    theta_true=THETA_TRUE,
    hazard_model=usage_exp_hazard_model,
)

# Reuse the online run's grid sizes, cumulative-context cap and MDP parameters,
# but plan with the true utility vector instead of an estimate.
training_hyperparams = simulator.training_hyperparams
perfect_dpagent = DiscretizedDPAgent(
    N=training_hyperparams['N'],
    max_cumulative_context=training_hyperparams['max_cumulative_context'],
    u_hat=UTILITY_TRUE,
    degradation_learner=perfect_degradation_learner,
    customer_generator=customer_gen,
    params=simulator.mdp_params,
)

# Pre-compute dynamics from 100000 samples, then run 150 value-iteration sweeps
# (a fixed count, not training_hyperparams['num_value_iterations']).
perfect_dpagent._precompute_dynamics(100000)
perfect_dpagent.run_value_iteration(150)

# Greedy policy of the oracle agent.
perfect_policy = perfect_dpagent.get_policy('greedy')

In [None]:
# Capture the online-learning history before rolling out the oracle policy.
simulation_df = pd.DataFrame(simulator.history)
# simulator.degradation_learner = perfect_degradation_learner

# Roll out the perfect-information policy for 100000 steps.
samples = pd.DataFrame(
    simulator.run_full_exploit(100000, perfect_policy, {'tau': 0.01})
)

# Same derived columns on both frames: per-row net profit, its running total,
# and the running total divided by elapsed calendar time.
for frame in (simulation_df, samples):
    frame['net_profit'] = frame['profit'] + frame['loss']
    frame['cumulative_net_profit'] = frame['net_profit'].cumsum()
    frame['netprofit_per_time'] = frame['cumulative_net_profit'] / frame['calendar_time']

In [None]:
def calculate_rolling_rate(df, time_col, value_col, window_size):
    """
    Calculates the rate of a value over a trailing time window on irregular time series data.

    For each row with time `t`, the values of all rows whose time lies in the
    closed interval `[t - window_size, t]` are summed and divided by
    `window_size`. Rows earlier than `window_size` after the first observation
    are still divided by the full `window_size`, so their rate is understated;
    callers typically filter those rows out.

    The input does not need to be sorted, and the result is aligned with the
    input: it carries `df.index` in the original row order, so it can be
    assigned straight back as a column.

    Args:
        df (pd.DataFrame): The input dataframe.
        time_col (str): The name of the column with time data.
        value_col (str): The name of the column with values to aggregate (e.g., 'net_profit').
        window_size (int | float): The duration of the rolling time window; must be positive.

    Returns:
        pd.Series: The rolling rate for each row, indexed like `df`.

    Raises:
        ValueError: If `window_size` is not strictly positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size!r}")

    times = df[time_col].to_numpy()
    values = df[value_col].to_numpy()

    # Work on a time-sorted view but remember the permutation, so results can be
    # written back to the rows they belong to. A stable sort keeps tied
    # timestamps in their original relative order.
    order = np.argsort(times, kind='stable')
    sorted_times = times[order]
    sorted_values = values[order]

    # First sorted position whose time is >= t_i - window_size, for every i.
    window_starts = np.searchsorted(sorted_times, sorted_times - window_size, side='left')

    # sum(values[j..i]) == cumsum[i] - cumsum[j - 1]; shifting the cumulative sum
    # by one position (with a leading 0) handles j == 0 without a branch.
    cumulative = np.cumsum(sorted_values)
    cumulative_before = np.concatenate(([0], cumulative[:-1]))
    window_sums = cumulative - cumulative_before[window_starts]

    # Scatter the rates back into the original row order.
    rates = np.empty(len(times), dtype=float)
    rates[order] = window_sums / window_size

    return pd.Series(rates, index=df.index)


# --- 2. Calculate net profit and the rolling rate for each DataFrame ---

# Length of the trailing window, in calendar-time units.
window_duration = 20000

# Swap in `[samples]` to process the perfect-information rollout instead.
for df in [simulation_df]:
    # Per-row net profit (profit plus loss), written in place.
    df['net_profit'] = df['profit'].add(df['loss'])
    # Trailing-window profit rate from the helper defined above.
    df['profit_rate'] = calculate_rolling_rate(
        df,
        time_col='calendar_time',
        value_col='net_profit',
        window_size=window_duration,
    )

In [None]:
# --- 3. Plot the new rolling profit rate ---

calendar_time = simulation_df['calendar_time']
max_time = calendar_time.max()
# To compare against the oracle rollout, cap the horizon at
# min(simulation_df['calendar_time'].max(), samples['calendar_time'].max())
# and build `samples_plot` from `samples` with the same two conditions.

# Keep only rows at or after one full window of elapsed time, and within the
# plotting horizon.
has_full_window = calendar_time >= window_duration
within_horizon = calendar_time <= max_time
simulations_plot = simulation_df[has_full_window & within_horizon]

plt.figure(figsize=(10, 6))

# plt.plot(samples_plot['calendar_time'], samples_plot['profit_rate'], label=f'Optimal Policy (Rolling {window_duration} unit avg)')
plt.plot(
    simulations_plot['calendar_time'],
    simulations_plot['profit_rate'],
    label=f'Online Learning (Rolling {window_duration} unit avg)',
)

plt.title(f'Rolling Profit Rate Over Time (Window = {window_duration} time units)')
plt.xlabel('Calendar Time')
plt.ylabel('Profit Rate (Profit / Time Unit)')
plt.grid(True)
plt.legend()
plt.show()