In [1]:
import pickle
import numpy as np
import pandas as pd
from scipy.optimize import minimize

from simulation import Simulator, CustomerGenerator
from hazard_models import ExponentialHazard
from utility_learner import ProjectedVolumeLearner, diam
from degradation_learner import DegradationLearner

from utils import unit_ball_rejection_sample, correct_signs
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

import logging
logging.basicConfig(level=logging.INFO)

In [2]:
# --- 2. Define Sampling Functions ---
# def context_sampler() -> np.ndarray:
#     """Samples a customer's context vector from a uniform distribution."""
#     return np.random.uniform(low=0.0, high=1.0, size=D)

def context_sampler() -> np.ndarray:
    """Draw one customer context vector with non-negative components.

    A point is obtained from ``unit_ball_rejection_sample(D)`` and every
    coordinate is then replaced by its absolute value, so the returned vector
    has no negative entries (it is not spread over the whole ball).
    """
    raw_point = unit_ball_rejection_sample(D)
    return np.abs(raw_point)

def rental_sampler() -> float:
    """Draw a customer's desired rental duration.

    The value comes from NumPy's global RNG and is exponentially distributed
    with scale (mean) 20.0.
    """
    mean_duration = 20.0
    return np.random.exponential(scale=mean_duration)

def interarrival_sampler() -> float:
    """Draw the waiting time until the next customer arrives.

    The value comes from NumPy's global RNG and is exponentially distributed
    with scale (mean) 5.0.
    """
    mean_gap = 5.0
    return np.random.exponential(scale=mean_gap)

In [3]:
# --- 1. Simulation Configuration ---
D = 5                  # context-vector dimension
LAMBDA_VAL = 1e-3      # baseline hazard constant
NUM_CUSTOMERS = 2_000  # number of customers to simulate, i.e. the horizon T

# Seed NumPy's global RNG before anything below draws from it, so that
# UTILITY_TRUE (and every later sample) is repeatable across runs.
np.random.seed(41)

# Ground-truth vectors
THETA_TRUE = np.array([0.5, 0.2, 0.1, 0.3, 0.4])  # degradation
UTILITY_TRUE = context_sampler()                   # customer's willingness to pay

# --- Machine's Pricing Vector 'r' ---
# Fallback pricing vector, used when u_hat is not fed to calculate_price.
PRICING_R = np.zeros(D)

In [4]:
# Exponential hazard model parameterised by the baseline constant LAMBDA_VAL.
usage_exp_hazard_model = ExponentialHazard(lambda_val=LAMBDA_VAL)
# spontaneous_exp_hazard_model = None # ExponentialHazard(lambda_val=0.01)

# Customer stream: plugs the three sampler callables into the generator.
customer_gen = CustomerGenerator(
    interarrival_sampler=interarrival_sampler,
    rental_sampler=rental_sampler,
    context_sampler=context_sampler,
    d=D,
)

# Options forwarded to the learner as `centroid_params`. Deliberately empty;
# the commented entries record the settings that can be switched back on.
centroid_params = dict(
    # num_samples=2000,
    # thin=None,
    # burn_in=500 * D ** 2,
    # tol=1e-4,
    # rho_target=0.01,
)


def termination_rule(diameter):
    """Example custom termination rule: true once `diameter` drops below 0.11."""
    return diameter < 0.11


projected_volume_learner = ProjectedVolumeLearner(
    termination_rule=termination_rule,
    incentive_constant=1.1,
    centroid_params=centroid_params,
    d=D,
    T=NUM_CUSTOMERS,
)

# Cost structure and optimisation settings handed to the simulator as `mdp_params`.
mdp_params = dict(
    replacement_cost=1.5,     # cost to replace the machine
    failure_cost=0.75,        # additional penalty for an in-service failure
    holding_cost_rate=0.02,   # cost per unit of idle time
    gamma=0.999,              # discount factor
    learning_rate=1e-4,       # learning rate for the Adam optimizer
    target_update_freq=10,    # target-network update period (in iterations)
)

# Settings for each policy (re)training round.
training_hyperparams = dict(
    num_iterations=50,     # training iterations per policy update
    dataset_size=50000,    # transitions generated for the offline dataset
    batch_size=256,        # mini-batch size during training
)

# Assemble the simulator from the configuration and the components built above.
simulator = Simulator(
    # problem size
    T=NUM_CUSTOMERS,
    d=D,

    # ground truth and fallback pricing
    utility_true=UTILITY_TRUE,
    theta_true=THETA_TRUE,
    pricing_r=PRICING_R,

    # components
    projected_volume_learner=projected_volume_learner,  # the learner configured above
    customer_generator=customer_gen,
    usage_hazard_model=usage_exp_hazard_model,

    # policy-learning settings
    training_hyperparams=training_hyperparams,
    mdp_params=mdp_params,
    time_normalize=True,
    policy_update_threshold=5,
)

In [5]:
# from degradation_learner import breslow_baseline_estimator

# degradation_history = pd.DataFrame(simulator.degradation_history)
# degradation_history['life_id'] = (degradation_history['event'].shift(1).fillna(-99) == 1).cumsum()  # 0 after breakdown

# breslow_df = breslow_baseline_estimator(
#     degradation_history, 
#     simulator.degradation_learner.get_theta()
# )

# breslow_df = breslow_df[breslow_df['delta_t'] > 0]
# times = breslow_df['time'].values
# lambda_step = breslow_df['lambda_0'].values

# times

In [None]:
# simulator.projected_volume_learner.is_terminated = True
# Run the simulation for NUM_CUSTOMERS customers, then tabulate the returned
# records and the simulator's recorded degradation history as DataFrames.
simulation_data = simulator.run(num_customers=NUM_CUSTOMERS)
simulation_df = pd.DataFrame(simulation_data)
degradation_df = pd.DataFrame(simulator.degradation_history)

INFO:root:Starting simulation for 2000 customers...
  0%|          | 0/2000 [00:00<?, ?it/s]

Set parameter Username


INFO:gurobipy:Set parameter Username


Set parameter LicenseID to value 2651514


INFO:gurobipy:Set parameter LicenseID to value 2651514


Academic license - for non-commercial use only - expires 2026-04-14


INFO:gurobipy:Academic license - for non-commercial use only - expires 2026-04-14
INFO:root:Customer 1: Diameter: 1.0118
  0%|          | 1/2000 [00:02<1:33:30,  2.81s/it]INFO:root:Customer 2: Diameter: 0.9116
  0%|          | 2/2000 [00:05<1:37:46,  2.94s/it]INFO:root:Customer 3: Diameter: 0.6042
  0%|          | 3/2000 [00:09<1:42:01,  3.07s/it]INFO:root:Customer 4: Diameter: 0.6316
  0%|          | 4/2000 [00:12<1:46:51,  3.21s/it]INFO:root:Customer 5: Diameter: 0.4607
  0%|          | 5/2000 [00:16<1:53:10,  3.40s/it]INFO:root:Customer 6: Diameter: 0.5092
  0%|          | 6/2000 [00:20<1:58:42,  3.57s/it]INFO:root:Customer 7: Diameter: 0.3132
  0%|          | 7/2000 [00:24<2:05:36,  3.78s/it]INFO:root:Customer 8: Diameter: 0.3842
  0%|          | 8/2000 [00:28<2:12:21,  3.99s/it]INFO:root:Customer 9: Diameter: 0.1759
  0%|          | 9/2000 [00:33<2:18:48,  4.18s/it]INFO:root:Customer 10: Diameter: 0.2478
  0%|          | 10/2000 [00:38<2:26:41,  4.42s/it]INFO:root:Customer 11: Dia

In [None]:
# Display the table built from the records returned by simulator.run().
simulation_df

In [None]:
# Fit a fresh degradation learner (initial theta = 0) on the history recorded
# during the simulation, then show the estimated theta.
# The input is `degradation_df`, the frame built right after simulator.run();
# `degradation_history` is never assigned in an executed cell, so using it here
# raises NameError on a fresh kernel.
degradation_learner = DegradationLearner(d=D, initial_theta=np.zeros(D))

degradation_learner.fit(degradation_df)
degradation_learner.get_theta()

Testing Policy

In [None]:
# Test Case 1: Arrival State
# NOTE(review): `df` and `optimal_policy` are not assigned in any cell visible
# in this notebook, so this cell fails on a fresh kernel. Confirm which frame
# (possibly `simulation_df`) and which policy callable are intended.
i = 50  # row i supplies X_i; row i+1 supplies the context and rental duration

X_i = df.loc[i, 'sum_of_contexts_after']
I_i = 3 # df.loc[i, '']  -- hard-coded placeholder: the source column name was never filled in
x_i = df.loc[i+1, 'customer_context']
T_i = df.loc[i+1, 'rental_duration']

# State layout: [X_i, x_i, T_i, I_i, flag]. The last entry is 0.0 here and
# 1.0 in the departure-state test.
arrival_state = np.concatenate([
    X_i,
    x_i,
    [T_i, I_i, 0.0]
])
action_arrival = optimal_policy(arrival_state)
# Labels for actions 0 and 1 only; any other returned action raises KeyError below.
action_map = {0: 'Give Max Acceptable Price', 1: 'Shutdown'}
print(f"Sample Arrival State. Optimal Action: {action_map[action_arrival]}")


In [None]:
# Test Case 2: Departure State
# Reuses X_i, x_i and I_i from the arrival-state cell, so that cell must run first.
# NOTE(review): `optimal_policy` is not assigned in any cell visible here — confirm its source.
# The state has the same layout as the arrival state, but with a zero context
# block, a zero duration entry and 1.0 as the final entry.
departure_state = np.concatenate([
    X_i+x_i*10, 
    np.zeros(D), 
    [0.0, I_i, 1.0]
])
action_departure = optimal_policy(departure_state)
# Rebinds action_map with labels for actions 2 and 3 only; any other returned
# action raises KeyError below.
action_map = {2: 'Replace Machine', 3: 'Do Not Replace'}
print(f"Sample Departure State. Optimal Action: {action_map[action_departure]}")
