# Load Dataset

We scale the x,y coordinates to be within the unit square.
For entries with no reported capacity, we impute a value by sampling at random, without replacement, from the reported capacities.

Before the coordinate transformation, a distance of about 540 coordinate units corresponds to roughly a 10-minute walk. (TODO: confirm the units of these coordinates; this estimate comes from picking two hotels and comparing their distance on Google Maps.)

DC Office of the Chief Technology Officer (OCTO). "Hotels." ArcGIS Hub. Accessed February 17, 2026. https://hub.arcgis.com/datasets/DCGIS::hotels/about. 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors

# Load the hotel table; the path is resolved relative to the working directory.
df = pd.read_csv('Hotels.csv')

# Take explicit, writable float copies of the columns we need. `.values` on a
# single-dtype frame may hand back a view (read-only when pandas Copy-on-Write
# is active) or an integer array, and the coordinates are rescaled in place
# below -- which would then either raise or silently truncate to integers.
coord = df[['XCOORD', 'YCOORD']].to_numpy(dtype=float, copy=True)  # shape (n_hotels, 2)
capacities = df['NUMROOMS'].to_numpy(dtype=float, copy=True)       # shape (n_hotels,); NaN = not reported

# Map the coordinates into the unit square. Both axes are shifted to start at 0
# and divided by the SAME factor (the larger of the two coordinate ranges), so
# relative distances between hotels are preserved.
axis_min = coord.min(axis=0)
axis_span = coord.max(axis=0) - axis_min
scale = max(axis_span[0], axis_span[1])
print(scale)
for axis in (0, 1):
    # in-place update of one coordinate column at a time
    coord[:, axis] = (coord[:, axis] - axis_min[axis]) / scale

# For entries that did not have a capacity, draw replacements at random,
# without replacement, from the reported capacities.
# A dedicated seeded generator keeps the imputed values identical across
# "Restart & Run All"; change IMPUTATION_SEED to get a different draw.
IMPUTATION_SEED = 0
missing_mask = np.isnan(capacities)
observed_capacities = capacities[~missing_mask]
num_missing = int(missing_mask.sum())

# Sampling without replacement needs at least as many reported values as gaps.
if num_missing > observed_capacities.size:
    raise ValueError(
        f"cannot impute {num_missing} missing capacities without replacement "
        f"from only {observed_capacities.size} reported values"
    )

permuted_capacities = np.random.default_rng(IMPUTATION_SEED).permutation(observed_capacities)
print(permuted_capacities[0:num_missing])
capacities[missing_mask] = permuted_capacities[0:num_missing]

We plot the distribution of the ground truth capacities (including the artificially filled in capacities).

In [None]:
# Capacities in ascending order; the x axis is simply the rank of each hotel.
sorted_arr = np.sort(capacities)

fig, ax = plt.subplots()
ax.plot(sorted_arr)
ax.set_xlabel('Hotels Sorted')
ax.set_ylabel('Capacities')
plt.show()


We plot the locations of the hotels with the colors given by the capacities.

In [None]:
# Square-root colour scale (gamma=0.5) spanning the observed capacity range.
capacity_low, capacity_high = capacities.min(), capacities.max()
norm = colors.PowerNorm(gamma=0.5, vmin=capacity_low, vmax=capacity_high)

fig, ax = plt.subplots()
hotel_points = ax.scatter(coord[:, 0], coord[:, 1], c=capacities, cmap='plasma', norm=norm)
fig.colorbar(hotel_points, ax=ax, label='Capacity')
ax.set_xlabel('X Coordinate')
ax.set_ylabel('Y Coordinate')
plt.show()


We make the capacities coarser, setting them to ceiling(capacity / 10).

In [None]:
# Coarsen capacities into buckets of CAPACITY_BUCKET_SIZE rooms, rounding up.
# Note: this overwrites `capacities`, so re-running the cell coarsens it again.
CAPACITY_BUCKET_SIZE = 10
capacities = np.ceil(capacities / CAPACITY_BUCKET_SIZE)

## 1. Model <a id='sec1'></a> 
We will use a linear reward function
$$\mu_{it}(S_{it}): = E[Y_{it} \mid S_{it}, W] = a_{i} + b_{it} S_{it}.$$
We choose $a_{i} \sim U[0,1]$, and $b_{it} = (1 + 0.2 \epsilon_{it})/m_i$, where $\epsilon_{it} \sim U[0,1]$.

The state evolves as a clipped random walk with states $\{0,1,\dots,m_i\}$. Specifically, define the *competition level* as the proportion of neighbors assigned arm $1$, formally,  
$$C_{it} = \frac{\sum_{j\in {\cal N}(i)\backslash i}W_{jt}}{|{\cal N}(i)\backslash i|}.$$

At each time $t$, with probability $\lambda^l_{it}$, the state does not change. We choose $\lambda^l_{it} = 0.1$.

Conditioned on the event that the state changes, the departure and arrival rates are governed by parameters $\lambda^d_{it} \sim U[0,1]$ and $$\lambda^a_{it} = \sigma (\alpha_{it} + \beta_{it} W_{it} - \gamma_{it} C_{it}),$$ where $\sigma(\cdot)$ denotes the sigmoid function. We choose $\alpha_{it} \sim N(-5,2)$, $\beta_{it} \sim N(10,4)$ (here the second parameter of $N(\cdot,\cdot)$ is the standard deviation), and $\gamma_{it} \sim U[0,\beta_{it}/2]$.

Conditioned on the event that the state changes, the probability of a departure (state decreasing) is $\lambda^d_{it}/(\lambda^a_{it} + \lambda^d_{it})$, and the probability of an arrival (state increasing) is $\lambda^a_{it}/(\lambda^a_{it} + \lambda^d_{it})$. The states are then clipped to be within $\{0,1,2, \dots, m_i\}$.

By construction, $\gamma_{it} \leq \beta_{it}/2 \leq \beta_{it}$ whenever $\beta_{it} > 0$, so that under full treatment the net effect $\beta_{it} - \gamma_{it}$ on the arrival rate is still positive.

For a given $\delta$ and recency $r$, the exposure mapping is computed as 
$$X_{ita}^{r} = \prod_{t' = t-r}^{t} \mathbb{I}\Big(W_{it'} = a, \sum_{j\in {\cal N}(i)\backslash i}\mathbb{I}(W_{jt'} = a) \geq (1-\delta) |{\cal N}(i)\backslash i|\Big).$$

In [None]:
import time, importlib, sys, os, pickle
import pandas as pd
import numpy as np
from datetime import datetime
from scipy import stats
import matplotlib.pyplot as plt

# set up directory
current_dir = os.getcwd()  # current working directory of the kernel
print("current dir:", current_dir)

# Make the directory one level up importable (presumably so that the
# `helpers` import that follows resolves -- confirm against the repo layout).
parent_dir = os.path.dirname(current_dir)
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from helpers import utils, graph_helpers, mdp_helpers, stats_helpers, print_nicely

In [None]:
# Core simulation settings. Later cells add the spatial-design entries
# (`num_cells_per_dim`, `kappa`) to this same dict.
sim_config = dict(
    # time horizon
    T=100,
    # length of time blocks used for randomized design
    time_block_length=10,
    # how many rounds to look back in HT/Hajek estimator
    recency=10,
    # expo map X_ita^r = 1 if more than 1-delta spatio-temp neighbors are assigned arm a
    delta=0.2,
    # num of treatment assignment matrices used for finding prop score (monte carlo)
    num_W_for_computing_prop=10_000,
    # starting state given to every unit when the chain is simulated
    initial_state=1,
)

In [None]:
importlib.reload(graph_helpers)
importlib.reload(stats_helpers)

# ---------- Spatial interference graph on the rescaled hotel coordinates ----------

# Spatial-design parameters are stored next to the temporal ones in sim_config.
sim_config.update(
    # defines spatial partition of unit square used for randomized design
    num_cells_per_dim=4,
    # two nodes interfere (thru reward and transition) if Euclidean dist between coordinates <= kappa
    kappa=0.1,
)

# Interference graph: built by the helper from the coordinates and kappa.
adj_matrix = graph_helpers.build_adjacency_matrix_from_coords(coord, sim_config['kappa'])
adj_row_sums = np.sum(adj_matrix, axis=1)
# Average row sum of the adjacency matrix (presumably the mean number of
# neighbours per unit -- TODO confirm whether the helper includes self-loops).
print(np.mean(adj_row_sums))

# Spatial clusters: the helper assigns units to cells of the partition; the
# product below combines each unit's adjacency row with the cluster map.
cluster_matrix = graph_helpers.spatial_clustering_map(coord, sim_config['num_cells_per_dim'])
adj_clusters = np.matmul(adj_matrix, cluster_matrix)
cluster_row_sums = np.sum(adj_clusters, axis=1)
print(np.mean(cluster_row_sums))

In [None]:
importlib.reload(mdp_helpers)

# initialize Markov chain model
num_units = adj_matrix.shape[0]
num_rounds = sim_config['T']
max_inventory = capacities

# Draw all model coefficients from a dedicated seeded generator so the
# simulated instance is identical on every "Restart & Run All". Change
# MODEL_SEED to study a different instance. (Any randomness used inside the
# helper's simulation routines is not controlled by this seed.)
MODEL_SEED = 0
model_rng = np.random.default_rng(MODEL_SEED)
param_shape = (num_units, num_rounds)

# Reward intercept a_i ~ U[0,1): one draw per unit, repeated across rounds.
C_baseline = np.outer(model_rng.random(num_units), np.ones(num_rounds))
# Reward slope numerator 1 + 0.2 * U[0,1), drawn per unit and round.
C_slope = 1.0 + 0.2 * model_rng.random(param_shape)
# Constant 0.1 for every unit and round (the "lazy" probability in the text above).
C_lazy = np.full(param_shape, 0.1)
# Arrival-rate coefficients; `scale` is the standard deviation of the normal.
C_alpha = model_rng.normal(loc=-5, scale=2, size=param_shape)
C_beta = model_rng.normal(loc=10, scale=4, size=param_shape)
# gamma = U[0,1) * beta / 2. NOTE(review): a normal draw for beta can be
# negative, in which case gamma is negative as well.
C_gamma = 0.5 * model_rng.random(param_shape) * C_beta
# Departure parameter ~ U[0,1).
C_depart = model_rng.random(param_shape)

MC = mdp_helpers.InventoryMarkovChain(
    max_inventory=max_inventory,
    adj_matrix=adj_matrix,
    num_rounds=num_rounds,
    C_baseline=C_baseline,
    C_slope=C_slope,
    C_lazy=C_lazy,
    C_alpha=C_alpha,
    C_beta=C_beta,
    C_gamma=C_gamma,
    C_depart=C_depart)

In [None]:
# Note: reloading the module does not rebuild the already-constructed `MC`
# object; it keeps using the class it was created from.
importlib.reload(mdp_helpers)
# Approximate the ATE via Monte Carlo: simulate the chain under the all-0 and
# the all-1 assignment and compare the average rewards.

start_time = time.time()

num_sims_apx_ate = 10**4
initial_state = sim_config['initial_state']

# One float64 assignment tensor per arm, shaped (units, rounds, simulations).
assignment_shape = (num_units, num_rounds, num_sims_apx_ate)

sim_results_0 = MC.simulate_MC(
    np.full(num_units, initial_state, dtype=float),
    np.zeros(assignment_shape),
    use_sigmoid=True,
)
sim_results_1 = MC.simulate_MC(
    np.full(num_units, initial_state, dtype=float),
    np.ones(assignment_shape),
    use_sigmoid=True,
)

# Grand mean of the simulated rewards under each arm.
all_0_mean = np.mean(sim_results_0["rewards"])
all_1_mean = np.mean(sim_results_1["rewards"])
true_ATE = all_1_mean - all_0_mean

banner = "=" * 20
print(banner + " True ATE (apx'ed using Monte Carlo) " + banner)
print(f"Mean reward under all-1 vs. all-0: {all_1_mean:.4f} vs. {all_0_mean:.4f}  ")
print(f"True ATE: {true_ATE:.4f}")


In [None]:
importlib.reload(stats_helpers)

horizon = sim_config['T']
lookback = sim_config['recency']

# Time blocks used by the randomized design (built by the helper).
time_cluster_matrix = stats_helpers.generate_time_blocks(T=horizon, time_block_length=sim_config['time_block_length'])

# Temporal window as a band matrix: entry (t, t') is 1 exactly when
# t - recency <= t' <= t, i.e. the lower triangle minus everything more than
# `recency` steps below the diagonal.
lower_triangle = np.tril(np.ones((horizon, horizon)), k=0)
outside_window = np.tril(np.ones((horizon, horizon)), k=-(lookback + 1))
time_adj_matrix = lower_triangle - outside_window

# Average window length per round, then the average number of time blocks
# that a round's window overlaps.
print(np.mean(np.sum(time_adj_matrix, axis=1)))
blocks_touched = time_adj_matrix @ time_cluster_matrix > 0
print(np.mean(np.sum(blocks_touched, axis=1)))

# Estimate the exposure propensity scores by Monte Carlo: draw many treatment
# assignments from the cluster design and pass them to the helper.

arms_tensor = stats_helpers.generate_cluster_treatments(
    cluster_matrix,
    time_cluster_matrix,
    num_W=sim_config['num_W_for_computing_prop'],
)
print(arms_tensor.shape)

emp_prop_score_results = stats_helpers.empirical_propensity_scores(
    arms_tensor,
    adj_matrix,
    time_adj_matrix,
    sim_config['delta'],
)
propensity_1_array = emp_prop_score_results['propensity_1']
propensity_0_array = emp_prop_score_results['propensity_0']

# Sanity checks: smallest score and how many scores are exactly zero.
smallest_1, smallest_0 = np.amin(propensity_1_array), np.amin(propensity_0_array)
print(f"minimum of emp prop score: {smallest_1}, {smallest_0}")
zeros_1 = np.count_nonzero(propensity_1_array == 0)
zeros_0 = np.count_nonzero(propensity_0_array == 0)
print(f"total number of zeros: {zeros_1}, {zeros_0}")

# Display emp_prop_score_results
total_cells = num_units * num_rounds  # number of (unit, round) pairs
prop_1_mean, prop_0_mean = propensity_1_array.mean(), propensity_0_array.mean()
print(f"mean emp prop score (interior units): {prop_1_mean:.4f}, {prop_0_mean:.4f}") 
print(f"expected #(i,t) with X_it=1 is {total_cells*prop_1_mean:.2f}, {total_cells*prop_0_mean:.2f}")
pct_nonzero_0 = np.count_nonzero(propensity_0_array) / total_cells * 100
pct_nonzero_1 = np.count_nonzero(propensity_1_array) / total_cells * 100
print(f"%nz in emp prop_score_0, prop_score_1: {pct_nonzero_0:.2f}%, {pct_nonzero_1:.2f}%")
utils.print_time()

def _inverse_propensity_weights(propensity):
    """Return 1/propensity elementwise, leaving 0 wherever the propensity is 0."""
    weights = np.zeros(np.shape(propensity))
    np.divide(1, propensity, out=weights, where=propensity != 0)
    return weights


def _print_weight_summary(weights):
    """Print mean, standard deviation, maximum and minimum, one per line."""
    for summary_stat in (np.mean, np.std, np.amax, np.amin):
        print(summary_stat(weights))


# Inverse-propensity weights for arm 0, then arm 1.
HT_weights_0 = _inverse_propensity_weights(propensity_0_array)
_print_weight_summary(HT_weights_0)

HT_weights_1 = _inverse_propensity_weights(propensity_1_array)
_print_weight_summary(HT_weights_1)

In [None]:
importlib.reload(stats_helpers)

# Number of independent replications of the randomized experiment.
num_iter_sim = 10_000
initial_state = sim_config['initial_state']
start_time = time.time()

# Draw the treatment assignments from the cluster design, then derive the
# exposure indicators for the configured delta.
W = stats_helpers.generate_cluster_treatments(cluster_matrix, time_cluster_matrix, num_iter_sim)
exposure_results = stats_helpers.exposure_mapping(
    W,
    adj_matrix,
    time_adj_matrix,
    sim_config['delta'],
)

# Simulate the chain under those assignments, every unit starting in
# `initial_state`, and keep the rewards for the estimators.
sim_results = MC.simulate_MC(np.full(num_units, initial_state, dtype=float), W, use_sigmoid=True)
rewards = sim_results["rewards"]

In [None]:
importlib.reload(stats_helpers)

# Horvitz-Thompson estimate of the ATE for each simulated experiment.
ht_results = stats_helpers.horvitz_thompson(
    rewards,
    exposure_results['exposure_1'],
    exposure_results['exposure_0'],
    propensity_1_array,
    propensity_0_array,
)
ate_estimate_ht = ht_results['ate_estimate']

# Summarise the estimates: mean, bias against the Monte Carlo ATE, variance.
mean_HT_est = ate_estimate_ht.mean()
var_HT_est = ate_estimate_ht.var()
bias_HT_est = mean_HT_est - true_ATE

print("\n" + "="*20 + " Horvitz-Thompson Estimates " + "="*20)
print(f"mean_HT_est: {mean_HT_est:.4f}\nbias: {bias_HT_est:.4f}\nvar_HT_est: {var_HT_est:.4f}")

In [None]:
importlib.reload(stats_helpers)

# Hajek estimate of the ATE for each simulated experiment.
hajek_results = stats_helpers.hajek(rewards,exposure_results['exposure_1'],exposure_results['exposure_0'],propensity_1_array,propensity_0_array)
ate_estimate_hajek = hajek_results['ate_estimate']

print("\n" + "="*20 + " Hajek Estimates " + "="*20)
# Mean and variance of the estimates; bias is measured against the
# Monte Carlo approximation of the true ATE.
mean_Hajek_est, var_Hajek_est = ate_estimate_hajek.mean(), ate_estimate_hajek.var()
# The first label previously read "mean_HT_est" (copied from the
# Horvitz-Thompson cell), which mislabelled the Hajek mean in the output.
print(f"mean_Hajek_est: {mean_Hajek_est:.4f}\nbias: {mean_Hajek_est - true_ATE:.4f}\n" + f"var_Hajek_est: {var_Hajek_est:.4f}")

In [None]:
importlib.reload(stats_helpers)

# Difference-in-means estimate for each simulated experiment; no burn-in
# rounds are requested.
burn_in = 0
DM_results = stats_helpers.diff_means(
    rewards,
    W,
    sim_config['time_block_length'],
    burn_in,
)
ate_estimate_DM = DM_results['ate_estimate']

# Summarise the estimates: mean, bias against the Monte Carlo ATE, variance.
mean_DM_est = ate_estimate_DM.mean()
var_DM_est = ate_estimate_DM.var()
bias_DM_est = mean_DM_est - true_ATE

print("\n" + "="*20 + " Diff-in-Means Estimates " + "="*20)
print(f"mean_DM_est: {mean_DM_est:.4f}\nbias: {bias_DM_est:.4f}\nvar_DM_est: {var_DM_est:.4f}")