# Requirement 5

In [22]:
import numpy as np

np.set_printoptions(precision=3)

class PricingEnvConversion:
    def __init__(self, mu, B, T, conversion_fn, n_customers=100):
        """
        mu: N x P matrix of per-unit profit (price - cost)
        B: total production capacity (shared among products, not used right now)
        T: total number of rounds
        conversion_fn: function which for a given price index gives us buying probability
        n_customers: number of potential buyers per round
        """
        self.mu = np.array(mu)
        self.B = B
        self.T = T
        self.conversion_fn = conversion_fn
        self.n_customers = n_customers
        self.t = 0
        self.N, self.P = self.mu.shape

    def round(self, price_vector):
        """
        price_vector: length N, each entry is an integer in [0, P-1] 
                      representing chosen price index for each product.
        """
        if self.t >= self.T:
            raise Exception("Environment finished all rounds.")

        rewards_per_unit = self.mu[np.arange(self.N), price_vector]

        probabilities = np.array([self.conversion_fn(price_vector[i]) for i in range(self.N)])
        demand = np.random.binomial(self.n_customers, probabilities)

        # Disabled inventory constraint for now
        """# Apply shared capacity constraint
        total_demand = np.sum(demand)
        if total_demand > self.B:
            # Randomly allocate units to products proportionally to demand
            allocation = np.zeros_like(demand)
            for _ in range(int(self.B)):
                i = np.random.choice(np.arange(self.N), p=demand/total_demand)
                allocation[i] += 1
            demand = allocation"""

        profit = rewards_per_unit * demand

        self.t += 1
        return profit


In [23]:
class CombinatorialUCB:
    def __init__(self, N, P, B):
        """
        N: number of products
        P: number of prices
        B: shared capacity (not used right now)
        """
        self.N = N
        self.P = P
        self.B = B
        self.counts = np.zeros((N, P))  # times each arm is chosen
        self.means = np.zeros((N, P))   # estimated mean rewards
        self.t = 0

    def pull_arm(self):
        # Compute UCB for all product-price pairs
        ucb = self.means + np.sqrt(np.log(max(1, self.t + 1)) / (self.counts + 1e-6))
        
        # pick best price per product by UCB
        prices = np.argmax(ucb, axis=1)
        
        return prices

    def update(self, prices, rewards):
        """
        prices: chosen price index per product
        rewards: observed per-product reward
        """
        self.t += 1
        for i in range(self.N):
            p = prices[i]
            self.counts[i, p] += 1
            n = self.counts[i, p]
            self.means[i, p] += (rewards[i] - self.means[i, p]) / n


In [24]:
import numpy as np

conversion_fn = lambda p: 1 - p/20

# Parameters
N = 3        # number of products
P = 5        # number of price options per product
B = 2        # shared capacity
T = 500_000   # number of rounds

# Per-unit profits matrix (N x P)
mu = np.array([
    [2, 4, 6, 8, 10],
    [1, 3, 5, 7, 9],
    [0.5, 2, 4, 6, 8]
])

# Compute clairvoyant (best fixed price per product)
expected_profits = np.zeros_like(mu, dtype=float)
n_customers = 100 
for i in range(N):
    for p in range(P):
        prob = conversion_fn(p)
        demand = prob * n_customers  
        expected_profits[i, p] = mu[i, p] * demand

best_price_indices = np.argmax(expected_profits, axis=1)
clairvoyant_total = expected_profits[np.arange(N), best_price_indices].sum()
print("Clairvoyant prices per product:", best_price_indices)
print("Clairvoyant total profit per round:", clairvoyant_total)

for trial in range(4):

    print("----------------------------------------------")

    env = PricingEnvConversion(mu=mu, B=B, T=T, conversion_fn=conversion_fn)
    agent = CombinatorialUCB(N=N, P=P, B=B)

    all_rewards = []
    cumulative_regret = []

    for t in range(T):
        prices = agent.pull_arm()

        rewards = env.round(prices)

        agent.update(prices, rewards)

        all_rewards.append(rewards)
        
        cumulative_regret.append(clairvoyant_total - rewards.sum())

    all_rewards = np.array(all_rewards)
    cumulative_regret = np.cumsum(cumulative_regret)

    ucb = agent.means + np.sqrt(np.log(max(1, agent.t + 1)) / (agent.counts + 1e-6))
    predicted_best = np.argmax(ucb, axis=1)
    print(f"Agent's predicted best prices at end of trial {trial+1}: {predicted_best}")

    print("Cumulative profit per product:", all_rewards.sum(axis=0))
    print("Total cumulative profit:", all_rewards.sum())
    print("Final estimated mean rewards (agent):")
    print(agent.means)
    print("Number of times each price was pulled:")
    print(agent.counts)

    # Check if regret is sublinear
    avg_regret = cumulative_regret[-1] / T
    print(f"Average regret per round: {avg_regret:.4f}")
    if avg_regret < 1:
        print("Regret appears sublinear")
    else:
        print("Regret may not be sublinear")


Clairvoyant prices per product: [4 4 4]
Clairvoyant total profit per round: 2160.0
----------------------------------------------
Agent's predicted best prices at end of trial 1: [4 4 4]
Cumulative profit per product: [4.0e+08 3.6e+08 3.2e+08]
Total cumulative profit: 1080034982.0
Final estimated mean rewards (agent):
[[200.    380.    582.    560.    799.998]
 [100.    267.    450.    581.    720.025]
 [ 50.    194.    352.    528.    640.056]]
Number of times each price was pulled:
[[1.e+00 1.e+00 1.e+00 1.e+00 5.e+05]
 [1.e+00 1.e+00 1.e+00 1.e+00 5.e+05]
 [1.e+00 1.e+00 1.e+00 1.e+00 5.e+05]]
Average regret per round: -0.0700
Regret appears sublinear
----------------------------------------------
Agent's predicted best prices at end of trial 2: [4 4 4]
Cumulative profit per product: [4.0e+08 3.6e+08 3.2e+08]
Total cumulative profit: 1079952402.0
Final estimated mean rewards (agent):
[[200.    376.    558.    632.    799.978]
 [100.    279.    420.    588.    719.947]
 [ 50.    186.