In [37]:
import math
import numpy as np
from scipy.stats import rv_continuous
import collections
import random

# Problem statement

With a large school of turtle visiting, an opportunity arises to acquire some top grade `Flippers`. You only have two chances to offer a good price. Each one of the turtle will accept the lowest bid that is over their reserve price. You know there’s a constant desire for flippers on the archipelago. So, at the end of the round, you’ll be able to sell them for 320 SeaShells a piece.

!-- Whilst not every turtle has the same reserve price, you know the distribution of their reserve prices. The reserve price will be no lower than 900 and no higher than 1000. The probability scales linearly from 0 at 900 to most likely at 1000. --!

You only trade with the turtle. For the second bid, they are more likely to trade with you if your second bid is higher than the average second bid

Think hard about how you want to set your two bids in order to walk away with some serious SeaShell profit.

# Solution

Let $n$ denote the number of turtle and assume each turtle has a single unit of flippers to sell.  
For each $i\in \{1,\ldots,n\}$, turtle $i$ has a reserve price $R_i$: turtle $i$ will sell its flippers iff the bid $p$ verifies $p\geq R_i$.
Note that each $R_i$ is unknown to us.

Denoting by $p_l$, $p_h$ our low and high bid,
the final profit is 
$$\sum_{i=1}^n (320-p_l)1_{R_i \leq p_l} + (320-p_h)1_{p_l < R_i \leq p_h} +  0\times 1_{R_i > p_h},$$
and our task is to choose our bids in order to maximize this sum, or equivalently, to maximize the average 
$$A \coloneqq \frac 1n \sum_{i=1}^n (320-p_l)1_{R_i \leq p_l} + (320-p_h)1_{p_l < R_i \leq p_h}.$$

Since $n$ is assumed to be large and since we know the distribution of the $R_i$, 
a heuristic suggested by the law of large numbers is to maximize the expected value $\mathbb E[A]$.  
Assuming the $R_i$ are i.i.d. with same distribution as $R$, the optimization problem is to maximize
$$(320-p_l)\mathbb P(R \leq p_l) + (320-p_h)\mathbb P(p_l < R \leq p_h)$$
under the constraints $p_l, p_h\in \mathbb N$ and $180 \leq p_l \leq p_h \leq 320$.

It was clarified by the [organizer](https://discord.com/channels/1001852729725046804/1004051976759296022/1226850690387673228) that
the distribution of reserve prices is uniform between 160-200 and 250-320, and no reserve prices fall between 200 and 250.  
The distribution of $R$ writes therefore as $f_R(r) = 0.00909$ for $160<= r <= 200$ or $250 <= r <= 320$.  

The surrogate objective function $\mathbb E[A]$ writes therefore as 
$$(320-p_l)\int_{160}^{p_l} (f_R(r)) dr + (320-p_h)\int_{p_l}^{p_h} (f_R(r)) dr
= (320-p_l)\times\begin{cases}
0 & \text{if } p_l < 160 \\
0.00909(p_l - 160) & \text{if } 160 \le p_l \le 200 \\
0.3636 & \text{if } 200 < p_l < 250 \\
0.3636 + 0.00909(p_l - 250) & \text{if } 250 \le p_l \le 320 \\
1 & \text{if } p_l > 320
\end{cases} ....
$$
$$+ (320-p_h)\times(0.00909*max(0,min(200,y) − max(160,x))+0.00909*max(0,min(320,y)−max(250,x)))
.$$




The following Python code performs optimization over the finite grid $\{(p_l, p_h)\in \mathbb N^2: 160 \leq p_l \leq p_h \leq 320\}$.

In [4]:
def objective(low, high):
    """Compute the value of the surrogate objective function.

    Parameters
    ----------
    low : int
        Value of the low bid.
    high : int
        Value of the high bid.
    
    Returns
    -------
    float
        Value of the surrogate objective function.
    """
    a = 1/110 #probability of a valid r to be the reserve price
    def lower_part_multiple(low):
        if low < 160: return 0 
        if low < 200: return (a*(low-160))
        if low < 250: return (a*40)
        if low < 320: return (a*40 + a*(low - 250))
        return 1

    def higher_part_multiple(low,high):
        return a*(max(0, min(200,high)-max(160,low))) + a*max(0, min(320,high) - max(250,low))
    
    lhs = (320-low)*lower_part_multiple(low)
    rhs = (320-high)*higher_part_multiple(low,high)
    return lhs + rhs

argmax = []
val_max = 0
for low in range(160, 321):
    for high in range(low, 321):
        comp = objective(low, high)
        if math.isclose(comp, val_max):
            argmax.append((low, high))
        elif comp > val_max:
            val_max = comp
            argmax = [(low, high)]
if len(argmax) > 1:
    print('Maximizers:', argmax)
else:
    print('Maximizer:', argmax[0])

Maximizer: (200, 285)


The maximizer is $(p_l,p_h) = (200,285)$.

# Results

There were actually $n=5000$ fish.
While $(952,978)$ is the solution that maximizes the surrogate objective function, 
it was suboptimal for the online judge.

<p float="center">
  <img src="https://i.imgur.com/EfE7rbg.png" width="1200" />
</p>

Below, we investigate whether the suboptimality of $(952,978)$ was a one-off. We repeat the experiment many times.  
Reserve prices are sampled using inverse transform sampling, since the data is univariate and the CDF can be explicitly inversed.

In [None]:
def objective2(low, high, reserve_prices, avg_high_bid):
    """Compute the value of the objective function.

    Parameters
    ----------
    low : int
        Value of the low bid.
    high : int
        Value of the high bid.
    reserve_prices : ndarray
        Reserve price for each fish
        
    Returns
    -------
    float
        Value of the objective function.
    """
    p = ((320 - avg_high_bid) / (320 - high)) ** 3 if high < 320 else 0
    arr = (320-low) * (reserve_prices <= low) + (320-high) * ((low < reserve_prices) & (reserve_prices <= high))*p
    return np.sum(arr)

def maximize(reserve_prices):
    """Compute maximizers of the objective function.

    Parameters
    ----------
    reserve_prices : ndarray
        Reserve price for each fish
        
    Returns
    -------
    argmax : list of tuple
        Maximizers.
    """
    argmax = []
    val_max = 0
    high = 319
    for low in range(160, 321):
        for ahb in range(250,320):
            comp = objective2(low, high, reserve_prices, ahb)
            if math.isclose(comp, val_max):
                argmax.append((low, high))
            elif comp > val_max:
                val_max = comp
                argmax = [(low, high)]
    return argmax

class reserve_price_gen(rv_continuous):
    """Piecewise uniform distribution on [160, 200] ∪ [250, 320]"""

    def _pdf(self, r):
        # PDF is 0.00909 on both intervals
        return np.where(
            ((160 <= r) & (r <= 200)) | ((250 <= r) & (r <= 320)),
            0.00909,
            0.0
        )

    def _cdf(self, r):
        # Manually compute cumulative distribution function
        r = np.asarray(r)
        cdf = np.zeros_like(r, dtype=float)

        # Region 1: 160 ≤ r ≤ 200
        mask1 = (160 <= r) & (r <= 200)
        cdf[mask1] = 0.00909 * (r[mask1] - 160)

        # Region 2: 200 < r < 250 (zero increase here)
        mask2 = (200 < r) & (r < 250)
        cdf[mask2] = 0.00909 * 40  # full mass of first interval

        # Region 3: 250 ≤ r ≤ 320
        mask3 = (250 <= r) & (r <= 320)
        cdf[mask3] = 0.00909 * 40 + 0.00909 * (r[mask3] - 250)

        # Region 4: r > 320
        mask4 = r > 320
        cdf[mask4] = 1.0

        return cdf

    def _ppf(self, u):
        # Invert CDF to generate samples from uniform intervals
        u = np.asarray(u)
        x = np.zeros_like(u, dtype=float)

        # First interval
        mask1 = u <= 0.3636
        x[mask1] = 160 + u[mask1] / 0.00909

        # Second interval
        mask2 = u > 0.3636
        x[mask2] = 250 + (u[mask2] - 0.3636) / 0.00909

        return x

def repeat(m):
    """Simulate the experiment several times.

    Parameters
    ----------
    m : int 
        Number of repetitions
        
    Returns
    -------
    res : list of list
        Maximizers for each experiment.
    """
    res = []
    reserve_price_dist = reserve_price_gen(a=160, b=320)
    for _ in range(m):
        reserve_prices = reserve_price_dist.rvs(size=5000)
        res.append(maximize(reserve_prices))
    return res

The next cell repeats the experiment 1000 times. The output indicates that $(200,285)$ is optimal in 2.7% of the experiments. 

In [53]:
res = repeat(100)
collections.Counter([el[0] for el in res])

KeyboardInterrupt: 

Now including the average for the second bids

In [31]:
def objective3(low, high, reserve_prices, avg_bid):
    """Compute the value of the objective function with trade probability modifier.

    Parameters
    ----------
    low : int
        Value of the low bid.
    high : int
        Value of the high bid.
    reserve_prices : ndarray
        Reserve price for each fish.
    avg_bid : float
        Average bid in the round (used to compute trade probability).
        
    Returns
    -------
    float
        Value of the objective function.
    """
    p = ((320 - avg_bid) / (320 - high)) ** 3 if high < 320 else 0  # prevent div by 0
    # First group: fully accept low bid
    trade1 = (320 - low) * (reserve_prices <= low)
    # Second group: adjust with probability factor p
    trade2 = (320 - high) * ((low < reserve_prices) & (reserve_prices <= high)) * p
    # print(np.sum(trade1 + trade2))
    return np.sum(trade1 + trade2)

def maximize(reserve_prices, avg_bid):
    """Compute maximizers of the objective function using global average high bid.

    Parameters
    ----------
    reserve_prices : ndarray
        Reserve price for each fish.
    avg_bid : float
        The average high bid across all players in this round.
        
    Returns
    -------
    argmax : list of tuple
        Maximizers.
    """
    argmax = []
    val_max = 0
    for low in range(160, 321):
        for high in range(low, 321):
            comp = objective3(low, high, reserve_prices, avg_bid)
            if math.isclose(comp, val_max):
                argmax.append((low, high))
            elif comp > val_max:
                val_max = comp
                argmax = [(low, high)]
    return argmax


def repeat(m, n_players=10):
    """Simulate the experiment several times.

    Parameters
    ----------
    m : int 
        Number of repetitions
    n_players : int
        Number of players per experiment
        
    Returns
    -------
    res : list of list
        Maximizers for each experiment.
    """
    res = []
    reserve_price_dist = reserve_price_gen(a=160, b=320)

    for _ in range(m):
        # Sample reserve prices once per round
        reserve_prices = reserve_price_dist.rvs(size=5000)

        # Simulate each player picking random (low, high) bids
        high_bids = [np.random.randint(160, 321) for _ in range(n_players)]
        avg_high_bid = sum(high_bids) / len(high_bids)
        # print(avg_high_bid)
        # Use the avg high bid to guide your bidding strategy
        res.append(maximize(reserve_prices, avg_high_bid))

    return res

In [32]:
res = repeat(100, 1000)
collections.Counter([el[0] for el in res])

Counter({(160, 319): 100})