In [3]:
from bisect import bisect_left
import random
import pickle
import numpy as np


def binary_search_with_uniform_choice(cdf, random_value):
    """Sample an item id from a sorted CDF table, breaking ties uniformly.

    Args:
        cdf: Sequence of ``(cumulative_value, item_id)`` tuples sorted in
            ascending order of ``cumulative_value``.
        random_value: The draw to look up, normally a float in ``[0, 1)``.

    Returns:
        The ``item_id`` of the first entry whose cumulative value is
        ``>= random_value``.  When several consecutive entries share that
        cumulative value, one of them is chosen uniformly at random.  When
        ``random_value`` exceeds every cumulative value, the choice is made
        among the entries holding the largest cumulative value.

    Raises:
        IndexError: If ``cdf`` is empty.
    """
    if not cdf:
        raise IndexError("cannot sample from an empty CDF")

    # The 1-tuple (random_value,) sorts before every (random_value, item_id),
    # so bisect_left lands on the FIRST entry with cumulative value
    # >= random_value without ever comparing item ids.
    start = bisect_left(cdf, (random_value,))

    if start == len(cdf):
        # Overshoot: fall back to the run of entries holding the largest
        # cumulative value, which sits at the tail of the sorted table.
        start = bisect_left(cdf, (cdf[-1][0],))

    # Entries tied with cdf[start] can only lie AFTER it (everything before
    # is strictly smaller), so extend the run forward.
    tied_value = cdf[start][0]
    stop = start + 1
    while stop < len(cdf) and cdf[stop][0] == tied_value:
        stop += 1

    if stop - start == 1:
        # Single candidate: no need to consume randomness.
        return cdf[start][1]

    _, item_id = random.choice(cdf[start:stop])
    return item_id

In [8]:
# Load the CDF model from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'lora_dist_model.pkl' from a trusted source.
# Presumably the pickle holds a sorted list of (cdf_value, item_id) tuples,
# as binary_search_with_uniform_choice expects -- confirm against the code
# that produced the file.
with open('lora_dist_model.pkl', 'rb') as f:
    cdf_model = pickle.load(f)

# Draw a uniform float in [0, 1) and map it to an item through the CDF lookup.
rand = np.random.rand()
random_item = binary_search_with_uniform_choice(cdf_model, rand)

# Output the result
print(f"The randomly selected item is: {random_item}")

The randomly selected item is: 8379
