In [9]:
from bisect import bisect_left
import random
import pickle
import numpy as np
import time

def binary_search_with_uniform_choice(cdf, random_value):
    """Look up a draw in a sorted CDF table, breaking ties uniformly.

    Args:
        cdf: Non-empty list of ``(cdf_value, item_id)`` tuples sorted in
            ascending order of ``cdf_value``.
        random_value: The draw to look up; it is compared against the
            ``cdf_value`` entries.

    Returns:
        The ``item_id`` of the first entry whose ``cdf_value`` is greater than
        or equal to ``random_value``. When several entries share that
        ``cdf_value``, one of them is chosen uniformly at random. When
        ``random_value`` is larger than every ``cdf_value``, the choice is made
        among the entries tied for the largest ``cdf_value``.

    Raises:
        IndexError: If ``cdf`` is empty.
    """
    if not cdf:
        raise IndexError("cdf must contain at least one (cdf_value, item_id) entry")

    # A 1-tuple sorts before any (random_value, item_id) pair, so this lands on
    # the FIRST entry whose cdf_value is >= random_value and never has to
    # compare item ids.
    first = bisect_left(cdf, (random_value,))

    if first == len(cdf):
        # The draw is above every cdf_value: fall back to the run of entries
        # tied for the largest value at the end of the list.
        tied_value = cdf[-1][0]
        first = bisect_left(cdf, (tied_value,))
    else:
        tied_value = cdf[first][0]

    # Entries sharing a cdf_value are contiguous and start at `first`, so any
    # ties are found by scanning forward. (Looking backward can never find
    # one: bisect_left guarantees the previous entry is strictly smaller.)
    stop = first + 1
    while stop < len(cdf) and cdf[stop][0] == tied_value:
        stop += 1

    if stop - first == 1:
        # No tie: skip the random draw entirely.
        return cdf[first][1]

    return random.choice(cdf[first:stop])[1]

In [12]:
# Load the CDF model.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# 'lora_dist_model.pkl' must come from a trusted source.
with open('lora_dist_model.pkl', 'rb') as f:
    cdf_model = pickle.load(f)

# Draw one item and time the lookup. perf_counter() is a monotonic,
# high-resolution clock intended for measuring short durations, whereas
# time.time() is wall-clock time and can be too coarse (or jump) for a
# microsecond-scale call like this one.
start = time.perf_counter()
rand = np.random.rand()
random_item = binary_search_with_uniform_choice(cdf_model, rand)
end = time.perf_counter()


# Output the result
print(f"Time taken: {end - start}")
print(f"The randomly selected item is: {random_item}")

Time taken: 5.412101745605469e-05
The randomly selected item is: 44353


In [20]:
import json
from datetime import datetime
import pickle

# Load civit_catalog.json. The encoding is passed explicitly so decoding does
# not depend on the platform's default text encoding (which is not UTF-8 on
# every system, while JSON text is).
with open('cache/civit_catalog.json', encoding='utf-8') as f:
    civit_catalog = json.load(f)

def parse_date(date_str):
    """
    Convert an ISO-like 'YYYY-MM-DDTHH:MM:SS[.ffffff]' string to a datetime.

    The fractional-seconds layout is tried first; if it does not match, the
    whole-seconds layout is used. A string matching neither raises the
    ValueError produced by the whole-seconds attempt.
    """
    layouts = ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S")

    # Every layout except the last is tried quietly.
    for layout in layouts[:-1]:
        try:
            return datetime.strptime(date_str, layout)
        except ValueError:
            continue

    # The final layout is allowed to raise so the caller sees the failure.
    return datetime.strptime(date_str, layouts[-1])

# Collect one publish date per LORA entry, read from the last element of its
# 'modelVersions' list. Entries of another type, entries with no versions, and
# versions whose 'publishedAt' is None are skipped.
dates = []
for item in civit_catalog:
    if item['type'] != 'LORA':
        continue

    versions = item['modelVersions']
    if len(versions) == 0:
        continue

    # Presumably the list is ordered newest-first, which is why the last
    # element is treated as the oldest version — TODO confirm against the data.
    oldest_version = versions[-1]
    published_date_str = oldest_version['publishedAt']
    if published_date_str is not None:
        # parse_date accepts timestamps with or without fractional seconds.
        published_date = parse_date(published_date_str)
        dates.append(published_date)

# Report the latest collected date, if any were collected.
if not dates:
    print("No dates found.")
else:
    # Sorting in place leaves `dates` in ascending order; the newest is last.
    dates.sort()
    newest_date = dates[-1]
    print(f"The newest date is: {newest_date}")


The newest date is: 2024-03-22 05:23:22.600000
