# Timing Tests for Basic Functions
by: Octopus

Experiments to see whether alternate versions of specific functions offer any performance improvement.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random
import math
import time

## Function to Generate Random Examples of Logits and Samples

In [2]:
def generate_samples(num_logits, num_samples):
    """
    Build a random test case: a list of logits plus comparison samples.

    Returns a tuple ``(logits, samples)``. ``logits`` holds ``num_logits``
    floats drawn with ``random.uniform(-3, 3)``. ``samples`` holds
    ``num_samples`` triples ``(a, b, c)`` where ``a`` and ``b`` are two
    different indices into ``logits`` and ``c`` is another
    ``random.uniform(-3, 3)`` draw.
    """
    # One uniform draw in the range -3 to 3 per item.
    logits = [random.uniform(-3, 3) for _ in range(num_logits)]

    # For every sample, pick the two distinct indices first and then draw
    # the value c, so the sequence of random calls per sample is
    # (index pair, value).
    index_pool = range(num_logits)
    samples = [
        (*random.sample(index_pool, 2), random.uniform(-3, 3))
        for _ in range(num_samples)
    ]

    return logits, samples

## Different Versions of the Cost Functions

We compare two different versions of the cost function. The second one unpacks the tuples into numpy arrays. 

In [3]:
def original_cost_function(logits, samples):
    """
    Baseline cost, computed with plain Python arithmetic.

    Every sample ``(a, b, c)`` contributes ``(logits[b] - logits[a] - c) ** 2``
    and the contributions are added up with the built-in ``sum``.
    """
    squared_residuals = (
        (logits[second] - logits[first] - target) ** 2
        for first, second, target in samples
    )
    return sum(squared_residuals)

   
def alternate_cost_function(logits, samples):
    """
    NumPy variant of the cost: the sum of squared residuals over all samples.

    Pays the cost of three comprehensions to unpack the ``(a, b, c)`` triples
    into arrays, in exchange for doing the arithmetic with NumPy operations.

    Parameters:
        logits: indexable sequence of numbers.
        samples: iterable of ``(a, b, c)`` triples, where ``a`` and ``b``
            index into ``logits`` and ``c`` is a number.

    Returns:
        The NumPy scalar ``sum((logits[b] - logits[a] - c) ** 2)``, i.e. the
        same quantity ``original_cost_function`` computes.
    """
    logits_a = np.array([logits[a] for a, _, _ in samples])
    logits_b = np.array([logits[b] for _, b, _ in samples])
    c_elements = np.array([c for _, _, c in samples])

    residuals = logits_b - logits_a - c_elements
    # Square each residual first and then add them up. Squaring the summed
    # residuals instead would give (sum r)^2, which is a different quantity
    # from the sum of r^2 that the original cost function returns.
    modified_cost = np.sum(np.square(residuals))
    return modified_cost

## Comparing the two versions of the cost function

Below are helper functions to compare the performance (time) and accuracy of the two versions of the function. 

In [4]:
# Performance profiler
def performance_profiler(num_items, num_samples):
    """
    Time both cost functions once on the same random input and print results.

    Parameters:
        num_items: number of logits to generate.
        num_samples: number of (a, b, c) samples to generate.

    Prints one line per cost function with the computed cost and the elapsed
    time in seconds. Returns nothing.
    """
    # Generate sample logits and samples
    logits, samples = generate_samples(num_items, num_samples)

    # time.perf_counter() is the clock intended for measuring short
    # durations; time.time() can be too coarse for calls this quick.

    # Profile original cost function
    start_time = time.perf_counter()
    original_cost = original_cost_function(logits, samples)
    original_duration = time.perf_counter() - start_time

    # Profile modified cost function (defined in this file as
    # alternate_cost_function)
    start_time = time.perf_counter()
    modified_cost = alternate_cost_function(logits, samples)
    modified_duration = time.perf_counter() - start_time

    # Print results
    print(f"Original Cost: {original_cost}, Time: {original_duration:.6f} seconds")
    print(f"Modified Cost: {modified_cost}, Time: {modified_duration:.6f} seconds")


In [5]:
# Accuracy test function
def accuracy_test(num_items, num_samples):
    """
    Check that both cost functions agree on one random input.

    Parameters:
        num_items: number of logits to generate.
        num_samples: number of (a, b, c) samples to generate.

    Prints whether the two costs match; on a mismatch, also prints both
    values. Returns nothing.
    """
    # Generate sample logits and samples
    logits, samples = generate_samples(num_items, num_samples)

    # Calculate costs using both functions (the modified version is defined
    # in this file as alternate_cost_function)
    original_cost = original_cost_function(logits, samples)
    modified_cost = alternate_cost_function(logits, samples)

    # The two implementations add the same terms with different summation
    # routines, so their floating-point results can differ in the last few
    # bits. Compare with a tolerance rather than exact equality; abs_tol
    # covers the case where both costs are (near) zero.
    if math.isclose(original_cost, modified_cost, rel_tol=1e-9, abs_tol=1e-12):
        print("Accuracy Test Passed: Both functions give the same result.")
    else:
        print("Accuracy Test Failed: Results differ.")
        print(f"Original Cost: {original_cost}, Modified Cost: {modified_cost}")


In [6]:

# Performance profiler for multiple input sizes
def performance_profiler_multiple_sizes(sizes):
    """
    Time both cost functions over several input sizes and plot the results.

    Parameters:
        sizes: sequence of ``(num_items, num_samples)`` tuples. A fresh
            random input is generated for each entry and each cost function
            is timed once on it.

    Shows a matplotlib figure with one curve per cost function. Returns
    nothing.
    """
    original_times = []
    modified_times = []

    for num_items, num_samples in sizes:
        # Generate sample logits and samples
        logits, samples = generate_samples(num_items, num_samples)

        # time.perf_counter() is the clock intended for measuring short
        # durations; time.time() can be too coarse for calls this quick.

        # Profile original cost function
        start_time = time.perf_counter()
        original_cost_function(logits, samples)
        original_times.append(time.perf_counter() - start_time)

        # Profile modified cost function (defined in this file as
        # alternate_cost_function)
        start_time = time.perf_counter()
        alternate_cost_function(logits, samples)
        modified_times.append(time.perf_counter() - start_time)

    # Plotting the results
    # NOTE(review): the x-axis is the number of items, but both cost
    # functions loop over the samples, so the timings are driven by
    # num_samples. Confirm this is the intended axis.
    item_counts = [size[0] for size in sizes]
    plt.figure(figsize=(10, 6))
    plt.plot(item_counts, original_times, label='Original Cost Function', marker='o')
    plt.plot(item_counts, modified_times, label='Modified Cost Function', marker='x')
    plt.title('Performance Comparison of Cost Functions')
    plt.xlabel('Number of Items')
    plt.ylabel('Time (seconds)')
    plt.legend()
    plt.grid()
    plt.show()

# Example usage
if __name__ == "__main__":
    # Input sizes to profile, one (num_items, num_samples) tuple per entry.
    input_sizes = [
        (1000, 10),
        (5000, 50),
        (10_000, 1000),
        (20_000, 2000),
    ]
    performance_profiler_multiple_sizes(input_sizes)

KeyboardInterrupt: 