In [38]:
import numpy as np
from typing import Tuple, Optional
from scipy.optimize import minimize

In [39]:
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Calculate cosine similarity between two vectors.

    Args:
        a: First vector.
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)`` as a plain Python ``float``.
        When either vector has zero norm the similarity is undefined and
        ``0.0`` is returned instead of dividing by zero.
    """
    dot_product = np.dot(a, b)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)

    # A zero vector has no direction; report "no similarity" rather than NaN.
    if norm_a == 0 or norm_b == 0:
        return 0.0

    # Convert the numpy scalar so every branch returns the same type.
    return float(dot_product / (norm_a * norm_b))

In [40]:
def round_to_integers_maintaining_sum(
    vector: np.ndarray,
    target_sum: float
) -> np.ndarray:
    """Round vector to integers while maintaining the target sum.

    Every entry is floored first. The gap between ``target_sum`` and the
    floored total (rounded to the nearest integer) is then distributed one
    unit at a time:

    * positive gap: +1 goes to the entries with the largest fractional
      parts;
    * negative gap (the floored total already exceeds the target): -1 is
      taken from the entries with the smallest fractional parts. This can
      push an entry below zero if the input is far above the target.

    If the gap is larger than the number of entries, whole passes over
    all entries are applied before the leftover units are placed.

    Args:
        vector: Values to round.
        target_sum: Total the rounded values should add up to.

    Returns:
        Float array of integer-valued entries with the same shape as
        ``vector``. An empty input is returned empty.
    """
    values = np.asarray(vector, dtype=float)
    rounded = np.floor(values)
    n_values = values.size

    if n_values == 0:
        return rounded

    # round() instead of int(): truncation would drop a unit when float
    # noise leaves the gap just below a whole number.
    remainder = int(round(float(target_sum) - float(rounded.sum())))
    if remainder == 0:
        return rounded

    # Compute fractional parts before the floored values are adjusted.
    fractional_parts = values - rounded
    step = 1 if remainder > 0 else -1
    full_passes, leftover = divmod(abs(remainder), n_values)

    # Gaps larger than the vector length need more than one unit per entry.
    rounded += step * full_passes

    if leftover:
        order = np.argsort(fractional_parts)
        chosen = order[-leftover:] if step > 0 else order[:leftover]
        rounded[chosen] += step

    return rounded

In [41]:
def generate_random_vector(
    n_items: int, 
    max_utility: float,
    integer_values: bool = False
) -> np.ndarray:
    """Generate a random vector that sums to max_utility.

    Proportions are drawn from a symmetric Dirichlet distribution (all
    concentration parameters equal to 1) using numpy's global random
    state, then scaled by ``max_utility``.

    Args:
        n_items: Number of entries in the vector.
        max_utility: Total the entries add up to.
        integer_values: If True, the entries are rounded to integers with
            ``round_to_integers_maintaining_sum`` so the total is kept.
            Defaults to False.

    Returns:
        Array of length ``n_items`` whose entries sum to ``max_utility``.
    """
    # Generate random proportions (non-negative, summing to 1)
    proportions = np.random.dirichlet(np.ones(n_items))
    vector = proportions * max_utility

    if integer_values:
        # Round to integers while maintaining sum
        vector = round_to_integers_maintaining_sum(vector, max_utility)

    return vector


In [42]:
def simple_method(
    target_cosine: float,
    n_items: int,
    max_utility: float,
    integer_values: bool
) -> Tuple[np.ndarray, np.ndarray]:
    """Simple geometric method to generate vectors with target similarity.

    A random first vector is drawn, a second one is built by rotating the
    first vector's direction by ``arccos(target_cosine)`` towards a random
    orthogonal direction, and the result is made non-negative and rescaled
    to sum to ``max_utility``.

    The target is only approximated: taking absolute values changes the
    direction of the second vector whenever the rotation produced negative
    components, and integer rounding perturbs it further.

    Args:
        target_cosine: Desired cosine similarity; clipped to [-1, 1].
        n_items: Length of each vector.
        max_utility: Total each vector sums to.
        integer_values: If True, both vectors are rounded to integers
            while keeping their sums.

    Returns:
        Tuple ``(v1, v2)`` of arrays of length ``n_items``.
    """
    # Generate first vector randomly. integer_values is forwarded
    # explicitly so v1 follows the same integer constraint as v2.
    v1 = generate_random_vector(n_items, max_utility, integer_values)

    # Normalize v1
    v1_length = np.linalg.norm(v1)
    v1_unit = v1 / v1_length

    # Generate a random vector and make it orthogonal to v1 (Gram-Schmidt)
    random_vec = np.random.randn(n_items)
    v_orthogonal = random_vec - np.dot(random_vec, v1_unit) * v1_unit
    orthogonal_length = np.linalg.norm(v_orthogonal)

    if orthogonal_length > 1e-12 * max(np.linalg.norm(random_vec), 1.0):
        v_orthogonal = v_orthogonal / orthogonal_length

        # Calculate angle from desired cosine similarity
        angle = np.arccos(np.clip(target_cosine, -1, 1))

        # Construct v2 direction using the angle
        v2_direction = np.cos(angle) * v1_unit + np.sin(angle) * v_orthogonal
    else:
        # No orthogonal direction is available (e.g. n_items == 1), so
        # normalising it would divide by zero; reuse v1's direction.
        v2_direction = v1_unit

    # Scale to have same magnitude as v1
    v2 = v2_direction * v1_length

    # Ensure non-negative and normalize sum
    v2 = np.abs(v2)
    v2 = v2 * (max_utility / np.sum(v2))

    if integer_values:
        v2 = round_to_integers_maintaining_sum(v2, max_utility)

    return v1, v2


In [43]:
def optimize_vectors(
    target_cosine: float,
    n_items: int,
    max_utility: float,
    integer_values: bool,
    max_attempts: int = 10
) -> Tuple[np.ndarray, np.ndarray]:
    """Use optimization to find vectors with target cosine similarity.

    Runs SLSQP from ``max_attempts`` random starting points over the
    concatenation of both vectors (``2 * n_items`` variables). Each entry
    is bounded to ``[0, max_utility]`` and each vector is constrained to
    sum to ``max_utility``. Among the runs that report success, the one
    with the lowest objective value is kept. If no run succeeds, the
    result of ``simple_method`` is returned instead.

    Args:
        target_cosine: Desired cosine similarity between the two vectors.
        n_items: Length of each vector.
        max_utility: Total each vector must sum to.
        integer_values: If True, the vectors are rounded to integers
            after optimization (keeping their sums), so the achieved
            cosine similarity may drift slightly from the target.
        max_attempts: Number of random restarts.

    Returns:
        Tuple ``(v1, v2)`` of arrays of length ``n_items``.
    """

    # The objective, constraints and bounds do not depend on the starting
    # point, so they are built once rather than on every restart.
    def objective(x):
        v1 = x[:n_items]
        v2 = x[n_items:]

        # Ensure non-negative values
        if np.any(v1 < 0) or np.any(v2 < 0):
            return 1e10

        # Calculate cosine similarity
        cos_sim = cosine_similarity(v1, v2)

        # Primary objective: match target cosine similarity
        cos_penalty = 1000 * (cos_sim - target_cosine) ** 2

        # Ensure vectors sum to max_utility
        sum_penalty = 100 * ((np.sum(v1) - max_utility) ** 2 + 
                                (np.sum(v2) - max_utility) ** 2)

        return cos_penalty + sum_penalty

    # Constraints: all values >= 0, sums = max_utility
    constraints = [
        {'type': 'eq', 'fun': lambda x: np.sum(x[:n_items]) - max_utility},
        {'type': 'eq', 'fun': lambda x: np.sum(x[n_items:]) - max_utility}
    ]

    bounds = [(0, max_utility) for _ in range(2 * n_items)]

    best_result = None
    best_error = float('inf')

    for _ in range(max_attempts):
        # Random starting point
        x0 = np.random.uniform(0, max_utility/n_items, 2 * n_items)

        result = minimize(objective, x0, method='SLSQP', 
                        bounds=bounds, constraints=constraints,
                        options={'maxiter': 1000, 'ftol': 1e-9})

        # Only runs that the optimizer reports as successful are candidates.
        if result.success:
            error = objective(result.x)
            if error < best_error:
                best_error = error
                best_result = result

    if best_result is None:
        # Fallback to simple method; integer_values is a required argument
        # there and must be forwarded.
        return simple_method(target_cosine, n_items, max_utility, integer_values)

    v1 = best_result.x[:n_items]
    v2 = best_result.x[n_items:]

    if integer_values:
        v1 = round_to_integers_maintaining_sum(v1, max_utility)
        v2 = round_to_integers_maintaining_sum(v2, max_utility)

    return v1, v2


In [44]:
# Define parameters for testing
n_items = 5
max_utility = 100.0
integer_values = True
# Targets 0.0, 0.1, ..., 1.0 (rounded to strip float noise from arange)
test_cosines = np.round(np.arange(0.0, 1.01, 0.1), 2)

# NOTE: this cell does not seed numpy's random state, so unless it is seeded
# elsewhere the printed vectors differ from run to run.
print("Target Cosine | Actual Cosine | v1[:3]... | v2[:3]...")

for target_cosine in test_cosines:
    v1, v2 = optimize_vectors(
        target_cosine=target_cosine,
        n_items=n_items,
        max_utility=max_utility,
        integer_values=integer_values
    )
    # Measure the achieved similarity with the shared helper so a zero
    # vector yields 0 instead of a division-by-zero NaN.
    cos_sim = cosine_similarity(v1, v2)
    # optimize_vectors always returns arrays, so they can be sliced directly.
    print(f"{target_cosine:12.2f} | {cos_sim:13.4f} | {v1[:3]}... | {v2[:3]}...")

Target Cosine | Actual Cosine | v1[:3]... | v2[:3]...
        0.00 |        0.0000 | [ 0. 48.  0.]... | [12.  0. 54.]...
        0.10 |        0.0979 | [ 0. 25. 38.]... | [58.  8.  0.]...
        0.20 |        0.1910 | [27.  0. 40.]... | [16. 48.  0.]...
        0.30 |        0.2989 | [34.  0. 36.]... | [14. 40.  7.]...
        0.40 |        0.4004 | [40. 11. 25.]... | [ 2. 22. 15.]...
        0.50 |        0.5081 | [26. 22.  2.]... | [ 9. 23. 40.]...
        0.60 |        0.5997 | [12. 12. 31.]... | [29. 36.  7.]...
        0.70 |        0.6977 | [23. 31. 20.]... | [14.  7. 14.]...
        0.80 |        0.8060 | [16. 27. 20.]... | [30. 13. 16.]...
        0.90 |        0.9002 | [22. 18. 14.]... | [13.  9. 31.]...
        1.00 |        1.0000 | [23. 20. 14.]... | [23. 20. 14.]...
