# In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from botorch.fit import fit_gpytorch_mll
from botorch.models.pairwise_gp import PairwiseGP, PairwiseLaplaceMarginalLogLikelihood
from botorch.models.transforms.input import Normalize
from botorch.acquisition.preference import AnalyticExpectedUtilityOfBestOption
from botorch.optim import optimize_acqf
from scipy.stats import kendalltau

# Candidate teams. A team's position in this list is the integer index used
# for it throughout the rest of the script.
teams = [
    "Bangladesh",
    "India",
    "Pakistan",
    "Nepal",
    "Afghanistan",
    "Sri Lanka",
    "USA",
    "South Africa",
    "England",
    "West Indies",
    "Zimbabwe",
    "Ireland",
    "Netherlands",
    "Canada",
    "Australia",
    "Kenya",
    "Argentina",
]

n_teams = len(teams)

# Square table of pairwise outcomes; every cell starts as NaN, meaning
# "this pair has not been compared yet".
comp_matrix = np.empty((n_teams, n_teams))
comp_matrix.fill(np.nan)

# Show the 1-based numbering of the teams to the user.
print("Teams:")
for number, name in enumerate(teams, start=1):
    print(f"{number}: {name}")

def query_user_for_comparison(i, j):
    """Ask the user which of two teams they prefer.

    The prompt is repeated until the answer is exactly ``1`` or ``0``
    (surrounding whitespace is ignored), so a typo or an empty line is never
    silently recorded as a vote for team ``j``.

    Args:
        i: Index into the module-level ``teams`` list of the first team.
        j: Index into the module-level ``teams`` list of the second team.

    Returns:
        A tuple ``(i, j, result)`` where ``result`` is 1 if team ``i`` was
        preferred and 0 if team ``j`` was preferred.
    """
    prompt = (
        f"Which team is preferred between {teams[i]} vs {teams[j]}? "
        f"(1 for {teams[i]}, 0 for {teams[j]}): "
    )
    while True:
        answer = input(prompt).strip()
        if answer in ("1", "0"):
            return (i, j, int(answer))
        # Anything else is ambiguous; ask again instead of guessing.
        print("Invalid input; please enter 1 or 0.")

def update_comparison_matrix(comp_matrix, i, j, result):
    """Record one comparison outcome in both mirrored cells of the matrix.

    Args:
        comp_matrix: Matrix indexed as ``comp_matrix[row, col]``; it is
            modified in place.
        i: Index of the first team.
        j: Index of the second team.
        result: 1 if team ``i`` was preferred; any other value is treated as
            team ``j`` being preferred.

    Returns:
        The same ``comp_matrix`` object, with ``[i, j]`` set to 1 and
        ``[j, i]`` set to 0 when ``i`` was preferred, and the opposite
        otherwise.
    """
    i_preferred = 1 if result == 1 else 0
    comp_matrix[i, j] = i_preferred
    # The mirrored cell always holds the complementary outcome.
    comp_matrix[j, i] = 1 - i_preferred
    return comp_matrix

def generate_data(n, dim=2):
    """Return an ``n`` x ``dim`` float64 tensor of ``torch.rand`` samples.

    Args:
        n: Number of rows (points) to draw.
        dim: Number of columns (features) per point.

    Returns:
        A ``torch.float64`` tensor of shape ``(n, dim)``.
    """
    shape = (n, dim)
    return torch.rand(shape, dtype=torch.float64)

def init_and_fit_model(X, comp):
    """Build a PairwiseGP on the given data and fit it.

    Args:
        X: Datapoints passed to ``PairwiseGP``; the size of its last
            dimension is used as the input dimension of the ``Normalize``
            transform.
        comp: Comparisons passed to ``PairwiseGP``. In this script each row
            is a pair of row indices into ``X`` with the preferred one first.

    Returns:
        A tuple ``(mll, model)``: the fitted
        ``PairwiseLaplaceMarginalLogLikelihood`` and its ``PairwiseGP``.
    """
    feature_dim = X.shape[-1]
    normalizer = Normalize(d=feature_dim)
    gp = PairwiseGP(datapoints=X, comparisons=comp, input_transform=normalizer)
    laplace_mll = PairwiseLaplaceMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_mll(laplace_mll)
    return laplace_mll, gp

def get_best_next_pair(comp_matrix, model, valid_pairs, X=None):
    """Find the pair (i, j) that maximizes the acquisition function.

    Each candidate pair is scored with ``AnalyticExpectedUtilityOfBestOption``
    evaluated on the two teams' feature rows. The acquisition function has to
    be evaluated in the same feature space the model was fit on, so the raw
    team indices are only used to look the rows up, never as features.

    Args:
        comp_matrix: Unused; kept so existing callers keep working.
        model: Fitted preference model handed to the acquisition function.
        valid_pairs: Iterable of ``(i, j)`` index pairs to score.
        X: Optional feature matrix whose row ``k`` describes team ``k``. When
            omitted, the datapoints stored on ``model`` are used.
            NOTE(review): this assumes the model keeps its training
            datapoints in team-index order -- confirm for the installed
            BoTorch version, or pass ``X`` explicitly.

    Returns:
        The ``(i, j)`` pair with the highest acquisition value (the first one
        on ties), or ``None`` if ``valid_pairs`` is empty.
    """
    if X is None:
        # Prefer the rows exactly as they were passed to the model, if the
        # model exposes them; otherwise fall back to ``datapoints``.
        X = getattr(model, "unconsolidated_datapoints", None)
        if X is None:
            X = model.datapoints

    acq_func = AnalyticExpectedUtilityOfBestOption(pref_model=model, previous_winner=None)
    best_value = -float('inf')
    best_pair = None
    with torch.no_grad():
        for i, j in valid_pairs:
            # Shape (1, 2, d): a batch holding one pair of feature rows.
            candidate = X[[i, j]].unsqueeze(0)
            value = acq_func(candidate).item()
            if value > best_value:
                best_value = value
                best_pair = (i, j)
    return best_pair

def filter_valid_pairs(comp_matrix):
    """Return the not-yet-compared pairs in which neither team has lost.

    A team counts as having lost when any cell in its row equals 0. The
    number of teams is taken from ``comp_matrix`` itself, so the function
    works for a square matrix of any size.

    Args:
        comp_matrix: Square array-like of pairwise outcomes, with NaN marking
            pairs that have not been compared.

    Returns:
        A list of ``(i, j)`` tuples with ``i < j``, ordered by ``i`` and then
        by ``j``.
    """
    matrix = np.asarray(comp_matrix)
    size = matrix.shape[0]
    # Compute the "has lost" flag once per team instead of once per pair.
    has_lost = (matrix == 0).any(axis=1)
    return [
        (i, j)
        for i in range(size)
        if not has_lost[i]
        for j in range(i + 1, size)
        if not has_lost[j] and np.isnan(matrix[i, j])
    ]

# Ask the user for a fixed set of initial comparisons so the model has some
# data to be fit on before the acquisition-driven loop starts.
initial_comparisons = []
initial_X_indices = [(0, 1), (2, 4), (3, 6)]
print("Please provide initial comparisons for the following teams:")
for i, j in initial_X_indices:
    _, _, result = query_user_for_comparison(i, j)
    initial_comparisons.append((i, j, result))
    comp_matrix = update_comparison_matrix(comp_matrix, i, j, result)

# One row per comparison, preferred team's index first. The dtype is set
# explicitly because these values are used as indices.
comparisons = torch.tensor(
    [
        (first, second) if outcome == 1 else (second, first)
        for first, second, outcome in initial_comparisons
    ],
    dtype=torch.long,
)

# Initialize the model with user-defined data
dim = 4
init_X = generate_data(n_teams, dim=dim)  # one random feature row per team
mll, model = init_and_fit_model(init_X, comparisons)

# Sequential Bayesian optimization loop: pick a pair, ask the user, refit.
NUM_ITERATIONS = 10

# Running maximum of the posterior mean over init_X, and its value after
# each completed query.
current_best = float('-inf')
best_observed_values = []

for iteration in range(NUM_ITERATIONS):
    # Only pairs of teams that have not lost yet are eligible.
    valid_pairs = filter_valid_pairs(comp_matrix)
    if not valid_pairs:
        print("No more valid pairs to query.")
        break

    # Choose the eligible pair with the highest acquisition value.
    best_pair = get_best_next_pair(comp_matrix, model, valid_pairs)
    if best_pair is None:
        print("Failed to find a valid pair to query.")
        break

    i, j = best_pair
    _, _, result = query_user_for_comparison(i, j)
    comp_matrix = update_comparison_matrix(comp_matrix, i, j, result)

    # Append the new comparison with the preferred team's index first.
    winner_first = (i, j) if result == 1 else (j, i)
    comparisons = torch.cat([comparisons, torch.tensor([winner_first])])

    # Refit the model on all comparisons collected so far.
    mll, model = init_and_fit_model(init_X, comparisons)

    # Update the running maximum of the posterior mean.
    posterior_mean = model.posterior(init_X).mean
    current_best = max(current_best, posterior_mean.max().item())
    best_observed_values.append(current_best)

# Final model evaluation (the Kendall-tau check below is currently disabled)
# def eval_kt_cor(model, test_X):
#     pred_y = model.posterior(test_X).mean.squeeze().detach().numpy()
#     test_y = np.sum(test_X.numpy(), axis=1)
#     return kendalltau(pred_y, test_y).correlation

test_X = generate_data(1000, dim=dim)
# kt_correlation = eval_kt_cor(model, test_X)

# Plot the results
# One x position per recorded value: the query loop can stop before
# NUM_ITERATIONS, and plotting needs x and y to have the same length.
iters = list(range(1, len(best_observed_values) + 1))

plt.figure(figsize=(8, 6))
plt.plot(iters, best_observed_values, label="EUBO", linewidth=1.5, marker='o')
plt.xlabel("Number of queries")
plt.ylabel("Best observed value")
plt.title("Bayesian Optimization for Team Selection for a knockout tournament")
plt.legend()
plt.grid(True)
plt.show()


# Captured cell output: ModuleNotFoundError: No module named 'botorch'
# (the `botorch` package must be installed before this script can run).