In [None]:
import numpy as np
def pairwise_fw(
    objective_fun,
    gradient_fun,
    LMO,
    x0,
    hyperparams = {"max_iterations": 20, "tolerance": 1e-6}
):
    """
    Run Pairwise Frank-Wolfe iterations starting from x0.

    At each iteration mass is moved from the "away" vertex (the active-set
    vertex with the largest inner product with the gradient) to the vertex
    returned by the linear minimization oracle. The step size is
    min(weight of the away vertex, 2 / (t + 2)); no line search is performed.

    Inputs:
        - objective_fun (callable): Function f(x) to minimize.
        - gradient_fun (callable): Gradient of f(x).
        - LMO (callable): Linear minimization oracle. Called as LMO(grad) and
          expected to return a point s of the feasible set (e.g., a vertex of
          an L∞-ball) minimizing <grad, s>.
        - x0 (np.ndarray): Initial feasible point. It is used as the single
          initial element of the active set, so it is presumably expected to
          be a vertex of the feasible set. x0 itself is not modified.
        - hyperparams (dict): Dictionary with hyperparameters (only read,
          never mutated):
            - "max_iterations" (int): Maximum number of iterations.
            - "tolerance" (float): Tolerance on the Frank-Wolfe duality gap.
              The same value is also used as the weight threshold below which
              a vertex is dropped from the active set.
    Outputs:
        - x_t (np.ndarray): Final solution (float64).
        - t (int): Index (1-based) of the last iteration that was started,
          or 0 if no iteration was run (max_iterations <= 0).
        - history (dict): Per-iteration lists under the keys 'objective',
          'gap' (Frank-Wolfe duality gap) and 'gradient' (full gradient).
    """
    # Defining the parameters
    x_t = x0.copy().astype(np.float64)
    max_iterations = hyperparams["max_iterations"]
    tolerance = hyperparams["tolerance"]

    # Defining the active set S and weights alpha_t
    active_set = [x0.copy()]
    weights = np.array([1.0])

    # History trackers
    history = {'objective': [], 'gap': [], 'gradient': []}

    # Bind t up front so the return statement is valid even when the loop
    # body never executes (max_iterations <= 0).
    t = 0

    # Starting the Pairwise Frank-Wolfe iterations
    for t in range(1, max_iterations + 1):

        # Compute the loss and gradient at the current point
        objective_t = objective_fun(x_t)
        grad_t = gradient_fun(x_t)

        # Compute the FW vertex and direction
        s_t = LMO(grad_t)
        d_t_FW = s_t - x_t

        # Compute the duality gap
        gap_t_FW = -grad_t @ d_t_FW

        # Store history
        history['gradient'].append(grad_t)
        history['objective'].append(objective_t)
        history['gap'].append(gap_t_FW)

        # Check for convergence
        if gap_t_FW < tolerance:
            print(f"Duality gap below tolerance at iteration {t}: {gap_t_FW}")
            break

        # Compute the away vertex: the active vertex most aligned with the
        # gradient. Done after the convergence check because it is only
        # needed when a step is actually taken.
        v_scores = np.array([grad_t @ v for v in active_set])
        v_t_idx = np.argmax(v_scores)
        v_t = active_set[v_t_idx]

        # Compute the pairwise step direction
        d_t_PW = s_t - v_t

        # Step size: never move more mass than the away vertex holds
        gamma_t = min(weights[v_t_idx], 2.0 / (t + 2.0))

        # Compute the next step
        x_t += gamma_t * d_t_PW

        # Update the weight of the away vertex v_t
        weights[v_t_idx] -= gamma_t

        # Check if s_t is already in the active set; if so, give it the mass
        for i, v in enumerate(active_set):
            if np.allclose(v, s_t):
                weights[i] += gamma_t
                break
        else:
            # s_t is a new vertex. Store a copy so that an LMO which reuses
            # its output buffer cannot silently alter the active set later.
            active_set.append(np.array(s_t))
            weights = np.append(weights, gamma_t)

        # Remove vertices whose weight has (numerically) vanished.
        # NOTE(review): the convergence tolerance doubles as the weight
        # threshold here; with a large tolerance, vertices that still carry
        # mass are dropped without renormalizing the remaining weights.
        check_weights = np.where(weights > tolerance)[0]
        if len(check_weights) < len(weights):
            active_set = [active_set[i] for i in check_weights]
            weights = weights[check_weights]

    return x_t, t, history