In [1]:
import numpy as np
import numpy.linalg as npl
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from matplotlib.path import Path
from matplotlib import rc
from scipy.optimize import minimize_scalar, minimize
import datetime
from datetime import timedelta
import time
import os
import networkx as nx
import pandas as pd
import itertools 
from gurobipy import*
from time import process_time
from scipy.optimize import minimize

In [2]:
from typing import List, Dict, Set
import numpy as np
import math
import logging
from utils import *
from constants import *

logging.basicConfig(level=logging.WARNING)


def sort(x: List[float], reverse=False):
    """
    :param x: List of numbers
    :param reverse: Sorts in decreasing order if set to True
    :return: Sorted list and the corresponding mapping (permutation)
    """
    enum = sorted(enumerate(x), key=lambda z: z[1], reverse=reverse)
    y = [enum[j][1] for j in range(len(enum))]
    mapping = {enum[j][0]: j for j in range(len(enum))}

    return y, mapping


def invert(mapping: Dict[int, int]):
    """
    Invert a (bijective) mapping {0, ..., n - 1} -> {0, ..., n - 1}
    :param mapping: Original mapping
    :return: Inverse of the original mapping
    """
    return {mapping[i]: i for i in range(len(mapping))}


def map_set(S: Set[int], mapping: Dict[int, int]):
    """
    Determines the range of S under mapping
    :param S: set of integers
    :param mapping: mapping
    :return: range of S under mapping as a set
    """
    return set({mapping[i] for i in S})


def permute(x: List[float], mapping: Dict[int, int]):
    """
    Permutes x according to mapping
    :param x:
    :param mapping:
    :return:
    """
    y = [0.0] * len(x)
    for i in range(len(x)):
        y[mapping[i]] = x[i]

    return y


class SubmodularFunction:
    def __init__(self, n: int = 0):
        """
        :param S: ground set; if you want to make the ground set [1, ..., n] set S = {} and see n
        :param n: if you want the ground set to be the range [1, ..., n]
        """
        self.n = n

    def __len__(self):
        return self.n

    def function_value(self, T: set):
        if not T.issubset(range(self.n)):
            raise ValueError("The provided set is not a subset of the ground set.")
        return 0.0


class CardinalityDifferenceSubmodularFunction(SubmodularFunction):
    def __init__(self, g: List[float], r: List[float], n: int = 0):
        super().__init__(n)
        # We assume that the list g is the tuple (g(1), ..., g(n)). We append g(0) = 0 to this list
        # Check if g induces a submodular function f
        if not self.is_cardianality_submodular([0.0] + g):
            raise TypeError('The tuple g does not induce a cardinality based polytope.')

        if len(g) != len(r) or len(g) != n:
            raise ValueError('Sizes not same.')

        self.g = [0.0] + g
        self.r = r

    def function_value(self, T: set):
        if not T.issubset(range(len(self) + 1)):
            raise ValueError("The provided set is not a subset of the ground set.")

        return self.g[len(T)]  # - r_T

    @staticmethod
    def is_cardianality_submodular(g: List[float]):
        """
        Checks if the cardinality function g induces submodular f
        :param g: cardinality function g on {0, 1, ..., n}
        :return: True if f is submodular, False otherwise
        """
        n = len(g) - 1

        # Check if g is monotonic nonincreasing
        for i in range(n):
            if g[i + 1] < g[i]:
                logging.debug('Nonmonotonic: i = ' + str(i) + ', g[i] = ' + str(g[i]) +
                              ', g[i + 1] = ' + str(g[i + 1]))
                return False

        # Check if g is concave
        for i in range(n - 1):
            if g[i] + g[i + 2] > 2 * g[i + 1] + minimum_decimal_difference:
                logging.debug('Not concave: i = ' + str(i) + ', g[i] = ' + str(g[i]) +
                              ', g[i + 1] = ' + str(g[i + 1]) + ', g[i + 2] = ' + str(g[i + 2]))
                return False

        return True


class CardinalitySubmodularFunction(CardinalityDifferenceSubmodularFunction):
    def __init__(self, g: List[float], n):
        r = [0.0] * n
        super().__init__(g, r, n)


class PermutahedronSubmodularFunction(CardinalitySubmodularFunction):
    def __init__(self, n: int = 1):
        g = [float(n)]
        for i in range(1, n):
            g.append(float(g[-1] + n - i))
        super().__init__(g, n)


class SubmodularPolytope:
    def __init__(self, f: SubmodularFunction):
        self.f = f

    def __len__(self):
        return len(self.f)

    def is_feasible(self, x):
        pass

    def linear_optimization_over_base(self, c: List[float]):
        """
        Returns max (c^T x) and argmin (c^T x) for x in B(f)
        :param c: Vector of length |E|
        :return: max c^T x for x in B(f)
        """
        d, mapping = sort(c, reverse=True)  # Sort in revese order for greedy algorithm
        x = [0.0] * len(self)  # This will be the argmax
        # Greedy algorithm
        for i in range(len(self)):
            x[i] = self.f.function_value(set(range(i + 1))) - self.f.function_value(set(range(i)))

        opt = sum([x[i] * d[i] for i in range(len(self))])  # Opt value
        invert_x = permute(x, invert(mapping))  # Restore the original order for x
        invert_x = round_list(invert_x, base_decimal_accuracy)  # Round to base accuracy (5 decimal)

        return opt, invert_x  # Return max, argmax c^T x

    def linear_optimization_tight_sets(self, c: List[float], T: List[Set]):
        """
        Linear optimization over B(f) with additional constraints. Every set in T should also be
        tight. Returns max (c^Tx) and argmin (c^T x) for x in B(f), with the additional
        constraints that x(U) = f(U) for all U in T.
        :param c: cost vector
        :param T: set of tight sets. Assumed to be a chain for now, and T is assumed to be
        increasing. T[0] is assumed to be the emptyset set({}) and T[-1] is assumed to be the
        ground set
        :return: max c^T x under the constraints
        """
        permutation = {}
        count = 0
        for j in range(1, len(T)):
            U = T[j]
            D = U.difference(T[j - 1])
            for u in D:
                permutation[u] = count
                count = count + 1

        c1 = permute(c, permutation)
        x = []
        l = 0
        mappings = {}
        opt = 0

        for j in range(1, len(T)):
            U = T[j]
            y = []
            D = U.difference(T[j - 1])
            m = len(D)
            d, mapping = sort(c1[l: l + m], reverse=True)
            inverse_mapping = invert(mapping)
            for i in range(m):
                y.append(self.f.function_value(set(range(i + l + 1))) - self.f.function_value(set(
                    range(l + i))))
                opt = opt + d[i] * (self.f.function_value(set(range(l + i + 1))) -
                                    self.f.function_value(set(range(l + i))))
            y = permute(y, inverse_mapping)
            x = x + y
            l = l + m

        z = permute(x, invert(permutation))
        return opt, z

    def affine_minimizer(self, S: List[List[float]]):
        n = len(self)
        B = np.column_stack(S)
        C = np.transpose(B)
        D = np.linalg.inv(np.matmul(C, B))
        o = np.ones(n)
        alpha = (np.matmul(D, o)) / (np.matmul(o, np.matmul(D, o)))
        y = np.matmul(B, alpha)
        return y, alpha

    def minimum_norm_point(self, eps: float):
        def get_base_vertex():
            return [self.f.function_value(set(range(i + 1))) - self.f.function_value(set(range(i)))
                    for i in range(n)]

        def nonnegative_coordinates(y: List[float]):
            C = {}
            for i in range(len(y)):
                if y[i] < 0:
                    C.update({i: y[i]})

            return C

        eps = abs(eps)
        x = get_base_vertex()
        S = [x]  # Set S in the algorithm
        L = [1]  # Coefficients lambda_i
        s = 1  # |S|
        while True:
            _, q = self.linear_optimization_over_base(x)
            if np.linalg.norm(x) * np.linalg.norm(x) <= np.matmul(x, q) + eps * eps:
                break

            S = S + [q]
            L.append(0.0)

            while True:
                y, alpha = self.affine_minimizer(S)
                C = nonnegative_coordinates(alpha)
                if len(C) == 0:
                    break

                theta = min([L[k] / (L[k] - alpha[k]) for k in C])
                x = theta * y + (1 - theta) * x
                L = theta * y + (1 - theta) * L

            x = y

        return x


class CardinalityPolytope(SubmodularPolytope):
    """
    Class for cardinality based polytopes. Let N be the ground set of size n. Then,
    f is a submodular function on the power set of N, given by f(A) = g(|A|) for each subset A of
    N. g is called the cardinality function.
    """

    def __init__(self, f: CardinalitySubmodularFunction):
        super().__init__(f)
        self.f = f

    def __len__(self):
        """
        :return: size of the ground set S
        """
        return len(self.f)

    def is_feasible(self, x: List[float]):
        """
        Checks if point x is in P(f)
        :param x: point in space
        :return: True if x is in P(f), False, otherwise
        """
        # Descending sort
        x.sort(reverse=True)

        n = len(x)
        # Check if dimensions match:
        if n != len(self):
            raise ValueError('The dimension ' + str(n) + 'of the point does not match the '
                                                         'dimension ' + str(
                len(self)) + ' of the ground set.')

        # Check if x[0] + x[1] + ... + x[i - 1] <= g[i]
        prefix_sum_x = self.prefix_sum(x)
        for i in range(n):
            if prefix_sum_x[i] > self.f.g[i + 1]:
                return False

        return True

    def is_feasible_in_base(self, x: List[float]):
        """
        Checks if x in is B(f)
        :param x: point in space
        :return: True if x is in B(f), False otherwise
        """
        return True if sum(x) == self.f.g[-1] and self.is_feasible(x) else False

    @staticmethod
    def prefix_sum(x: List[float]):
        """
        :param x: Point in space
        :return: prefix sum of x
        """
        prefix_sum = [x[0]]
        for i in range(1, len(x)):
            prefix_sum.append(round(prefix_sum[i - 1] + x[i], high_decimal_accuracy))

        return prefix_sum

    @staticmethod
    def distance(x: float, y: float):
        return abs(x - y)


class Permutahedron(CardinalityPolytope):
    def __init__(self, f: PermutahedronSubmodularFunction):
        super().__init__(f)


In [3]:
def sort_vector(v):
    return np.array([v[k] for k in sorted(list(v.keys()))])


def construct_function(n,g):
    ground_set = list(range(1,n+1))
    discrete_concave = sorted(g,reverse =True)
    h = {}
    h[0] = 0
    for i,j in enumerate(discrete_concave):
        h[i+1] = h[i] + j
    return h


def submodular_function(n,g):
    
    func = construct_function(n,g)
    ground = list(range(1,n+1))
    
    def findsubsets(s, l): 
        return list(itertools.combinations(s, l)) 

    subsets = []
    for i in range(1,n+1):
        subsets.extend(findsubsets(ground, i))
        
    f = {}
    f[tuple([0])] = func[0]
    for i in subsets:
        f[i] = func[len(i)]

    return f


def submodular_oracle(S,func,card):
    if card == True:
        return func[len(S)]
    else:
        return func[S]

    
def greedy_submodular(w,func,card):
    
    #find permuation corresponding to cost vector sorted in decreasing order
    pi = np.argsort(-w)+1
    
    #s is the optimal chain of elements in ground set and x is the corresponsing optimal solution constructed by greedy
    x = {}
    s = {}
    s[0] = []
    for i,j in enumerate(pi):
        #extend chain based on permuation above
        s[i+1] = sorted(pi[:i+1])
        
        #x is then the marginal gain
        if w[j-1] > 0:
            x[j] = submodular_oracle(tuple(s[i+1]),func,card) - submodular_oracle(tuple(s[i]),func,card)
        else:
            x[j] = 0
        
    return sort_vector(x)


def greedy_submodular_base(w,func,card):
    
    #find permuation corresponding to cost vector sorted in decreasing order
    pi = np.argsort(-w)+1
    
    #s is the optimal chain of elements in ground set and x is the corresponsing optimal solution constructed by greedy
    x = {}
    s = {}
    s[0] = [0]
    for i,j in enumerate(pi):
        #extend chain based on permuation above
        s[i+1] = sorted(pi[:i+1])
        
        #x is then the marginal gain
        x[j] = submodular_oracle(tuple(s[i+1]),func,card) - submodular_oracle(tuple(s[i]),func,card)
        
    return sort_vector(x)


def greedy_submodular_chains(w,func,chains):
    
    #find permuation corresponding to cost vector sorted in decreasing order
    chains_new = chains + [list(np.ones(len(w)))]
    pi = []
    c_old = np.zeros(len(w))
    pi_prime = np.argsort(-w)
    for c in chains_new:
        c_new = np.array(c) - c_old
        s = list(np.where(c_new == 1)[0])
        for i in pi_prime:
            if i in s:
                pi.append(int(i)+1)
        c_old = c
                
    #s is the optimal chain of elements in ground set and x is the corresponsing optimal solution constructed by greedy
    x = {}
    s = {}
    s[0] = []
    k = int(sum(chains[-1]))
    for i,j in enumerate(pi):
        #extend chain based on permuation above
        s[i+1] = sorted(pi[:i+1])
        
        #x is then the marginal gain
        if w[j-1] >= 0 or i +1 <=k :
            x[j] = submodular_oracle(tuple(s[i+1]),func,card) - submodular_oracle(tuple(s[i]),func,card)
        else:
            x[j] = 0
        
    return sort_vector(x)

In [4]:
def line_search_card(x_0,a,func):
    n = len(x_0)
    card = True

    #check if all components of LS direction are negative since in this case result is trivial
    if all(a <0):
        lam = 0

    #otherwise we run discrete Newtons method for LS    
    else:

        #find initial lam based on singletons in the ground set; also gives check if a is feasible direction to begin with
        lam = min([(submodular_oracle([i+1],func,card) - x_0[i])/j for i,j in enumerate(a) if j > 0])
        
        #we only need to do n iterations since our polytope is cardinality based
        for i in range(len(x_0)):
            #try moving with magnitued lam
            y = x_0 + lam*a

            #sort y so we can check feasiblity in the base polytope
            pi = np.argsort(-y)+1
            
            #find cummulative sums of sorted vector so we can check feasibility/violations
            s = np.cumsum(sorted(y,reverse =True))
            violations = [func[i+1] - j for i,j in enumerate(s)]
            
            #find maximum violation and most violated set
            card_violated,violation_value = min(enumerate(violations), key=lambda x : x[1])
            #card_violated = min([i+1 for i,j in enumerate(violations) if j == violation_value])
            
            
            #if smallest violation is 0 (note they can't all be strictly greater than 0  because of base polytope)
            if np.round(violation_value,4) == 0:
                break
                
            #otherwise we update step size and repeat
            else:
                most_violated_set = pi[:card_violated+1]
                lam = (submodular_oracle(most_violated_set,func,card) - sum([x_0[i-1] for i in most_violated_set]))\
                        /sum([a[i-1] for i in most_violated_set])
                
    #note that if the sum of a != 0, then a will not satisfy base constraint, i.e. its not a feasbile direction
    #So if a is a feasbile direction for the polymatroid but not for the base polytope ground set V will
    #be the most violated in one of the iterations above. Thus, since x_0 \in B(f) we know that x(V) = f(V) 
    # and hence when updating lam the numerator will be 0 for the iteration which would then give that lam = 0
    # this shows the correctness of the algorithm and our construction of the violation for base constraint
    return lam

In [5]:
def get_indicator(S,n):
    dic = {k:0 for k in range(1,n+1)}
    for i in S:
        dic[i] = 1
    return sort_vector(dic)


def get_tight_sets(x,func):
    tight_sets = []
    s = np.cumsum(sorted(x,reverse =True))
    pi = np.argsort(-x)+1
    slack = [func[i+1] - j for i,j in enumerate(s)]
    for i,j in enumerate(slack):
        if np.round(j,4) > 0:
            continue
        else:
            tight_sets.append(get_indicator(pi[:i+1],n))
            
    return tight_sets

In [6]:
#line-search using golden-section rule
def segment_search(f, grad_f, x, y, tol=1e-6, stepsize=True):
    
    '''
    Minimizes f over [x, y], i.e., f(x+gamma*(y-x)) as a function of scalar gamma in [0,1]
    '''
    
    # restrict segment of search to [x, y]
    d = (y-x).copy()
    left, right = x.copy(), y.copy()
    
    # if the minimum is at an endpoint
    if np.dot(d, grad_f(x))*np.dot(d, grad_f(y)) >= 0:
        if f(y) <= f(x):
            return y, 1
        else:
            return x, 0
    
    # apply golden-section method to segment
    gold = (1+np.sqrt(5))/2
    improv = np.inf
    while improv > tol:
        old_left, old_right = left, right
        new = left+(right-left)/(1+gold)
        probe = new+(right-new)/2
        if f(probe) <= f(new):
            left, right = new, right
        else:
            left, right = left, probe
        improv = np.linalg.norm(f(right)-f(old_right))+np.linalg.norm(f(left)-f(old_left))
    x_min = (left+right)/2
    
    # compute step size gamma
    gamma = 0
    if stepsize == True:
        for i in range(len(d)):
            if d[i] != 0:
                gamma = (x_min[i]-x[i])/d[i]
                break
                
    return x_min, gamma


#Fucntion to compute away vertex
def away_step(grad, S):
    costs = {}
    
    for k,v in S.items():
        cost = np.dot(k,grad)
        costs[cost] = [k,v]
    vertex, alpha = costs[max(costs.keys())]  
    return vertex,alpha

#Function to update active set
def update_S(S,gamma, Away, vertex):
    
    S = S.copy()
    vertex = tuple(vertex)
    
    if not Away:
        if vertex not in S.keys():
            S[vertex] = gamma
        else:
            S[vertex] *= (1-gamma)
            S[vertex] += gamma
            
        for k in S.keys():
            if k != vertex:
                S[k] *= (1-gamma)
    else:
        for k in S.keys():
            if k != vertex:
                S[k] *= (1+gamma)
            else:
                S[k] *= (1+gamma)
                S[k] -= gamma
    return {k:v for k,v in S.items() if np.round(v,6) > 0}


def line_search(x, d, gamma_max,func):

    def fun(gamma):
        ls = x + gamma*d
        return func(ls)


    res = minimize_scalar(fun, bounds=(0, gamma_max), method='bounded')

    gamma = res.x
    ls = x + gamma*d        
    return ls, gamma


#AFW Algorithm
def AFW(x, S, lmo, epsilon, func, grad_f, f_tol, time_tol):
    
    #record primal gap, function value, and time every iteration
    now=datetime.datetime.now()
    primal_gap = []
    function_value=[func(x)]
    time = [0]
    f_improv = np.inf

    #initialize starting point and active set
    t = 0    

    while f_improv > f_tol and time[-1] < time_tol:
        # print('here!')
        
        start = process_time()
        
        #compute gradient
        grad = grad_f(x)

        #solve linear subproblem and compute FW direction
        v = lmo(-grad)
        # print(v)
        d_FW = tuple(np.array(v) - np.array(x))

        #If primal gap is small enough - terminate
        if np.dot(-grad,d_FW) <= epsilon:
            # print(epsilon, grad, d_FW, np.dot(-grad, d_FW))
            # print('dot too small :(')
            break
        else:
            #update convergence data
            primal_gap.append(np.dot(-grad,d_FW))

        #Compute away vertex and direction
        a,alpha_a = away_step(grad, S)
        d_A = tuple(np.array(x) - np.array(a))

        #Check if FW gap is greater than away gap
        if np.dot(-grad,d_FW) >= np.dot(-grad,d_A):
            #choose FW direction
            d = d_FW
            vertex = v
            gamma_max = 1
            Away = False
        else:
            #choose Away direction
            d = d_A
            vertex = a
            gamma_max = alpha_a/(1-alpha_a)
            Away = True

        #Update next iterate by doing a feasible line-search
        # print(type(d), type(x), type(gamma_max))
        x, gamma = line_search(np.array(x), np.array(d), gamma_max, func)
        #x, gamma = segment_search(func, grad_f, x, x + gamma_max *d)

        #update active set
        S = update_S(S,gamma, Away, vertex)
        
        end = process_time()
        time.append(time[t] + end - start)
        
        f_improv = function_value[-1] - func(x)
        function_value.append(func(x))
        
        t+=1
        
    return x, function_value, time, t, primal_gap, S

In [7]:
def convex_hull_correction1(S, func):    

    M = np.array([np.array(i) for i in S])
    
    def fun(theta):
        return func(np.dot(M.T,theta))

    cons = ({'type': 'eq', 'fun': lambda theta: sum(theta) - 1}) #sum of theta = 1
    bnds = tuple([(0, 1) for _ in M])
    x0 = tuple([1/len(M) for _ in M])

    res = minimize(fun, x0, bounds=bnds, constraints=cons)
    
    final_S = {tuple(M[i]):res.x[i] for i in range(len(M)) if np.round(res.x[i],5) > 0}

    return np.dot(M.T,res.x),final_S


def convex_hull_correction2(S, q):

    M = np.array([np.array(i) for i in S])
   
    opt, theta = proj_oracle(M,q)
    
    final_S = {tuple(M[i]):theta[i] for i in range(len(M))if np.round(theta[i],5) > 0}

    return opt, final_S


def proj_oracle(vertices,y):
    m = Model("opt")
    n = len(vertices[0])
    
    #define variables
    lam = {}
    for i in range(len(vertices)):
        lam[i] = m.addVar(lb=0, name='lam_{}'.format(i))
    
    x = []
    for i in range(n):
        x.append(m.addVar(lb=-GRB.INFINITY, name='x_{}'.format(i)))
    x = np.array(x)
    m.update()

    objExp = 0.5* np.dot(x-y, x-y)
    m.setObjective(objExp, GRB.MINIMIZE)
    m.update()

    #feasibility constraints
    for i in range(n):
        m.addConstr(x[i],'=', sum([lam[j]*vertices[j][i] for j in range(len(vertices))]))

    #convex hull constraint
    m.addConstr(quicksum([lam[i] for i in lam.keys()]), '=',1)
    m.update()
    
    #optimize
    m.setParam( 'OutputFlag', False )
    m.write('exact.lp')
    m.optimize()
    return np.array([i.x for i in x]), np.array([lam[i].x for i in lam])

In [8]:
def maximal_tight(y,g):
    
    #sort y so we can check feasiblity in the base polytope
    pi = np.argsort(-y)+1

    #find cummulative sums of sorted vector so we can check feasibility/violations
    s = np.cumsum(sorted(y,reverse =True))
    violations = np.round(np.array([g[i+1] - j for i,j in enumerate(s)]),6)
    
    if any (violations < 0):
        return 'y not feasible'
    elif all(violations > 0):
        return [0]
    else:
        return pi[:np.arange(len(y))[violations==0][-1]+1]

    
def proj(x):
    return 0.5*np.dot(x - y,x-y)


def proj_grad(x):
    return x - y


def chi(M):
    chi_0 = np.zeros(n)
    for i in M:
        chi_0[i-1] = 1
    return chi_0


def f(s):
    return sum([n + 1 - i for i in range(1,s+1)])


def compute_M(N,x):
    return N[np.round(proj_grad(x)[N - 1],5) == np.round(np.min(proj_grad(x)[N - 1]),5)]

In [9]:
def linear_oracle(A1,A2,b1,b2, c):
    
    '''
    General form LP solver that solves min c^Tx subject to Ax <= b
    '''

    m = Model("opt")
    n = len(A.T)

    #define variables
    x = []
    for i in range(n):
        x.append(m.addVar(lb=-GRB.INFINITY, name='x_{}'.format(i)))

    m.update()              

    objExp = np.dot(np.array(x),c)
    m.setObjective(objExp, GRB.MAXIMIZE)
    m.update()

    #feasibility constraints
    for i in range(len(A1)):
        m.addConstr(np.dot(np.array(x),A1[i]),'<=', b1[i])
        
    for i in range(len(A2)):
        m.addConstr(np.dot(np.array(x),A2[i]),'==', b2[i])


    m.update()

    #convex hull constraint

    m.update()

    #optimize
    m.setParam( 'OutputFlag', False )
    m.optimize()

    return np.array([i.x for i in x]), m.Pi

In [10]:
from constants import *
from submodular_polytope import CardinalitySubmodularFunction, CardinalityPolytope, \
    PermutahedronSubmodularFunction, Permutahedron
from utils import generate_random_permutation
import random

n = 100
f = PermutahedronSubmodularFunction(n)
P = Permutahedron(f)


def generate_loss_function_vector_for_permutahedron(n: int):
    x = [random.random() for i in range(n)]         # Each entry is random number between 0 and 1
    s = sum(x)
    x = [(x[i] * n)/s for i in range(n)]            # Normalize so the sum is always 1
    x.sort(reverse=True)
    return np.array(x)


def generate_losses_and_random_vector(n: int, T: int):
    loss_vectors_list = [generate_loss_function_vector_for_permutahedron(n) for i in range(T)]
    x = tuple(generate_random_permutation(n))
    v = tuple(generate_random_permutation(n))
    return loss_vectors_list, x, v


def online_mirror_descent_permutahedron(P: Permutahedron, T: int):
    """
    Performs online mirror descent on a permutahedron
    :param P: permuathedron. See submodular_polytope.py for class definition
    :param T: number of iterations
    :return: Total regret
    """
    n = len(P)
    D = (n ** 3 - n)/6                  # Diameter of permutahedron
    G = n                               # Upper bound on norm l1 norm
    alpha = 1                           # For Euclidean projection
    eta = (D/G) * math.sqrt((2 * alpha)/T)
    
    total_time_vanilla, total_time_active_set_optimized, total_time_doubly_optimized = 0.0, 0.0, 0.0
    fw_iterations_vanilla, fw_iterations_active_set_optimized, fw_iterations_doubly_optimized = [], [], []
    
    loss_vectors_list, x_0, v = generate_losses_and_random_vector(n, T)
    S_vanilla, S_active_set_optimized, S_doubly_optimized = {x_0: 1}, {x_0: 1}, {x_0: 1}                        # Active vertex set with its coefficients
    
    # Vanilla FW
    for t in range(T):
        max_coeff = 0
        x = x_0
        for sigma in S_vanilla:
            if S_vanilla[sigma] > max_coeff:
                max_coeff = S_vanilla[sigma]
                x = sigma

        l = loss_vectors_list[t]
        loss = np.dot(x, l)
        y = x - eta * l
        
        f = lambda x: 0.5 * np.dot(x - y, x - y)
        grad_f = lambda x: np.array(x) - np.array(y)
        
        def lmo(x):
            _, v = P.linear_optimization_over_base(x)
            return tuple(v)
        
        f_tol,time_tol,epsilon = -1, np.inf, 0.1
        fw_sol, fw_func, fw_time, fw_iter, fw_gap, S_vanilla = AFW(v, {v: 1}, lmo, epsilon,f,grad_f, f_tol, time_tol)
        fw_iterations_vanilla.append(fw_iter)
        
        total_time_vanilla = total_time_vanilla + sum(fw_time)
        
    # Active set optimized FW
    for t in range(T):
        max_coeff = 0
        x = x_0
        for sigma in S_active_set_optimized:
            if S_active_set_optimized[sigma] > max_coeff:
                max_coeff = S_active_set_optimized[sigma]
                x = sigma

        l = loss_vectors_list[t]
        loss = np.dot(x, l)
        y = x - eta * l
        
        f = lambda x: 0.5 * np.dot(x - y, x - y)
        grad_f = lambda x: np.array(x) - np.array(y)
        
        def lmo(x):
            _, v = P.linear_optimization_over_base(x)
            return tuple(v)
        
        f_tol,time_tol,epsilon = -1, np.inf, 0.1
        fw_sol, fw_func, fw_time, fw_iter, fw_gap, S_active_set_optimized = AFW(x, S_active_set_optimized, lmo, epsilon,f,grad_f, f_tol, time_tol)
        
        fw_iterations_active_set_optimized.append(fw_iter)
        total_time_active_set_optimized = total_time_active_set_optimized + sum(fw_time)
                
    # Doubly optimized FW
    for t in range(T):
        max_coeff = 0
        x = x_0
        for sigma in S_doubly_optimized:
            if S_doubly_optimized[sigma] > max_coeff:
                max_coeff = S_doubly_optimized[sigma]
                x = sigma

        l = loss_vectors_list[t]
        loss = np.dot(x, l)
        y = x - eta * l
        
        f = lambda x: 0.5 * np.dot(x - y, x - y)
        grad_f = lambda x: np.array(x) - np.array(y)
        
        def lmo(x):
            _, v = P.linear_optimization_over_base(x)
            return tuple(v)
        
        f_tol,time_tol,epsilon = -1, np.inf, 0.1
        x, S_doubly_optimized = convex_hull_correction2(S_doubly_optimized, y)
        fw_sol, fw_func, fw_time, fw_iter, fw_gap, S_doubly_optimized = AFW(x, S_doubly_optimized, lmo, epsilon,f,grad_f, f_tol, time_tol)
        
        fw_iterations_doubly_optimized.append(fw_iter)
        total_time_doubly_optimized = total_time_doubly_optimized + sum(fw_time)
        
    return total_time_vanilla, total_time_active_set_optimized, total_time_doubly_optimized

online_mirror_descent_permutahedron(P, 1000)

Academic license - for non-commercial use only - expires 2021-07-09
Using license file C:\Users\jaimo\gurobi.lic


  opt, theta = proj_oracle(M,q)


(1250.625, 314.1875, 129.265625)

In [20]:
def online_mirror_descent_permutahedron_optimized(P: Permutahedron, T: int):
    """
    Performs online mirror descent on a permutahedron
    :param P: permuathedron. See submodular_polytope.py for class definition
    :param T: number of iterations
    :return: Total regret
    """
    n = len(P)
    D = (n ** 3 - n)/6                  # Diameter of permutahedron
    G = n                               # Upper bound on norm l1 norm
    alpha = 1                           # For Euclidean projection
    eta = (D/G) * math.sqrt((2 * alpha)/T)
    total_time = 0

    v = tuple(generate_random_permutation(n))
    x_0 = generate_random_permutation(n)
    S = {tuple(x_0): 1}                        # Active vertex set with its coefficients
    for t in range(T):
        max_coeff = 0
        x = x_0
        for sigma in S:
            if S[sigma] > max_coeff:
                max_coeff = S[sigma]
                x = sigma

        l = generate_loss_function_vector_for_permutahedron(n)
        loss = np.dot(x, l)
        y = x - eta * l
        # print(y)
        f = lambda x: 0.5 * np.dot(x - y, x - y)
        grad_f = lambda x: np.array(x) - np.array(y)
        def lmo(x):
            _, v = P.linear_optimization_over_base(x)
            return tuple(v)
        f_tol,time_tol,epsilon = -1, np.inf, 0.1
        
        # x, S = convex_hull_correction2(S, y)
        AFW_sol, AFW_func, AFW_time, AFW_iter,AFW_gap, S = AFW(x, S, lmo, epsilon,f,grad_f, f_tol, time_tol)
        # print(sum(AFW_time))
        print(AFW_iter)
        total_time = total_time + sum(AFW_time)

    return total_time



online_mirror_descent_permutahedron_vanilla(P, 100)

88
50
53
55
30
24
37
30
31
30
30
37
33
32
26
30
29
29
32
30
32
30
37
27
30
28
31
28
31
35
23
28
31
31
38
26
30
36
27
32
32
34
23
23
32
29
27
30
29
26
30
37
30
31
30
28
42
23
29
31
31
30
29
26
30
30
37
30
23
33
44
30
38
31
30
30
40
24
27
37
30
29
31
28
22
30
40
30
25
32
28
34
30
30
29
30
23
36
27
32


101.640625

In [21]:
online_mirror_descent_permutahedron_optimized(P, 100)

98
61
27
10
7
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


16.015625

In [22]:
def online_mirror_descent_permutahedron_doubly_optimized(P: Permutahedron, T: int):
    """
    Performs online mirror descent on a permutahedron
    :param P: permuathedron. See submodular_polytope.py for class definition
    :param T: number of iterations
    :return: Total regret
    """
    n = len(P)
    D = (n ** 3 - n)/6                  # Diameter of permutahedron
    G = n                               # Upper bound on norm l1 norm
    alpha = 1                           # For Euclidean projection
    eta = (D/G) * math.sqrt((2 * alpha)/T)
    total_time = 0

    v = tuple(generate_random_permutation(n))
    x_0 = generate_random_permutation(n)
    S = {tuple(x_0): 1}                        # Active vertex set with its coefficients
    for t in range(T):
        max_coeff = 0
        x = x_0
        for sigma in S:
            if S[sigma] > max_coeff:
                max_coeff = S[sigma]
                x = sigma

        l = generate_loss_function_vector_for_permutahedron(n)
        loss = np.dot(x, l)
        y = x - eta * l
        # print(y)
        f = lambda x: 0.5 * np.dot(x - y, x - y)
        grad_f = lambda x: np.array(x) - np.array(y)
        def lmo(x):
            _, v = P.linear_optimization_over_base(x)
            return tuple(v)
        f_tol,time_tol,epsilon = -1, np.inf, 0.1
        
        x, S = convex_hull_correction2(S, y)
        AFW_sol, AFW_func, AFW_time, AFW_iter,AFW_gap, S = AFW(x, S, lmo, epsilon,f,grad_f, f_tol, time_tol)
        # print(sum(AFW_time))
        print(AFW_iter)
        total_time = total_time + sum(AFW_time)

    return total_time

online_mirror_descent_permutahedron_doubly_optimized(P, 100)

  opt, theta = proj_oracle(M,q)


85
44
34
17
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


12.734375