# Stochastic Red Blue Set Covering 

Experiments for Approximation Algorithms 

In [None]:
!sudo python -m pip install gurobipy>=9.5.1
!pip install netgraph
import gurobipy as gp
import pandas as pd
import numpy as np
from gurobipy import GRB
from google.colab import drive
from itertools import product
import math, sys, time
from netgraph import Graph, InteractiveGraph, EditableGraph
import matplotlib.pyplot as plt
import multiprocessing
import networkx as nx
import random
import pickle as pkl

# Mount Google Drive and put the project's RBSC/ folder on the import path.
drive.mount('/content/gdrive')
# NOTE(review): this is a relative path; presumably it is resolved against
# Colab's default working directory (/content) -- confirm before running elsewhere.
pth = 'gdrive/My Drive/Colab Notebooks/'
sys.path.append(pth + 'RBSC/')
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline

In [None]:
# Read the Gurobi Web License Service (WLS) credentials interactively and
# start the environment `e` that the solver calls below receive via `env=e`.
from getpass import getpass

print("Input WLS Access ID")
accessid = input().strip()
print("Input WLS License ID")
licenseid = input().strip()
print("Input WLS Secret Key")
# getpass keeps the secret out of the cell output (input() would echo it into
# the saved notebook); strip() guards against whitespace from copy/paste.
secret_key = getpass(prompt="").strip()

# Start from an empty environment so the licence parameters can be set
# before the environment is started.
e = gp.Env(empty=True)
#e.setParam('OutputFlag', 0)
e.setParam('WLSACCESSID', accessid)
e.setParam('LICENSEID', int(licenseid))
e.setParam('WLSSECRET', secret_key)
e.start()

In [3]:
from rbsc import *

In [4]:
def defineinstance(N,n_elem, Scenarios, maxBlueProb, TransmissionProb, CoverFactor, N_nodes, lambd, Plot = False):
  """Generate one random stochastic red-blue set cover instance.

  Elements 1..n_elem are randomly permuted; the first blocks of the permutation
  are arranged into small Erdos-Renyi graphs. In every scenario each element is
  initially blue with its own probability (at most ``maxBlueProb``), blues are
  spread through the graphs by ``propagate`` and all remaining elements are red.
  First-stage sets are sampled until at least ``N`` exist and every element that
  is blue in some scenario is covered. The extensive (scenario-expanded) form
  adds, for each (blue, scenario) pair, a penalty red element and a two-element
  'Super' set that covers exactly that blue and its penalty red.

  Args:
    N: minimum number of first-stage sets to generate.
    n_elem: number of elements.
    Scenarios: number of scenarios (must be at least 1).
    maxBlueProb: upper bound on an element's initial blue probability.
    TransmissionProb: forwarded unchanged to ``propagate``.
    CoverFactor: set sizes are drawn from ``range(1, (1 + n_elem)//CoverFactor)``.
    N_nodes: number of nodes in each random graph.
    lambd: penalty; each penalty red gets weight ``lambd/Scenarios``.
    Plot: if True, draw the graphs and save them to "UncertaintyGraph.svg".

  Returns:
    A 12-tuple, in this order:
      Sets: 'Set<k>' -> list of elements.
      ExtensiveReds: (red, xi) -> 'R', or 'Super' for penalty reds.
      ExtensiveBlues: (blue, xi) -> 'B'.
      ScenarioFormSets: set name -> (element, xi) pairs (a set for 'Set<k>',
        a two-item list for 'Super<k>').
      ExtensiveRedsWeights: (red, xi) -> weight.
      SetsIndexedbyScenario: xi -> {set name -> list of elements}.
      RedScenarios: xi -> {red: 'R'}, penalty reds included.
      BlueScenarios: xi -> {blue: 'B'}.
      WeightsIndexedbyScenario: xi -> {red: weight}.
      SuperLookup: (blue, xi) -> 'Super<k>'.
      bLookup: ('Super<k>', xi) -> blue.
      BlueMapping: (blue, xi) -> (penalty red, xi).

  Raises:
    ValueError: if ``Scenarios`` is smaller than 1 (no element would ever be
      blue and the set sampler below could not terminate).
  """
  if Scenarios < 1:
    raise ValueError("Scenarios must be at least 1")

  Sets = {}
  # Adapted from https://stackoverflow.com/questions/71024509/create-different-disconnected-graphs-from-a-set-of-fixed-nodes-in-networkx
  # randomly permute the element labels 1..n_elem
  Elements = np.random.permutation(np.arange(n_elem) + 1)

  N_graphs = round(math.sqrt(n_elem)/2)
  # consecutive blocks of the permutation become the node labels of each graph
  random_graphs_nodes = [Elements[N_nodes*i:N_nodes*(i+1)] for i in range(N_graphs)]

  # create the random graphs, then relabel their nodes 0..N_nodes-1 with elements
  r_g = [nx.erdos_renyi_graph(n=N_nodes,p=0.5) for _ in range(N_graphs)]
  for i in range(N_graphs):
    mapping = {j: random_graphs_nodes[i][j] for j in range(N_nodes)}
    r_g[i] = nx.relabel_nodes(r_g[i], mapping)

  if Plot:
    plt.figure(figsize=(15,6))
    for i in range(N_graphs):
      plt.subplot(1,N_graphs,i+1)
      plt.xlabel('Graph '+str(i+1))
      plt.tight_layout()
      Graph(r_g[i],node_labels=True, node_layout = 'spring',
          node_label_fontdict=dict(size=10), node_label_offset=0.05, node_size=3, edge_width=0.4)
    if N_graphs:
      # write the figure once, after every subplot has been drawn
      plt.savefig("UncertaintyGraph.svg")

  # ---- scenario sampling ----
  NodeProbability = {e:np.random.random()*maxBlueProb for e in Elements}
  AllElements = set(Elements)
  RedScenarios = {}
  BlueScenarios = {}
  for xi in range(Scenarios):
    Blues_xi = {e: 'B' for e in Elements if np.random.random() <= NodeProbability[e]}
    if not Blues_xi:
      # guarantee at least one blue per scenario: fall back to the last element
      Blues_xi[Elements[-1]] = 'B'
    # iterate over a snapshot: propagation adds new blues to Blues_xi
    for b in list(Blues_xi):
      for graph in r_g:
        if b in graph.nodes():
          # propagate the fraud
          infectedByb = []
          propagate(graph,b,infectedByb, TransmissionProb)
          for infected in infectedByb:
            Blues_xi[infected] = 'B'
    Reds_xi = {e:'R' for e in Elements if e not in Blues_xi}
    BlueScenarios[xi] = Blues_xi
    RedScenarios[xi] = Reds_xi
    # sanity check: blues and reds together must be exactly the element set
    if set(Blues_xi).union(Reds_xi) != AllElements:
      print("Something is wrong")

  # ---- extensive form ----
  # blues are all (blue element, xi) pairs
  ExtensiveBlues = {(b, xi):'B' for xi in BlueScenarios for b in BlueScenarios[xi]}
  # reds are all (red element, xi) pairs plus one penalty red per (blue, xi) pair
  ExtensiveReds = {(r, xi): 'R' for xi in RedScenarios for r in RedScenarios[xi]}
  ExtensiveRedsWeights = {(r, xi): 1/Scenarios for xi in RedScenarios for r in RedScenarios[xi]}

  BlueMapping = {}
  LastElement = max(Elements)
  # penalty reds get fresh ids above the largest element label
  for k, (b, xi) in enumerate(ExtensiveBlues, start=1):
    BlueMapping[(b, xi)] = (LastElement + k, xi)
    ExtensiveReds[(LastElement + k, xi)] = 'Super'
    ExtensiveRedsWeights[(LastElement + k, xi)] = lambd/Scenarios

  # ---- first-stage sets covering every element that is ever blue ----
  k = 0
  EverBlues = set(b for (b,xi) in ExtensiveBlues)
  UncoveredBlues = set(EverBlues)
  # keep sampling until there are at least N sets and all blues are covered
  while k < N or UncoveredBlues:
    n_k = random.sample(range(1,(1+len(Elements))//CoverFactor),1)[0]
    # resample until the candidate contains at least one ever-blue element
    while True:
      SetCandidate = random.sample(list(Elements), n_k)
      if not EverBlues.isdisjoint(SetCandidate):
        break
    NewlyCovered = UncoveredBlues.intersection(SetCandidate)
    # while blues remain uncovered only candidates covering a new blue are kept;
    # afterwards every candidate is kept until N sets exist
    if not UncoveredBlues or NewlyCovered:
      Sets['Set' + str(k)] = SetCandidate
      k = k+1
    UncoveredBlues -= NewlyCovered

  # ---- scenario-form sets ----
  # element scenario pairs
  ExtensiveFormElements = set(product(Elements, range(Scenarios)))

  SetNames = list(Sets)
  probability_swap = 0.1
  # Every first-stage set is created up front so that a pair redirected to a
  # set later in the iteration order is kept rather than silently dropped.
  ScenarioFormSets = {S: set() for S in SetNames}
  for S in SetNames:
    Members = set(Sets[S])
    for e, xi in ExtensiveFormElements:
      if e in Members and probability_swap <= np.random.random():
        ScenarioFormSets[S].add((e,xi))
      else:
        # NOTE(review): this branch is taken for swapped members AND for every
        # element that is not in S, so non-members also land in a random set --
        # confirm that this is intended.
        ScenarioFormSets[random.choice(SetNames)].add((e,xi))

  # appending the super sets: each covers one blue pair and its penalty red
  SuperLookup = {}
  bLookup = {}
  for k, (b, xi) in enumerate(ExtensiveBlues):
    r, _ = BlueMapping[(b,xi)]
    SuperName = 'Super'+str(k)
    ScenarioFormSets[SuperName] = [(b, xi), (r, xi)]
    RedScenarios[xi][r] = 'R' #include the new red elements here
    SuperLookup[(b, xi)] = SuperName
    bLookup[(SuperName, xi)] = b

  # ---- per-scenario views ----
  # only sets with at least one element in a scenario appear under that scenario
  SetsIndexedbyScenario = {xi: {} for xi in range(Scenarios)}
  for S, pairs in ScenarioFormSets.items():
    for elem, xi in pairs:
      SetsIndexedbyScenario[xi].setdefault(S, []).append(elem)

  # getting the weights indexed by scenario
  WeightsIndexedbyScenario = {xi: {} for xi in range(Scenarios)}
  for (r, xi), wgt in ExtensiveRedsWeights.items():
    WeightsIndexedbyScenario[xi][r] = wgt

  return Sets, ExtensiveReds, ExtensiveBlues, ScenarioFormSets, ExtensiveRedsWeights, SetsIndexedbyScenario, RedScenarios, BlueScenarios, WeightsIndexedbyScenario, SuperLookup, bLookup, BlueMapping

## Experiments

In [12]:
# Experiment configuration shared by the cells below.
Results = {}  # (method, trial, n_elem, N, Scenarios) -> [objective, bound, run time]
n_elem = 100  # number of elements
maxBlueProb = 0.1  # upper bound on an element's initial blue probability
TransmissionProb = 0.9  # forwarded to propagate() by defineinstance
mipgaptol = 0.05  # passed to the solver calls as `mipgap`
LIMIT = 2*60*60  # time limit handed to the solver calls (presumably seconds -- confirm)
CoverFactor = 1  # divides the upper bound of the sampled set sizes
N_nodes = 4  # nodes per random graph
output = False

# defineinstance draws from both the stdlib and the NumPy generators, so both
# must be seeded for the generated instances to be repeatable.
random.seed(10)
np.random.seed(10)

M = 10  # replications

trial = 0
AverageNumberofReds = {}

set_sizes = [10, 15]

# each scenario count is repeated M times, giving M replications per setting
scenario_sizes = [10, 30, 90]*M

multiplier = 2

In [None]:
# Pilot pass: generate one instance per (set count, scenario count) replication
# and record get_average_number_of_reds for it. Nothing is solved here.
for N, Scenarios in product(set_sizes, scenario_sizes):
  print("Trial # ", trial)
  # provisional penalty used only while generating the pilot instances
  lambd = 25 * n_elem / np.log(1 + N)

  Sets = {}
  start = time.time()

  # random scenario generation (see defineinstance)
  (Sets, ExtensiveReds, ExtensiveBlues, ScenarioFormSets, ExtensiveRedsWeights,
   SetsIndexedbyScenario, RedScenarios, BlueScenarios, WeightsIndexedbyScenario,
   SuperLookup, bLookup, BlueMapping) = defineinstance(
      N, n_elem, Scenarios, maxBlueProb, TransmissionProb,
      CoverFactor, N_nodes, lambd, Plot=False)

  AverageNumberofReds[(N, Scenarios, trial)] = get_average_number_of_reds(RedScenarios, Sets)
  trial += 1
  

In [14]:
# Turn the {(N, Scenarios, trial): value} dict into a Series with named index
# levels, average over trials, and store the result on Drive.
AverageNumberofRedsSeries = pd.Series(AverageNumberofReds).rename_axis(['NSets', 'NScenarios', 'Trial'])
AverageNumberofRedsSeriesGrouped = (
    AverageNumberofRedsSeries
    .groupby(level=['NSets', 'NScenarios'])
    .mean()
)
AverageNumberofRedsSeriesGrouped.to_pickle("gdrive/My Drive/Colab Notebooks/RBSC/data/average_reds.pkl")

In [15]:
# Reload the per-(NSets, NScenarios) averages saved by the previous cell, so the
# experiment loop can run without repeating the pilot pass.
AverageNumberofRedsSeriesGrouped = pd.read_pickle("gdrive/My Drive/Colab Notebooks/RBSC/data/average_reds.pkl")

In [None]:
# Main experiment: for every (set count, scenario count) replication, generate
# an instance and record objective / bound / run time of three methods in
# Results, keyed by (method, trial, n_elem, N, Scenarios).
trial = 0
multiplier = 2

for N, Scenarios in product(set_sizes, scenario_sizes):
# alternative sweep:
#for N, Scenarios in product([5], [10]*M + [20]*M + [30]*M + [40]*M):
  print("Trial # ", trial)
# alternative sweep:
#for N, Scenarios in product([10, 40], [5]):
  # penalty = multiplier x the pilot-pass average for this (N, Scenarios)
  lambd = multiplier*AverageNumberofRedsSeriesGrouped[(N,Scenarios)] #average elements in set

  Sets = {}

  # random scenario generation:
  # create the instance (see defineinstance)
  (Sets, ExtensiveReds, ExtensiveBlues, ScenarioFormSets, 
  ExtensiveRedsWeights, SetsIndexedbyScenario, RedScenarios, 
  BlueScenarios, WeightsIndexedbyScenario, SuperLookup, 
  bLookup, BlueMapping) = defineinstance(N,n_elem, Scenarios,
                                          maxBlueProb, TransmissionProb, 
                                          CoverFactor, N_nodes, lambd, Plot = False)

  ## exact solution algorithm on the original problem (Relax = False);
  ## the recorded time is the RunTime value returned by the call
  (SelectedReds, SelectedSets, 
  SolnEdges, ObjVal, ObjBound, RunTime, vals_y, vals_x) = DeterministicRedBlue(ExtensiveReds, ExtensiveBlues, 
                                                                ScenarioFormSets, ExtensiveRedsWeights, 
                                                                LIMIT, output = False, testing = True, 
                                                               mipgap = mipgaptol, 
                                                               Relax = False,
                                                               env = e)

  Results[('Extensive', trial, n_elem,N,Scenarios)] = [ObjVal, ObjBound, RunTime]  #add to the results dictionary
  
  # Peleg algorithm; wall-clock time is measured around the LowDeg2 call only
  start = time.time()
  BestCover, MinWgt = LowDeg2(ExtensiveReds, ExtensiveBlues, ScenarioFormSets, ExtensiveRedsWeights)
  end = time.time()
  RedsinCandidates = ElementsinFamily(BestCover, ExtensiveReds)
  print("Peleg ", NumRedsinS(RedsinCandidates, ExtensiveReds, ExtensiveRedsWeights))
  # debugging aid: show the chosen sets when the weight is unusually large
  if MinWgt > 1000:
    print("Best Sets ", BestCover.keys())
  Results[('Peleg', trial, n_elem,N,Scenarios)] = [MinWgt, 0 , end-start]   #add to the results dictionary

  
  ## approximation algorithm using the LP (Relax = True) and the augmented problem.
  ## NOTE(review): the augmented problem is built before the timer starts, so
  ## its construction time is not part of the recorded Carr run time -- confirm
  ## that this is intended.
  AugScenarioFormSets, b_count, smallest_augmented_sets_containingb = FormAugmentedProblem(ExtensiveReds, ExtensiveBlues, ScenarioFormSets, ExtensiveRedsWeights)

  start = time.time()
  # this call overwrites ObjVal / ObjBound / RunTime from the exact solve above
  (SelectedReds, SelectedSets, 
  SolnEdges, ObjVal, ObjBound, RunTime, vals_y, vals_x) = DeterministicRedBlue(ExtensiveReds, ExtensiveBlues, 
                                                                ScenarioFormSets, ExtensiveRedsWeights, 
                                                                LIMIT, output = False, testing = True, 
                                                               mipgap = mipgaptol,
                                                               Relax = True,
                                                               env = e)
  
  Wgt = CarrApproximationAlgorithm(ExtensiveReds, 
                                                ExtensiveBlues, 
                                                vals_y, 
                                                AugScenarioFormSets, 
                                                b_count, 
                                                ExtensiveRedsWeights, 
                                                smallest_augmented_sets_containingb)
  end = time.time()
  print("Carr ", Wgt)
  Results[('Carr', trial, n_elem, N, Scenarios)] = [Wgt, 0 , end-start] #add to results dictionary

  #vals = m.getAttr('x', x)
  trial = trial + 1
  
  print("Completed instance defined by Nelem, N, Scenarios", (n_elem,N,Scenarios))
  # Checkpoint after every instance. Results is never reset inside the loop, so
  # each file holds everything accumulated so far, not only the rows for its N.
  with open("gdrive/My Drive/Colab Notebooks/RBSC/data/appx_results_d2_2023_"+ str(N)+".pkl", 'wb') as fp:
    pkl.dump(Results, fp);

# Results

* Import the pickle file for the results 
* Calculate the optimality gap 
* Calculate run-time statistics

In [52]:
# with open("gdrive/My Drive/Colab Notebooks/RBSC/data/appx_results_final.pkl", 'rb') as f:
#       Results = pkl.load(f);

# out = pd.DataFrame(Results, index = ['Objective', 'Bound', 'Time']).transpose()
# out.index = out.index.set_names(['Type', 'Trial', 'NElem', 'NSets', 'NScenarios'])
#pd.concat([out_10, out, out_30]).to_pickle("gdrive/My Drive/Colab Notebooks/RBSC/data/appx_results_final.pkl")

In [108]:
# Load the combined results table; the cells below expect index levels named
# 'Type', 'NElem', 'NSets', 'NScenarios' and columns 'Objective' and 'Time'.
out = pd.read_pickle("gdrive/My Drive/Colab Notebooks/RBSC/data/appx_results_final.pkl")

In [None]:
# Median of every column per (method, instance size); unstack(level=0) moves the
# method ('Type') into the columns so the methods appear side by side.
out.groupby(level = ['Type','NElem', 'NSets', 'NScenarios']).median().unstack(level=0)

In [110]:
# Export the full results table as CSV.
out.to_csv("gdrive/My Drive/Colab Notebooks/RBSC/data/Tests/APX2023.csv")

In [111]:
# One frame per method: keep the rows whose 'Type' level matches and drop that
# level, so the three frames share the same remaining index levels.
Extensive, Peleg, Carr = (
    out.loc[out.index.get_level_values('Type') == method].droplevel("Type")
    for method in ('Extensive', 'Peleg', 'Carr')
)

In [113]:
# Percentage gap of each heuristic objective relative to the 'Extensive'
# objective; the subtraction aligns rows on the shared index.
# The index levels are kept on purpose (no reset_index): the summary cells group
# with groupby(level=['NElem', 'NSets', 'NScenarios']), which raises once those
# levels have been moved into columns.
Peleg_Gap = pd.DataFrame(100*(Peleg.Objective - Extensive.Objective)/Extensive.Objective)
Carr_Gap = pd.DataFrame(100*(Carr.Objective - Extensive.Objective)/Extensive.Objective)

In [147]:
# Median gap per instance size for both heuristics, reshaped so that rows are
# (NElem, NScenarios) and columns are (NSets, method).
peleg_median = pd.DataFrame(
    Peleg_Gap.groupby(level=['NElem', 'NSets', 'NScenarios']).median()
).set_axis(["Peleg"], axis=1)
carr_median = pd.DataFrame(
    Carr_Gap.groupby(level=['NElem', 'NSets', 'NScenarios']).median()
).set_axis(["Carr et al."], axis=1)
pd.concat([peleg_median, carr_median], axis=1).stack().unstack(1).unstack(-1)

Unnamed: 0_level_0,NSets,10,10,15,15,20,20,25,25,30,30
Unnamed: 0_level_1,Unnamed: 1_level_1,Peleg,Carr et al.,Peleg,Carr et al.,Peleg,Carr et al.,Peleg,Carr et al.,Peleg,Carr et al.
NElem,NScenarios,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
100,10,95.894938,14.468923,3.98617,7.086563,6.400437,10.762658,7.091845,10.386482,6.420248,10.119101
100,30,647.472428,11.606424,26.098049,6.859024,2.377795,8.0719,2.954431,7.384697,3.954615,11.321768
100,90,603.281229,10.530841,654.828296,6.012412,280.558545,5.110912,3.890009,8.723967,1.599561,7.121934


In [148]:
# 0.9-quantile of the gap per instance size, same layout as the median table.
# The variable names are reused from the median cell; here they hold quantiles.
peleg_median = pd.DataFrame(
    Peleg_Gap.groupby(level=['NElem', 'NSets', 'NScenarios']).quantile(0.9)
).set_axis(["Peleg"], axis=1)
carr_median = pd.DataFrame(
    Carr_Gap.groupby(level=['NElem', 'NSets', 'NScenarios']).quantile(0.9)
).set_axis(["Carr et al."], axis=1)
pd.concat([peleg_median, carr_median], axis=1).stack().unstack(1).unstack(-1)

Unnamed: 0_level_0,NSets,10,10,15,15,20,20,25,25,30,30
Unnamed: 0_level_1,Unnamed: 1_level_1,Peleg,Carr et al.,Peleg,Carr et al.,Peleg,Carr et al.,Peleg,Carr et al.,Peleg,Carr et al.
NElem,NScenarios,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
100,10,321.166785,26.724416,5.851621,19.949847,7.439764,19.531925,8.524402,20.396283,8.67963,12.810568
100,30,771.261418,23.679407,168.27705,13.380291,3.132667,14.032302,3.704932,10.483438,4.515847,12.334858
100,90,638.186952,15.192355,690.475458,14.099671,640.105432,7.545934,14.572275,10.8765,2.13684,8.862473


In [149]:
# Median run time per instance size for the two heuristics and the exact solve,
# reshaped so that rows are (NElem, NScenarios) and columns are (NSets, method).
peleg_median = Peleg.Time.groupby(level=['NElem', 'NSets', 'NScenarios']).median().to_frame("Peleg")
carr_median = Carr.Time.groupby(level=['NElem', 'NSets', 'NScenarios']).median().to_frame("Carr et al.")
extensive_median = Extensive.Time.groupby(level=['NElem', 'NSets', 'NScenarios']).median().to_frame("Exact Solve")
pd.concat([peleg_median, carr_median, extensive_median], axis=1).stack().unstack(1).unstack(-1)

Unnamed: 0_level_0,NSets,10,10,10,15,15,15,20,20,20,25,25,25,30,30,30
Unnamed: 0_level_1,Unnamed: 1_level_1,Peleg,Carr et al.,Exact Solve,Peleg,Carr et al.,Exact Solve,Peleg,Carr et al.,Exact Solve,Peleg,Carr et al.,Exact Solve,Peleg,Carr et al.,Exact Solve
NElem,NScenarios,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
100,10,0.177373,0.249229,0.749452,0.16677,0.341617,8.428429,0.179107,0.37239,26.390535,0.222474,0.462115,45.649095,0.257063,0.534971,64.625273
100,30,1.379,0.602259,1.33602,1.465921,2.250664,34.994136,1.768851,2.310948,118.289382,1.932849,3.658672,276.520048,1.970821,1.578101,590.474144
100,90,14.918968,2.704707,1.213435,15.790352,7.489182,99.876659,19.394004,15.347011,623.597029,21.728472,16.088557,2691.380237,25.073862,10.84439,4678.328706


In [150]:
# 0.9-quantile of the run time per instance size, same layout as the median
# table. The variable names are reused from the median cell; here they hold
# quantiles.
peleg_median = Peleg.Time.groupby(level=['NElem', 'NSets', 'NScenarios']).quantile(0.9).to_frame("Peleg")
carr_median = Carr.Time.groupby(level=['NElem', 'NSets', 'NScenarios']).quantile(0.9).to_frame("Carr et al.")
extensive_median = Extensive.Time.groupby(level=['NElem', 'NSets', 'NScenarios']).quantile(0.9).to_frame("Exact Solve")
pd.concat([peleg_median, carr_median, extensive_median], axis=1).stack().unstack(1).unstack(-1)

Unnamed: 0_level_0,NSets,10,10,10,15,15,15,20,20,20,25,25,25,30,30,30
Unnamed: 0_level_1,Unnamed: 1_level_1,Peleg,Carr et al.,Exact Solve,Peleg,Carr et al.,Exact Solve,Peleg,Carr et al.,Exact Solve,Peleg,Carr et al.,Exact Solve,Peleg,Carr et al.,Exact Solve
NElem,NScenarios,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
100,10,0.330299,0.365422,1.507533,0.203729,0.381027,14.551942,0.229358,3.107444,34.871325,0.257804,0.777085,54.900141,0.274407,0.577397,124.965705
100,30,1.864645,0.74347,1.90343,1.741735,3.679912,48.651929,1.998058,4.595937,131.636372,2.275843,3.896984,506.518874,2.340471,1.751431,777.699919
100,90,18.040343,2.937508,1.939995,17.613091,8.53481,156.095057,23.014753,16.701589,688.254853,24.587131,17.900403,3548.275909,26.884196,11.941151,5358.677894
