In [1]:
from google.colab import drive
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import json
from collections import Counter
# Mount Google Drive at /content/drive so files stored there are reachable from this Colab runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# NOTE(review): `!cd` runs in a temporary subshell, so the notebook's working
# directory is unchanged afterwards -- the following cell's `cd drive` still
# starts from /content. This cell has no lasting effect.
!cd drive

In [3]:
# Explicit %cd magic with the absolute mount path: does not depend on automagic
# and stays re-runnable (a relative `cd drive` fails once already inside /content/drive).
%cd /content/drive

/content/drive


In [4]:
# Explicit %cd magic with an absolute path: independent of the current working
# directory, so re-running this cell (or running it out of order) still works.
%cd /content/drive/MyDrive

/content/drive/MyDrive


In [5]:
# JSON graph files (relative to the current working directory) to pick seeds for.
sample_graphs = [
  "J.5.1.json",
  "RR.5.1.json",
]

In [46]:
def _most_central(subgraph, count):
  """
  Return the `count` nodes of `subgraph` with the highest closeness centrality.

  Nodes come back in descending order of centrality; nodes with equal scores
  keep the order of the dict returned by nx.closeness_centrality.
  """
  closeness = nx.closeness_centrality(subgraph)
  # sorted() is stable, so with reverse=True equal scores keep their original order
  return sorted(closeness, key=closeness.get, reverse=True)[:count]


def get_seeds(N, filename):
  """
  A function to get N seeds for a graph given by filename

  Seeds are spread over the connected components, largest component first.
  Every component that is used contributes its node with the highest closeness
  centrality; if N exceeds the number of components, the surplus seeds are
  handed out greedily starting with the largest component (all of its nodes if
  the surplus does not fit, otherwise its most central nodes).

  N: the number of seeds for a graph
  filename: the filename (in json format) for the graph representation; the
            decoded JSON is passed directly to nx.Graph

  Returns a list of node labels. The list has N entries, except that every
  node is returned when N is at least the number of nodes, and an empty list
  is returned when N <= 0.
  """
  with open(filename, "r") as f:
    adj = json.load(f)
  G = nx.Graph(adj)

  # Return every node if N is greater than the number of nodes in the graph
  if N >= G.number_of_nodes():
    return list(G.nodes())
  # Nothing to select; also keeps the comps[:N] slice below well-defined
  if N <= 0:
    return []

  # Rough idea is to find all components, put at least one in each, then scale based on size of each
  comps = sorted(nx.connected_components(G), key=len, reverse=True)

  # With at least N components only the N largest receive a seed (previously
  # every component got one, which returned more than N seeds).
  targets = comps[:N]
  # Seeds still to hand out beyond the one-per-component baseline
  remaining_extra = max(N - len(comps), 0)

  # Closeness centrality selected off of very little thought
  ans = []
  for component in targets:
    subgraph = G.subgraph(component)
    comp_size = len(component)
    if comp_size - 1 < remaining_extra:
      # Component cannot absorb all remaining extras: take every node
      # (no need to compute centrality when nothing is being ranked)
      ans.extend(subgraph.nodes())
      remaining_extra -= comp_size - 1
    else:
      # Baseline seed plus whatever extras are left, most central first
      ans.extend(_most_central(subgraph, remaining_extra + 1))
      remaining_extra = 0

  # Sanity check: should be unreachable now, kept as a guard against regressions
  if len(ans) != N:
    print("Something went wrong")
  return ans

In [47]:
# Number of seeds requested for each sample graph
N = 50
for sample in sample_graphs:
  # Pass N instead of repeating the literal so changing N above actually takes effect
  ans = get_seeds(N, sample)
  print(ans)
  print(len(ans))

['2', '10', '9', '17', '33', '8', '13', '92', '75', '11', '0', '91', '16', '56', '60', '50', '6', '25', '72', '89', '1', '77', '4', '3', '57', '7', '19', '88', '48', '113', '76', '15', '80', '83', '26', '69', '65', '98', '129', '124', '143', '5', '90', '27', '85', '53', '74', '20', '44', '196']
50
['11', '21', '33', '23', '30', '51', '39', '28', '13', '41', '43', '50', '40', '37', '56', '70', '34', '48', '71', '1', '83', '3', '12', '44', '65', '24', '4', '36', '38', '74', '42', '47', '53', '58', '84', '52', '76', '17', '77', '85', '18', '10', '49', '66', '97', '14', '16', '45', '87', '25']
50
