In [22]:
import networkx as nx
import random
import itertools

# networkx DiGraph representations of the drawn motifs in motifs.txt.
# Lookup table "m1" .. "m13" -> directed 3-node pattern on nodes 1, 2, 3.
# The position of each edge list in the sequence below determines its key
# (first entry -> "m1", second -> "m2", ...), so the order must not change.
motifs = {
  f"m{index}": nx.DiGraph(edge_list)
  for index, edge_list in enumerate(
    [
      [(1,2),(1,3)],                          # m1
      [(2,1),(3,1)],                          # m2
      [(1,2),(2,3)],                          # m3
      [(1,2),(2,3),(3,2)],                    # m4
      [(1,2),(1,3),(3,1)],                    # m5
      [(1,2),(2,1),(1,3),(3,1)],              # m6
      [(1,2),(1,3),(2,3)],                    # m7
      [(1,2),(2,3),(3,1)],                    # m8
      [(2,1),(3,1),(2,3),(3,2)],              # m9
      [(1,2),(1,3),(2,3),(3,2)],              # m10
      [(1,2),(2,3),(3,2),(3,1)],              # m11
      [(1,2),(2,1),(1,3),(3,1),(2,3)],        # m12
      [(1,2),(2,1),(1,3),(3,1),(2,3),(3,2)],  # m13
    ],
    start=1,
  )
}

In [23]:
# Get input graph.
def get_graph():
  """Prompt for an edge-list file and load it as a directed graph.

  The user is asked for a path until one can be opened, mirroring the
  re-prompting behaviour of the other input helpers in this file. The
  previous version printed a message and implicitly returned None on a
  missing file, which made the caller fail later with an unrelated error.

  Returns:
    The nx.DiGraph built by nx.read_weighted_edgelist, with node labels
    converted to int.
  """
  while True:
    path = input('Provide path to graph file: ')
    try:
      # networkx documents that a file handle passed to its edge-list
      # readers must be opened in binary mode; the `with` block closes it,
      # so no explicit close() is needed.
      with open(path, 'rb') as input_graph:
        return nx.read_weighted_edgelist(
          input_graph, create_using=nx.DiGraph, nodetype=int
        )
    except FileNotFoundError:
      print("FileNotFoundError: Invalid file or directory. Check the file exists and try again.")

# Get motif of interest.
def get_motif():
  """Ask the user which motif (1 - 13) should have its frequency changed.

  The contents of 'motifs.txt' are shown as part of every prompt. The user is
  re-prompted until the answer is a decimal integer between 1 and 13.

  Returns:
    The key into the module-level `motifs` table, e.g. "m7". The number is
    normalised through int(), so an answer such as "07" yields "m7" rather
    than a key that does not exist.
  """
  # Read the prompt text up front; `with` guarantees the file is closed even
  # if input() raises. The local name no longer shadows the global `motifs`.
  with open('motifs.txt', 'r') as motif_file:
    motif_prompt = motif_file.read()

  def is_valid(answer):
    # str.isdecimal() only accepts characters that int() can parse, unlike
    # str.isnumeric(), which also accepts e.g. "²" and makes int() raise.
    return answer.isdecimal() and 1 <= int(answer) <= 13

  chosen_motif = input(
    motif_prompt + "\n" +
    "Choose one motif listed above (1 - 13) of which you'll change its frequency: "
  )
  while not is_valid(chosen_motif):
    chosen_motif = input(
      motif_prompt + "\n" + "Please enter a number between 1 and 13: "
    )

  return f"m{int(chosen_motif)}"

# Get frequency change (increasing or decreasing).
def get_freq_change():
  """Ask whether the motif frequency should go up or down.

  Keeps asking until the answer is exactly "i" (increase) or "d" (decrease)
  and returns that single-letter string.
  """
  prompt = "Increase or decrease frequency of motif? (i or d): "
  while True:
    answer = input(prompt)
    if answer in ("i", "d"):
      return answer
    # Every retry uses the shorter reminder prompt.
    prompt = "Please enter i or d: "

In [24]:
# Calculate frequency of specified motif in graph G by first finding all 3-node subgraphs
# and then determining how many are isomorphic to the specified motif.
def motif_freq(G, motif):
  """Count the 3-node induced subgraphs of G that match a motif.

  Args:
    G: graph whose nodes are enumerated via G.nodes.
    motif: key into the module-level `motifs` table (e.g. "m3").

  Returns:
    Number of unordered node triples whose induced subgraph is isomorphic
    to the selected motif.
  """
  node_list = list(G.nodes)
  pattern = motifs[motif]

  # Every unordered triple of nodes is examined exactly once.
  return sum(
    1
    for triple in itertools.combinations(node_list, 3)
    if nx.is_isomorphic(nx.subgraph(G, triple), pattern)
  )

In [25]:
# Select two random edges from a given edge list.
def random_vertices(edges):
  """Pick two random edges that share no endpoint.

  Despite its name, this function returns edges, not vertices.

  The first edge is drawn uniformly from those edges that have at least one
  node-disjoint partner; the second is drawn uniformly from its partners.
  The previous version drew the first edge from all edges and crashed when
  that particular edge had no partner, even if another valid pair existed
  (e.g. the middle edge of the path 1 -> 2 -> 3 -> 4).

  Args:
    edges: sequence of (source, target) pairs.

  Returns:
    A tuple (u, v) of two edges covering four distinct nodes.

  Raises:
    IndexError: if no two edges in `edges` are node-disjoint (this is the
      exception type random.choice raised here before, on an empty list).
  """
  # Visit candidate first edges in random order so that an unlucky first draw
  # does not abort the selection.
  candidates = list(edges)
  random.shuffle(candidates)

  for u in candidates:
    # Enforce selection of two different edges without any shared nodes
    partners = [e for e in edges if len({u[0], u[1], e[0], e[1]}) == 4]
    if partners:
      return u, random.choice(partners)

  raise IndexError("No pair of edges without shared nodes exists in the edge list.")

# Swap two edges in graph G.
def swap_edges(G, max_tries=100):
  """Rewire two random node-disjoint edges of G in place.

  Edges u = (a, b) and v = (c, d) are replaced by (a, d) and (c, b). The new
  edges are created with a plain add_edge call, so attributes of the removed
  edges are not carried over.

  Args:
    G: graph to modify; it is mutated in place.
    max_tries: number of random draws attempted before falling back to an
      exhaustive search. The previous version retried forever and never
      returned when no valid swap existed.

  Returns:
    (G, u, v): the same graph object plus the two original edges, which is
    what undo_swap needs to revert the change.

  Raises:
    RuntimeError: if no pair of node-disjoint edges can be swapped without
      duplicating an existing edge.
  """
  edges = list(G.edges)

  def creates_multi_edge(u, v):
    # Prevent multi-edge
    return G.has_edge(u[0], v[1]) or G.has_edge(v[0], u[1])

  for _ in range(max_tries):
    u, v = random_vertices(edges)
    if not creates_multi_edge(u, v):
      break
  else:
    # Random sampling failed repeatedly: enumerate every admissible pair so
    # that a rare-but-valid swap is still found and an impossible one is
    # reported instead of looping forever.
    admissible = [
      (a, b)
      for a in edges
      for b in edges
      if len({a[0], a[1], b[0], b[1]}) == 4 and not creates_multi_edge(a, b)
    ]
    if not admissible:
      raise RuntimeError("No edge swap is possible without creating a multi-edge.")
    u, v = random.choice(admissible)

  G.remove_edge(u[0], u[1])
  G.remove_edge(v[0], v[1])

  G.add_edge(u[0], v[1])
  G.add_edge(v[0], u[1])

  return G, u, v

def undo_swap(G, u, v):
  """Revert a swap of edges u and v on G and return G.

  The two rewired edges (u[0], v[1]) and (v[0], u[1]) are removed first,
  then the original edges u and v are added back. G is modified in place.
  """
  u_src, u_dst = u[0], u[1]
  v_src, v_dst = v[0], v[1]

  # Drop the edges that the swap introduced.
  for src, dst in ((u_src, v_dst), (v_src, u_dst)):
    G.remove_edge(src, dst)

  # Put the original edges back.
  for src, dst in ((u_src, u_dst), (v_src, v_dst)):
    G.add_edge(src, dst)

  return G


# Randomize graph G using biased link randomization, maintaining degree sequence.
def randomize(G, motif, freq_change):
  """Biased edge-swap randomization that steers a motif's frequency.

  Performs 10 * (number of edges) swap attempts. After each swap the motif
  count is recomputed; a swap that moves the count against the requested
  direction is undone, any other swap is kept.

  Fixes over the previous version:
    * each swap is compared against the count of the currently accepted
      graph rather than the initial count, which was never updated;
    * the returned final count always describes the returned graph (it used
      to be the count of the last attempted swap, even if that swap had been
      undone);
    * a graph without edges no longer hits an unbound `new_freq`.

  Args:
    G: graph to randomize. It is modified in place by the swaps.
    motif: key into the module-level `motifs` table.
    freq_change: "i" to push the frequency up, "d" to push it down.

  Returns:
    (G, initial_freq, final_freq): the randomized graph and the motif count
    before and after randomization.
  """
  initial_freq = motif_freq(G, motif)
  current_freq = initial_freq

  steps = 10 * G.number_of_edges()
  for _ in range(steps):
    G, u, v = swap_edges(G)
    candidate_freq = motif_freq(G, motif)

    # Discard edge swap if motif frequency does not change as desired.
    moves_wrong_way = (
      (freq_change == "i" and candidate_freq < current_freq)
      or (freq_change == "d" and candidate_freq > current_freq)
    )
    if moves_wrong_way:
      G = undo_swap(G, u, v)
    else:
      # Swap accepted: it becomes the baseline for the next attempt.
      current_freq = candidate_freq

  return G, initial_freq, current_freq

# TODO: After randomization, compare the number of occurrences of the motif in the
# original network with the number of occurrences in the randomized one:
#   - if the original network contains significantly more occurrences of the motif,
#     we can say that it is "over expressed",
#   - if the original network contains significantly fewer occurrences of the motif,
#     we can say that it is "under expressed".
# More details in slides.

In [26]:
# Return the degree sequence of graph G.
def degree_sequence(G):
  """Collect the degree of every node of G into a list.

  The values appear in the order in which G.degree() yields its
  (node, degree) pairs.
  """
  degrees = []
  for _node, degree in G.degree():
    degrees.append(degree)
  return degrees

# TODO: Characteristics of interest: how network parameters (average clustering
# coefficient, assortativity coefficient, average shortest path length, etc.)
# change during the randomization.

In [27]:
def main():
  """Interactive entry point: load a graph, randomize it, and report results.

  Prints two lines:
    1. whether the degree sequence is unchanged by the randomization;
    2. the motif count before and after randomization.
  """
  G = get_graph()
  if G is None:
    # get_graph has already reported the problem; nothing to randomize.
    return
  motif, freq_change = get_motif(), get_freq_change()

  # randomize() rewires G in place and returns that same object, so the
  # original degree sequence must be captured before the call. Comparing
  # G with r_G afterwards would compare the graph with itself.
  original_degrees = degree_sequence(G)
  r_G, freq, new_freq = randomize(G, motif, freq_change)

  print(original_degrees == degree_sequence(r_G))
  print(freq, new_freq)

In [28]:
# Start the interactive workflow only when this code is the entry point
# (module name '__main__'), not when it is imported from elsewhere.
if __name__ == '__main__':
  main()

True
11 7
