In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import graphviz

In [2]:
def create_grey_to_black_colormap():
  """Build a colormap that fades from light grey to black.

  All three RGB channels share the same ramp: 0.75 at position 0.0 and
  0.0 at position 1.0.

  Returns:
    The ``mcolors.LinearSegmentedColormap`` named ``'GreyToBlack'``.
  """
  # The same two anchor points are used for every channel, so the result
  # is a pure grey scale.
  segment_data = {
    channel: [(0.0, 0.75, 0.75), (1.0, 0.0, 0.0)]
    for channel in ('red', 'green', 'blue')
  }
  return mcolors.LinearSegmentedColormap('GreyToBlack', segment_data)

def create_yellow_colormap():
  """Build a colormap running from pale yellow to saturated yellow.

  Position 0.0 is the colour ``#FFF9E2`` and position 1.0 is ``#FFD500``.

  Returns:
    The ``mcolors.LinearSegmentedColormap`` named ``'YellowGradient'``.
  """
  start_rgb = mcolors.hex2color('#FFF9E2')
  end_rgb = mcolors.hex2color('#FFD500')

  # One two-point ramp per channel, going from the start colour's
  # component to the end colour's component.
  segment_data = {}
  for position, channel in enumerate(('red', 'green', 'blue')):
    low, high = start_rgb[position], end_rgb[position]
    segment_data[channel] = [(0.0, low, low), (1.0, high, high)]

  return mcolors.LinearSegmentedColormap('YellowGradient', segment_data)

def create_color_gradient(values, colormap):
  """Map numeric values to hex colour strings along a colormap.

  The values are normalised with ``plt.Normalize`` using their own minimum
  and maximum as the bounds, passed through ``colormap``, and each
  resulting colour is converted with ``mcolors.to_hex``.

  Args:
    values: Collection of numbers (e.g. a list, NumPy array or pandas
      Series).
    colormap: Callable that turns the normalised values into an iterable
      of colours accepted by ``mcolors.to_hex`` (e.g. a matplotlib
      colormap).

  Returns:
    A list of hex colour strings, one per input value, in input order.
    An empty input yields an empty list.
  """
  # np.min / np.max raise ValueError on a zero-size input; an edge list
  # with no rows should simply produce no colours.
  if np.size(values) == 0:
    return []

  norm = plt.Normalize(np.min(values), np.max(values))
  normalized_values = norm(values)
  rgb_colors = colormap(normalized_values)
  return [mcolors.to_hex(color) for color in rgb_colors]

In [3]:
# Shared colormap instances, built once so later cells can reuse them.
# The yellow gradient is kept available for styling; visualize_graph below
# only reads grey_to_black_cmap (for edge colours).
light_to_dark_yellow_cmap = create_yellow_colormap()
grey_to_black_cmap = create_grey_to_black_colormap()

In [4]:
def visualize_graph(name, engine='circo', view=True):
  """Render the weighted token graph stored in ``{name}.csv`` as a PNG.

  The CSV is expected to provide the columns ``from_id``, ``from``,
  ``to_id``, ``to`` and ``weight``; every row is one directed edge. Nodes
  are drawn as filled yellow ovals labelled with their token, and each
  edge is coloured by passing its weight through ``grey_to_black_cmap``.

  Args:
    name: Base name of the input. ``{name}.csv`` is read and the output is
      rendered into the directory ``./{name}``.
    engine: Graphviz layout engine, e.g. ``'circo'``, ``'twopi'`` or
      ``'dot'``.
    view: Passed to ``Digraph.render``; when true the rendered image is
      opened in the default viewer.
  """
  edges = pd.read_csv(f'{name}.csv')

  # Every node shows up as the source or the target of at least one edge,
  # so the node table is the de-duplicated union of both endpoint columns.
  nodes = pd.concat(
    [
      edges[['from_id', 'from']].rename(columns={'from_id': 'id', 'from': 'token'}),
      edges[['to_id', 'to']].rename(columns={'to_id': 'id', 'to': 'token'}),
    ],
    axis=0,
  ).drop_duplicates()

  edge_colors = create_color_gradient(edges['weight'], grey_to_black_cmap)

  graph = graphviz.Digraph('3-gram', engine=engine)
  graph.graph_attr['dpi'] = '300'

  # Iterate over columns instead of iterrows(): iterrows() builds one
  # Series per row and does not preserve dtypes, so integer ids can turn
  # into floats ('1' vs '1.0') and no longer match between nodes and edges.
  # Tokens are stringified because read_csv may parse them as numbers or
  # NaN, and graphviz labels must be strings.
  for node_id, token in zip(nodes['id'], nodes['token']):
    graph.node(
      str(node_id),
      label=str(token),
      color='#363636',
      style='filled',
      fillcolor='#ffd53d',
      shape='oval',
    )

  # Pair each edge with its colour by position rather than by index label.
  for source_id, target_id, edge_color in zip(edges['from_id'], edges['to_id'], edge_colors):
    graph.edge(str(source_id), str(target_id), arrowsize='0.5', color=edge_color)

  graph.render(directory=f'./{name}', format='png', view=view)

# Grouped, No threshold

In [5]:
# Reads Grouped-0.0%.csv and renders the PNG into ./Grouped-0.0%/.
visualize_graph(name='Grouped-0.0%')

# Grouped, 60% quantile threshold

In [6]:
# Reads Grouped-60.0%.csv and renders the PNG into ./Grouped-60.0%/.
visualize_graph(name='Grouped-60.0%')

# Surrogate, No threshold

In [7]:
# Reads Surrogate-0.0%.csv and renders the PNG into ./Surrogate-0.0%/.
visualize_graph(name='Surrogate-0.0%')

# Surrogate, 60% quantile threshold

In [8]:
# Reads Surrogate-60.0%.csv and renders the PNG into ./Surrogate-60.0%/.
visualize_graph(name='Surrogate-60.0%')