In [106]:
import pygraphviz as pgv
import xmltodict as xtd
import requests
from collections import OrderedDict
import random

In [107]:
# Read the RFC Index as XML and convert to a python Dict
#rfcIndexUrl = 'https://www.rfc-editor.org/in-notes/rfc-index.xml'
#xmlData = requests.get(rfcIndexUrl)
#dictData = xtd.parse(xmlData.text)
with open('rfc-index.xml','r') as xmlData:
    dictData = xtd.parse(xmlData.read())
rfcIndex = dictData['rfc-index']

In [108]:
# The RFC Index has the following elements of interest:
rfcIndex.keys()

odict_keys(['@xmlns', '@xmlns:xsi', '@xsi:schemaLocation', 'bcp-entry', 'fyi-entry', 'rfc-entry', 'rfc-not-issued-entry', 'std-entry'])

In [109]:
# Import color definitions to be used in coloring edges and nodes.
with open('svg-colors.txt','r') as colorFile:
    colors = colorFile.readlines()
    colors = [ color.strip() for color in colors]

In [110]:
# Initialize a new digraph, "G", to represent BCPs and RFCs as nodes;
# is-also's, obsolescence, and updates as edges.
G = pgv.AGraph(directed=True)
G.graph_attr['label']="RFC Directed Graph"
G.graph_attr['overlap']='false'

In [111]:
def graphNodes(graph, rfcIndexNode, attributes):
    '''graphNodes(): PyGraphVizGraph ListOfOrderedDicts ListOfStr--> PyGraphVizGraph
    Purpose: for each element of rfcIndexNode, add its 'doc-id' key value as a node
             to graph, and for each key value in attributes, add the key values for
             that node's attribute to graph as an edge, returning the updated graph.'''
    for node in rfcIndexNode:
        # Add the node to the graph
        try:
            graph.add_node(node['doc-id'])
        except:
            print("error adding node %s. No 'doc-id' string?" % node)
        
        # Now, add edges
        for attr in attributes:
            try:
                if isinstance(node[attr], OrderedDict):
                    if isinstance(node[attr]['doc-id'],str):
                        graph.add_edge(node['doc-id'],node[attr]['doc-id'],label=attr)
                    elif isinstance(node[attr]['doc-id'],list):
                        for standard in node[attr]['doc-id']:
                            graph.add_edge(node['doc-id'],standard,label=attr)
                    else:
                        print("error: %s is neither an string nor a list for node %s" % (node[attr], node['doc-id'] ))
                else:
                    print("error: %s isn't an OrderedDict for node %s" % (node[attr], node['doc-id']))
            except KeyError:
                pass
            
    return(graph)
                        

In [112]:
# Build the Graph
#G = graphNodes(G, rfcIndex['bcp-entry'], ['is-also'])
#G = graphNodes(G, rfcIndex['fyi-entry'], ['is-also'])
G = graphNodes(G, rfcIndex['rfc-entry'], ['is-also','obsoleted-by','updates'])

In [121]:
# Identify clusters of nodes. Here, we consider a cluster to be a subgraph
# of G having at least one node and all edges and adjacent nodes. (Need a
# more precise definition.)
clusters = dict()
for edge in G.edges():
    try:
        if clusters[edge[0]]:
            clusters[edge[0]].add(edge[1]) # Need a test somewhere in here to deal with RHS being a member of more than one edge
    except KeyError:
        clusters[edge[0]] = {edge[1],}

In [114]:
def genColorSet(colorCount):
    '''genColorSet(): Int --> SetOfRGBCodes
    Purpose: to generate a SetOfRGBCodes of count Int'''
    colors = set()
    
    for i in range(0,colorCount):
        r = str(hex(random.randint(1,255)))[2:]
        g = str(hex(random.randint(0,255)))[2:]
        b = str(hex(random.randint(0,255)))[2:]
        color = "#"+r+g+b
        colors.add(color)
        
    return colors

In [118]:
# Next, color-code each cluster.
colorSet = genColorSet(len(clusters))
clusterColors = dict()
for k,v in clusters.items():
    clusterColors[k] = {'members': v, 'color': colorSet.pop()}

In [120]:
# Then, for each edge, lookup and apply its cluster color.
for key,value in clusterColors.keys():
    for edge in G.edges():
        if edge[0] is key or edge[1] is key:
            e = G.get_edge(edge[0],edge[1])
            e.add_attr['color'] = value['color']
        elif edge[0] is in cluster

{'RFC1743': {'color': '#fd26ac', 'members': {'RFC1748'}},
 'RFC4833': {'color': '#fc49e0', 'members': {'RFC2132'}},
 'RFC0171': {'color': '#ebb3a5', 'members': {'RFC0114', 'RFC0264'}},
 'RFC0926': {'color': '#244da5', 'members': {'RFC0994'}},
 'RFC2738': {'color': '#e0f935', 'members': {'RFC2533'}},
 'RFC2836': {'color': '#a663e4', 'members': {'RFC3140'}},
 'RFC6302': {'color': '#b7b950', 'members': {'BCP0162'}},
 'RFC0883': {'color': '#a23d81', 'members': {'RFC1034', 'RFC1035'}},
 'RFC6270': {'color': '#3669a7', 'members': {'RFC1041', 'RFC1738', 'RFC2355'}},
 'RFC2598': {'color': '#aaa1e', 'members': {'RFC3246'}},
 'RFC3810': {'color': '#669e47', 'members': {'RFC2710'}},
 'RFC1360': {'color': '#b9b2f2', 'members': {'RFC1410'}},
 'RFC2504': {'color': '#628cf6', 'members': {'FYI0034'}},
 'RFC1292': {'color': '#4b6cf8', 'members': {'RFC1632'}},
 'RFC5011': {'color': '#4244', 'members': {'STD0074'}},
 'RFC5452': {'color': '#d1c87b', 'members': {'RFC2181'}},
 'RFC4244': {'color': '#367d18'

In [119]:
# Inspect the size of the graph to make sure it's sane
len(G)

8819

In [99]:
# N.B. As of this writing, this takes several minutes to run.

# Generate the graph.
G.layout()
G.write('rfc-visualization.dot')
G.draw('rfc-visualization.svg')