In [52]:
import pygraphviz as pgv
import xmltodict as xtd
import requests
from collections import OrderedDict

In [53]:
# Read the RFC Index as XML and convert it to a Python dict.
# The index is read from a local copy; to fetch a fresh copy over the
# network instead, use the three commented-out lines below.
#rfcIndexUrl = 'https://www.rfc-editor.org/in-notes/rfc-index.xml'
#xmlData = requests.get(rfcIndexUrl)
#dictData = xtd.parse(xmlData.text)
# Decode explicitly as UTF-8 rather than with the platform's locale encoding,
# so non-ASCII text in the index reads the same on every machine.
# NOTE(review): assumes the local copy is UTF-8, as the published index declares.
with open('rfc-index.xml', 'r', encoding='utf-8') as xmlData:
    dictData = xtd.parse(xmlData.read())
# Everything of interest lives under the document's root element.
rfcIndex = dictData['rfc-index']

In [54]:
# The RFC Index has the following elements of interest.
# Displayed as the cell's output: the top-level keys of the parsed index.
# In the output below, keys starting with '@' are the root element's XML
# attributes; the remaining keys are the entry lists, one per entry type.
rfcIndex.keys()

odict_keys(['@xmlns', '@xmlns:xsi', '@xsi:schemaLocation', 'bcp-entry', 'fyi-entry', 'rfc-entry', 'rfc-not-issued-entry', 'std-entry'])

In [55]:
# Load the color names used for coloring edges and nodes: one name per line
# of the file, with surrounding whitespace (including the newline) removed.
with open('svg-colors.txt','r') as colorFile:
    colors = [line.strip() for line in colorFile]

In [66]:
# Create the directed graph "G": BCPs and RFCs will be its nodes, and
# is-also's, obsolescence, and updates its edges.
G = pgv.AGraph(directed=True)
# Graph-level attributes: the title label and the 'overlap' layout setting.
G.graph_attr.update(label="RFC Directed Graph", overlap='false')

In [67]:
def graphNodes(graph, rfcIndexNode, attributes):
    '''Add index entries to graph as nodes and their relations as edges.

    graphNodes(): PyGraphVizGraph ListOfDicts ListOfStr --> PyGraphVizGraph

    Purpose: for each entry of rfcIndexNode, add its 'doc-id' value to graph
             as a node; then, for each key in attributes that the entry has,
             add an edge labelled with that key from the entry's 'doc-id' to
             every 'doc-id' listed under the key.

    Parameters:
        graph: object providing add_node(name) and
               add_edge(source, target, label=...), e.g. a pygraphviz AGraph.
               It is modified in place.
        rfcIndexNode: list of dict-like entries (dict or OrderedDict). A
               single bare mapping is treated as a one-entry list.
        attributes: list of entry keys to turn into edges. The value under
               each key is expected to be a mapping whose 'doc-id' is either
               one string or a list of strings.

    Returns:
        The same graph object that was passed in.

    Problems with individual entries are printed and skipped rather than
    raised, so one malformed entry does not abort the whole run.
    '''
    # A lone entry arrives as a bare mapping instead of a list; without this,
    # the loop below would iterate over that mapping's keys.
    if isinstance(rfcIndexNode, dict):
        rfcIndexNode = [rfcIndexNode]

    for node in rfcIndexNode:
        # Add the node to the graph.
        try:
            graph.add_node(node['doc-id'])
        except Exception:
            # Best effort: report and carry on. Unlike a bare except, this
            # still lets KeyboardInterrupt stop a long-running loop.
            print("error adding node %s. No 'doc-id' string?" % node)

        # Now, add edges (to the graph that was passed in, not a global).
        for attr in attributes:
            try:
                # dict also matches OrderedDict, so either mapping type works.
                if isinstance(node[attr], dict):
                    targets = node[attr]['doc-id']
                    if isinstance(targets, str):
                        graph.add_edge(node['doc-id'], targets, label=attr)
                    elif isinstance(targets, list):
                        for standard in targets:
                            graph.add_edge(node['doc-id'], standard, label=attr)
                    else:
                        print("error: %s is neither an string nor a list for node %s" % (node[attr], node['doc-id'] ))
                else:
                    print("error: %s isn't an OrderedDict for node %s" % (node[attr], node['doc-id']))
            except KeyError:
                # The entry lacks this attribute (or a 'doc-id'): no edge.
                pass

    return graph
                        

In [68]:
# Populate the graph from the RFC entries. The BCP and FYI entry lists are
# currently left out; uncomment the corresponding line to include them.
#G = graphNodes(G, rfcIndex['bcp-entry'], ['is-also'])
#G = graphNodes(G, rfcIndex['fyi-entry'], ['is-also'])
G = graphNodes(
    G,
    rfcIndex['rfc-entry'],
    ['is-also', 'obsoleted-by', 'updates'],
)

In [69]:
# Sanity check: show how many nodes the graph now holds.
G.number_of_nodes()

8819

In [70]:
# Generate the graph: compute a layout, then save it as DOT and as SVG.
# N.B. As of this writing, this takes several minutes to run.

G.layout(prog='neato')  # 'neato' is the program layout() uses by default
G.write(path='rfc-visualization.dot')
G.draw(path='rfc-visualization.svg')