In [68]:
import pygraphviz as pgv
import xmltodict as xtd
import requests
from collections import OrderedDict

In [69]:
# Read the RFC Index as XML and convert to a python Dict
#rfcIndexUrl = 'https://www.rfc-editor.org/in-notes/rfc-index.xml'
#xmlData = requests.get(rfcIndexUrl)
#dictData = xtd.parse(xmlData.text)
with open('rfc-index.xml','r') as xmlData:
    dictData = xtd.parse(xmlData.read())
rfcIndex = dictData['rfc-index']

In [70]:
# The RFC Index has the following elements of interest:
rfcIndex.keys()

odict_keys(['@xmlns', '@xmlns:xsi', '@xsi:schemaLocation', 'bcp-entry', 'fyi-entry', 'rfc-entry', 'rfc-not-issued-entry', 'std-entry'])

In [71]:
# Import color definitions to be used in coloring edges and nodes.
with open('svg-colors.txt','r') as colorFile:
    colors = colorFile.readlines()
    colors = [ color.strip() for color in colors]

In [76]:
# Initialize a new digraph, "G", to represent BCPs and RFCs as nodes;
# is-also's, obsolescence, and updates as edges.
G = pgv.AGraph(directed=True)
G.graph_attr['label']="RFC Directed Graph"

In [73]:
def graphNodes(graph, rfcIndexNode, attributes):
    '''graphNodes(): PyGraphVizGraph ListOfOrderedDicts ListOfStr--> PyGraphVizGraph
    Purpose: for each element of rfcIndexNode, add its 'doc-id' key value as a node
             to graph, and for each key value in attributes, add the key values for
             that node's attribute to graph as an edge, returning the updated graph.'''
    for node in rfcIndexNode:
        # Add the node to the graph
        try:
            graph.add_node(node['doc-id'])
        except:
            print("error adding node %s. No 'doc-id' string?" % node)
        
        # Now, add edges
        for attr in attributes:
            try:
                if isinstance(node[attr], OrderedDict):
                    if isinstance(node[attr]['doc-id'],str):
                        G.add_edge(node['doc-id'],node[attr]['doc-id'],label=attr)
                    elif isinstance(node[attr]['doc-id'],list):
                        for standard in node[attr]['doc-id']:
                            G.add_edge(node['doc-id'],standard,label=attr)
                    else:
                        print("error: %s is neither an string nor a list for node %s" % (node[attr], node['doc-id'] ))
                else:
                    print("error: %s isn't an OrderedDict for node %s" % (node[attr], node['doc-id']))
            except Exception as e:
                print("exception at node %s: no attribute '%s'" % (node['doc-id'],attr))
            
    return(graph)
                        

In [77]:
G = graphNodes(G, rfcIndex['bcp-entry'], ['is-also'])
G = graphNodes(G, rfcIndex['fyi-entry'], ['is-also'])
G = graphNodes(G, rfcIndex['rfc-entry'], ['is-also','obsoleted-by','updates'])

exception at node BCP0001: no attribute 'is-also'
exception at node BCP0002: no attribute 'is-also'
exception at node BCP0012: no attribute 'is-also'
exception at node BCP0094: no attribute 'is-also'
exception at node BCP0115: no attribute 'is-also'
exception at node RFC0001: no attribute 'is-also'
exception at node RFC0001: no attribute 'obsoleted-by'
exception at node RFC0001: no attribute 'updates'
exception at node RFC0002: no attribute 'is-also'
exception at node RFC0002: no attribute 'obsoleted-by'
exception at node RFC0002: no attribute 'updates'
exception at node RFC0003: no attribute 'is-also'
exception at node RFC0003: no attribute 'updates'
exception at node RFC0004: no attribute 'is-also'
exception at node RFC0004: no attribute 'obsoleted-by'
exception at node RFC0004: no attribute 'updates'
exception at node RFC0005: no attribute 'is-also'
exception at node RFC0005: no attribute 'obsoleted-by'
exception at node RFC0005: no attribute 'updates'
exception at node RFC0006: no 

In [20]:
# For each element in the BCP Entry list, add it to the graph, and build
# edges for is-also's to RFC nodes.
for bcp in rfcIndex['bcp-entry']:
    G.add_node(bcp['doc-id'])
    try:
        if isinstance(bcp['is-also'], OrderedDict):
            if isinstance(bcp['is-also']['doc-id'],str):
                G.add_edge(bcp['doc-id'],bcp['is-also']['doc-id'],label='is-also')
            elif isinstance(bcp['is-also']['doc-id'],list):
                for rfc in bcp['is-also']['doc-id']:
                    G.add_edge(bcp['doc-id'],rfc,label='is-also')
            else:
                print("error: is-also has unexpected format")
    except KeyError:
        pass

In [7]:
# For each element in the FYI Entry list, add it to the graph, and build
# edges for is-also's to RFC nodes.
for fyi in rfcIndex['fyi-entry']:
    G.add_node(fyi['doc-id'])
    try:
        if isinstance(fyi['is-also'], OrderedDict):
            if isinstance(fyi['is-also']['doc-id'],str):
                G.add_edge(fyi['doc-id'],fyi['is-also']['doc-id'],label='is-also')
            elif isinstance(fyi['is-also']['doc-id'],list):
                for rfc in fyi['is-also']['doc-id']:
                    G.add_edge(fyi['doc-id'],rfc,label='is-also')
            else:
                print("error: is-also has unexpected format")
    except KeyError:
        pass

In [8]:
# N.B. as of this writing, this step takes a long time.

for rfc in rfcIndex['rfc-entry']:
    try:
        G.add_node(rfc['doc-id'])
    except:
        print("error reached")
    try:
        if isinstance(rfc['is-also'], OrderedDict):
            if isinstance(rfc['is-also']['doc-id'],str):
                G.add_edge(rfc['doc-id'],rfc['is-also']['doc-id'],label='is-also')
            elif isinstance(rfc['is-also']['doc-id'],list):
                for standard in rfc['is-also']['doc-id']:
                    G.add_edge(rfc['doc-id'],standard,label='is-also')
            else:
                print("error: is-also has unexpected format")
        if isinstance(rfc['obsoletes'],OrderedDict):
            if isinstance(rfc['obsoletes']['doc-id'],str):
                G.add_edge(rfc['doc-id'],rfc['obsoletes']['doc-id'],label='obsoletes')
            elif isinstance(rfc['obsoletes']['doc-id'],list):
                for standard in rfc['oboletes']['doc-id']:
                    G.add_edge(rfc['doc-id'],standard,label='obsoletes')
            else:
                print("error: obsoletes has unexpected format")
                    
    except Exception as e:
        pass
#obsoletes', OrderedDict([('doc-id', 'RFC0003')])),
#              ('obsoleted-by', OrderedDict([('doc-id', 'RFC0016')])),
#              ('updated-by',

In [82]:
#[rfc['doc-id'] for rfc in rfcIndex['rfc-entry']]
len(G)

8824

In [83]:
# N.B. As of this writing, this takes several minutes to run.

# Generate the graph.
G.layout()
G.write('rfc-visualization.dot')
G.draw('rfc-visualization.svg')

In [67]:
rfcIndex['rfc-entry'][7653]

OrderedDict([('doc-id', 'RFC7829'),
             ('title',
              'SCTP-PF: A Quick Failover Algorithm for the Stream Control Transmission Protocol'),
             ('author',
              [OrderedDict([('name', 'Y. Nishida')]),
               OrderedDict([('name', 'P. Natarajan')]),
               OrderedDict([('name', 'A. Caro')]),
               OrderedDict([('name', 'P. Amer')]),
               OrderedDict([('name', 'K. Nielsen')])]),
             ('date', OrderedDict([('month', 'April'), ('year', '2016')])),
             ('format', OrderedDict([('file-format', ['ASCII', 'HTML'])])),
             ('page-count', '23'),
             ('keywords',
              OrderedDict([('kw',
                            ['SCTP',
                             'Failover',
                             'multipath',
                             'multihoming',
                             'Potentially Failed'])])),
             ('abstract',
              OrderedDict([('p',
                        