In [1]:
!pip install icecream enlighten networkx
import enlighten, networkx as nx, pandas as pd, plotly.graph_objects as go, re, requests
from bs4 import BeautifulSoup
from icecream import ic

pd.set_option('display.max_colwidth', 1000)



In [2]:
# Marker color (hex) used for each RFC status label.
status = dict([
    ("Best Current Practice", "#1f77b4"),  # blue
    ("Draft Standard", "#ff7f0e"),         # orange
    ("Experimental", "#9467bd"),           # purple
    ("Historic", "#7f7f7f"),               # gray
    ("Informational", "#2ca02c"),          # green
    ("Internet Standard", "#d62728"),      # red
    ("Not Issued", "#bdbdbd"),             # light gray
    ("Proposed Standard", "#ffbb78"),      # light orange
    ("Unknown", "#000000"),                # black
])

In [3]:
# Download the RFC Editor search page listing all RFCs sorted by number and
# parse it with the lxml parser.
RFC_INDEX_URL = 'https://www.rfc-editor.org/search/rfc_search_detail.php?page=All&pub_date_type=any&sortkey=Number&sorting=ASC'

# requests has no default timeout; without one a stalled connection would
# block this cell indefinitely.
response = requests.get(RFC_INDEX_URL, timeout=120)
# Stop here on an HTTP error instead of parsing an error page further down.
response.raise_for_status()

htmlText = response.content
soup = BeautifulSoup(htmlText, 'lxml')
del response

In [4]:
def _rfc_row_fields(cell_texts):
    """Reduce the cell texts of one table row to the fields kept in `df`.

    Parameters
    ----------
    cell_texts : list of str
        Text of each <td> of the row, in table order.

    Returns
    -------
    list of str
        Values for the columns RFC, Title, Date, Info, Status, taken from
        cells 0, 2, 4, 5 and 6. The list is shorter when the row has fewer
        cells.
    """
    fields = []
    for col, text in enumerate(cell_texts):
        if col in (0, 2, 4):
            fields.append(text.strip())
        elif col == 5:
            # Drop the "Errata" marker so only the relation text remains; an
            # empty cell stays an empty string.
            fields.append(text.replace('Errata, ', '').replace('Errata', '').strip())
        elif col == 6:
            # Keep only the text before the first parenthesis.
            fields.append(text.split('(')[0].strip())
    return fields


page = soup.find("table", class_="gridtable")
if page is None:
    raise ValueError("table with class 'gridtable' not found in the downloaded page")

# The first <tr> is skipped (treated as the header row).
items = [
    _rfc_row_fields([td.text for td in tr.find_all("td")])
    for tr in page.find_all("tr")[1:]
]
df = pd.DataFrame(items, columns=['RFC','Title','Date','Info','Status'])

# Rows that yielded fewer than five fields end up with a missing Status.
# They are overwritten with the RFC 1685 record.
# NOTE(review): this presumes RFC 1685 is the only such row -- confirm.
# isna() is used so both None and NaN count as missing, and the frame is
# modified outside of any iteration over it.
for label in df.index[df['Status'].isna()]:
    df.loc[label] = ['RFC 1685','Writing X.400 O/R Names','August 1994','','Informational']

# Release the raw page and the intermediates; only `df` is needed from here on.
del htmlText, soup, items, page
df.tail()

Unnamed: 0,RFC,Title,Date,Info,Status
9574,RFC 9586,IMAP Extension for Using and Returning Unique Identifiers (UIDs) Only,May 2024,,Experimental
9575,RFC 9587,YANG Data Model for OSPFv3 Extended Link State Advertisements (LSAs),June 2024,,Proposed Standard
9576,RFC 9589,On the Use of the Cryptographic Message Syntax (CMS) Signing-Time Attribute in Resource Public Key Infrastructure (RPKI) Signed Objects,May 2024,Updates RFC 6488,Proposed Standard
9577,RFC 9590,IMAP Extension for Returning Mailbox METADATA in Extended LIS,May 2024,,Proposed Standard
9578,RFC 9598,Internationalized Email Addresses in X.509 Certificates,May 2024,"Obsoletes RFC 8398, Updates RFC 5280",Proposed Standard


In [5]:
# Matches one relation in the Info column: a keyword ("Updates", "Updated by",
# "Obsoletes", "Obsoleted by") followed by a comma-separated list of
# "RFC <number>" entries. The list is the only captured group.
relationPattern = r'(?:Updates|Updated\sby|Obsoletes|Obsoleted\sby)\s(RFC\s\d+(?:,\sRFC\s\d+)*)'

# Undirected graph: one node per row of df, one edge per update/obsolete
# relation found in the Info column.
G = nx.Graph()
for _, row in df.iterrows():
    G.add_node(
        row["RFC"],
        title=f'{row["RFC"]}',
        date=f'{row["Date"]}',
        status=f'{row["Status"]}',
        # Fall back to the 'Unknown' color instead of raising KeyError for a
        # status label that is not in the color map.
        color=status.get(row['Status'], status['Unknown']),
        size=20)

for rfc, info in zip(df['RFC'], df['Info']):
    if not isinstance(info, str) or info == '':
        continue
    for rfc_list in re.findall(relationPattern, info):
        # Split on the same separator the pattern accepts (comma + whitespace).
        for target in re.split(r',\s', rfc_list):
            if target not in G:
                # A referenced RFC that has no row in df would otherwise become
                # a node without any attributes.
                G.add_node(target, title=target, date='', status='Unknown',
                           color=status['Unknown'], size=20)
            G.add_edge(rfc, target)

# Empty trace templates; coordinates, colors and texts are assigned later.
node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers+text',
    textposition="top center",
    hoverinfo='text',
    hovertext='hovertext',
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        size=[],
        color=[],
        colorbar=dict(
            thickness=15,
            # Nested title form; the `titleside` shorthand is not accepted by
            # current Plotly releases.
            title=dict(text='Node Connections', side='top'),
            xanchor='left'
        ),
        line_width=2))
edge_trace = go.Scatter(
    x=[],
    y=[],
    line=dict(width=1,color='black'),
    hoverinfo='none',
    mode='lines')
del relationPattern

In [6]:
# Fixed seed so the random node positions are the same on every run.
pos = nx.random_layout(G, seed=42)
rfcNumber = re.compile(r'RFC\s*(\d+)', re.IGNORECASE)

# Collect every per-node value in a single pass over G.nodes so that
# coordinates, colors, sizes and texts always refer to the same node at the
# same list position.
node_x, node_y, node_colors, node_sizes, node_labels, node_hovers = [], [], [], [], [], []
for name, attrs in G.nodes(data=True):
    x, y = pos[name]
    node_x.append(x)
    node_y.append(y)

    # Defaults cover nodes that were created without attributes.
    title = attrs.get('title', name)
    node_colors.append(attrs.get('color', status['Unknown']))
    node_sizes.append(attrs.get('size', 20))

    # Link the label to the RFC's info page when the title contains an RFC
    # number; otherwise show the plain title instead of failing.
    number = rfcNumber.search(title)
    if number is None:
        node_labels.append(title)
    else:
        node_labels.append(f"<a href='https://www.rfc-editor.org/info/rfc{number.group(1)}' target='_blank' style='text-decoration: none; color: inherit;'>{title}</a>")
    node_hovers.append(f"{title}<br>{attrs.get('date', '')}<br>{attrs.get('status', 'Unknown')}")

node_trace['x'] = node_x
node_trace['y'] = node_y
node_trace['marker']['color'] = node_colors
node_trace['marker']['size'] = node_sizes
node_trace['text'] = node_labels
node_trace['hovertext'] = node_hovers

# Each edge contributes its two endpoints followed by None, which breaks the
# line between consecutive edges. The lists are built first and assigned once,
# so re-running this cell replaces the edge coordinates instead of appending
# to them.
edge_x, edge_y = [], []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]
edge_trace['x'] = edge_x
edge_trace['y'] = edge_y

del pos, rfcNumber, node_x, node_y, node_colors, node_sizes, node_labels, node_hovers, edge_x, edge_y

In [7]:
# Assemble the figure: edges are listed first and nodes second in the data.
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    # Nested title form; the `titlefont_size` shorthand is not
                    # accepted by current Plotly releases.
                    title=dict(text='RFCs Graph', font=dict(size=30)),
                    showlegend=False,
                    hovermode='closest',
                    annotations=[dict(
                        text="NetworkX + Plotly",
                        showarrow=False,
                        xref="paper", yref="paper",
                        x=0.005, y=-0.002)],
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False),
                    # Figure size in pixels.
                    width=15360,
                    height=8640))
fig.show()