#### Imports

In [1]:
import requests
import pandas as pd
import pickle
from os import path
import networkx as nx
import matplotlib.pyplot as plt
import pydot
from networkx.drawing.nx_pydot import graphviz_layout
from dataclasses import dataclass
from joblib import Parallel, delayed
# Show full cell contents when displaying DataFrames. None disables truncation;
# the old sentinel -1 is deprecated since pandas 1.0 and rejected by newer releases.
pd.set_option('display.max_colwidth', None)
import plotly.graph_objects as go

#### Constants

In [2]:
# Base URL of the explorer REST API; endpoint paths ("/hotspots",
# "/witnesses/<address>") are appended to it with str.format.
API = "https://explorer.helium.foundation/api"

#### Helpers

In [68]:
def dashed(name):
    ''' Lower-case a title-style name and turn every space into a dash '''
    lowered = name.lower()
    return lowered.replace(" ", "-")

def get_loc_details(hotspots, name):
    ''' Get h3 str, city, latitude and longitude of given hotspot name

    hotspots: iterable of hotspot dicts with 'name', 'location', 'long_city',
              'lat' and 'lng' keys
    name:     hotspot name to look up (compared with == against h['name'])

    Returns the tuple (location, long_city, lat, lng) of the first match.
    Raises LookupError when no hotspot has that name; a bare next() would leak
    a StopIteration instead, which is confusing and can be swallowed or turned
    into a RuntimeError when raised inside a generator.
    '''
    for h in hotspots:
        if h['name'] == name:
            return (h['location'], h['long_city'], h['lat'], h['lng'])
    raise LookupError("no hotspot named {!r}".format(name))

def get_witnesses(hname, b58, timeout=30):
    ''' Get witnesses for given hotspot name and b58 addr

    hname:   hotspot name, copied unchanged into the result
    b58:     hotspot address, appended to the "<API>/witnesses/" URL
    timeout: seconds to wait for the server before giving up; without a
             timeout a stalled connection would block its worker forever

    Returns {"name": hname, "witnesses": [names...]}; the list is empty when
    the API reports no witnesses.
    Raises requests.HTTPError when the response status is not 200.
    '''
    r = requests.get("{}/witnesses/{}".format(API, b58), timeout=timeout)
    # explicit check instead of assert, which is stripped when Python runs with -O
    if r.status_code != 200:
        raise requests.HTTPError(
            "GET {} returned HTTP {}".format(r.url, r.status_code), response=r
        )
    # NOTE(review): names are kept exactly as the API returns them; callers
    # match them against the dashed hotspot names -- confirm the formats agree.
    witnesses = [w['name'] for w in r.json()['data']]
    return {"name": hname, "witnesses": witnesses}

@dataclass
class Node:
    ''' Class to represent a graph node (one hotspot)

    Two nodes are equal when both their loc and their name match; city, lat
    and lng do not take part in equality. The hash is derived from loc alone,
    which is consistent with equality (equal nodes share a loc).
    '''
    loc: str     # hex string; __hash__ parses it with int(loc, 16)
    name: str
    city: str
    lat: float
    lng: float

    # to make this class hashable
    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing with AttributeError on other.loc / other.name.
        if not isinstance(other, Node):
            return NotImplemented
        return self.loc == other.loc and self.name == other.name

    def __hash__(self):
        # loc must be a hex string; anything else makes int() raise
        return int(self.loc, 16)

    def to_dict(self):
        ''' Return the node's fields as a plain dict (one DataFrame record) '''
        return {
            'name': self.name,
            'loc': self.loc,
            'lat': self.lat,
            'lng': self.lng,
            'city': self.city
        }

def create_or_fetch_witness_list(witness_list_pickle):
    ''' Check if we already have a pickled object for getting witness list faster

    witness_list_pickle: path of the cache file.

    If the file exists its unpickled content is returned as is. Otherwise the
    witnesses of every entry in the notebook-level `hotspots` list are fetched
    with get_witnesses, written to the cache file and returned.
    '''
    from os import makedirs

    if path.exists(witness_list_pickle):
        # NOTE: pickle.load can execute arbitrary code -- only load cache files
        # that were produced by this notebook.
        with open(witness_list_pickle, 'rb') as handle:
            return pickle.load(handle)

    # Create the cache directory before the slow fetch, so a missing or
    # unwritable directory fails fast instead of discarding the fetched data.
    cache_dir = path.dirname(witness_list_pickle)
    if cache_dir:
        makedirs(cache_dir, exist_ok=True)

    # make the witness fetching marginally faster by parallelizing requests
    witness_list = Parallel(n_jobs=8)(
        delayed(get_witnesses)(h['name'], h['address']) for h in hotspots
    )
    with open(witness_list_pickle, 'wb') as handle:
        pickle.dump(witness_list, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return witness_list

def create_or_fetch_witness_dict(witness_list, witness_dict_pickle):
    ''' Check if we already have a pickled object for getting witness dict faster

    witness_list:        list of {"name", "witnesses"} dicts, only used when
                         the cache file does not exist yet
    witness_dict_pickle: path of the cache file

    If the file exists its unpickled content is returned as is. Otherwise the
    dict is built with create_witness_dict, written to the cache file and
    returned.
    '''
    from os import makedirs

    if path.exists(witness_dict_pickle):
        # NOTE: pickle.load can execute arbitrary code -- only load cache files
        # that were produced by this notebook.
        with open(witness_dict_pickle, 'rb') as handle:
            return pickle.load(handle)

    # Make sure the cache directory exists so the dump below cannot fail on it.
    cache_dir = path.dirname(witness_dict_pickle)
    if cache_dir:
        makedirs(cache_dir, exist_ok=True)

    # no cache yet: build the dict from the witness list and store it
    witness_dict = create_witness_dict(witness_list)
    with open(witness_dict_pickle, 'wb') as handle:
        pickle.dump(witness_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return witness_dict

def create_or_fetch_graph(graph_pickle):
    ''' Check if we already have a pickled graph

    graph_pickle: path of the cache file.

    If the file exists the stored graph is returned. Otherwise an undirected
    graph is built from the notebook-level `witnesses` dict (keys become nodes,
    every (key, witness) pair becomes an edge), written to the cache file and
    returned.
    '''
    from os import makedirs

    if path.exists(graph_pickle):
        # nx.read_gpickle / nx.write_gpickle were removed in NetworkX 3.0;
        # use them when available and plain pickle otherwise.
        if hasattr(nx, 'read_gpickle'):
            return nx.read_gpickle(graph_pickle)
        # NOTE: pickle.load can execute arbitrary code -- only load cache files
        # that were produced by this notebook.
        with open(graph_pickle, 'rb') as handle:
            return pickle.load(handle)

    G = nx.Graph()
    G.add_nodes_from(witnesses.keys())
    for k, v in witnesses.items():
        G.add_edges_from((k, t) for t in v)

    # Make sure the cache directory exists so writing the graph cannot fail on it.
    cache_dir = path.dirname(graph_pickle)
    if cache_dir:
        makedirs(cache_dir, exist_ok=True)

    if hasattr(nx, 'write_gpickle'):
        nx.write_gpickle(G, graph_pickle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(graph_pickle, 'wb') as handle:
            pickle.dump(G, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return G

def create_witness_dict(witness_list):
    ''' Create witness dictionary from list of witnesses

    witness_list: list of {"name": hotspot_name, "witnesses": [names...]} dicts.

    Returns a dict mapping a Node for each hotspot to the list of Nodes that
    witness it. Location details come from the notebook-level `hotspots` list.
    Hotspots without a location are skipped, and so are witnesses without one:
    Node hashes on its location, so such a node could never be added to a graph.
    Raises KeyError when a name is not present in `hotspots`.
    '''
    # Index hotspots by name once instead of scanning the whole list for every
    # lookup. setdefault keeps the first entry for a name, matching the
    # first-match behaviour of get_loc_details.
    by_name = {}
    for hs in hotspots:
        by_name.setdefault(hs['name'], hs)

    def make_node(name):
        hs = by_name[name]
        return Node(name=name, loc=hs['location'], city=hs['long_city'],
                    lat=hs['lat'], lng=hs['lng'])

    witnesses = {}
    for d in witness_list:
        key = make_node(d['name'])
        if not key.loc:
            continue
        witnesses[key] = [w for w in map(make_node, d['witnesses']) if w.loc]
    return witnesses

def create_edges_df(sub_g):
    ''' Build a DataFrame with one row per edge of sub_g

    Each row holds the names of the two end nodes ('from', 'to') and their
    coordinates ('start_lat', 'start_lng', 'end_lat', 'end_lng').
    '''
    def as_row(edge):
        src, dst = edge
        return {
            'from': src.name,
            'to': dst.name,
            'start_lat': src.lat,
            'start_lng': src.lng,
            'end_lat': dst.lat,
            'end_lng': dst.lng
        }

    return pd.DataFrame([as_row(edge) for edge in sub_g.edges()])

def create_node_df(graph):
    ''' Build a DataFrame with one row per node, taken from each node's to_dict() '''
    records = []
    for node in graph.nodes():
        records.append(node.to_dict())
    return pd.DataFrame.from_records(records)

def plot(big_g, scc, index, city):
    ''' Draw one connected component of the witness graph on a map

    big_g: the full graph
    scc:   sequence of node collections (one per component)
    index: position in scc of the component to draw
    city:  label used in the figure title only

    Nodes are drawn as red markers with the hotspot name as hover text; edges
    are drawn as blue lines. The figure is shown, nothing is returned.
    '''
    sub_g = big_g.subgraph(scc[index])
    df = create_node_df(sub_g)
    df_edges = create_edges_df(sub_g)

    fig = go.Figure()

    node_trace = go.Scattergeo(
        locationmode = 'USA-states',
        lon = df.lng,
        lat = df.lat,
        text = df.name,
        hoverinfo = 'text',
        mode = 'markers',
        marker = dict(size = 10, color = 'red', symbol='hexagon-dot')
    )
    fig.add_trace(node_trace)

    # Put every edge into a single trace: a None entry breaks the line between
    # consecutive segments. One trace per edge gets slow for big components.
    edge_lon, edge_lat = [], []
    for edge in df_edges.to_dict('records'):
        edge_lon += [edge['start_lng'], edge['end_lng'], None]
        edge_lat += [edge['start_lat'], edge['end_lat'], None]

    # a component with a single node has no edges and gets no line trace
    if edge_lon:
        edge_trace = go.Scattergeo(
            lon = edge_lon,
            lat = edge_lat,
            mode = 'lines',
            line = dict(width = 1, color = 'blue')
        )
        fig.add_trace(edge_trace)

    fig.update_layout(
        title_text = 'Connectivity for {}'.format(city),
        showlegend = False,
        geo = go.layout.Geo(
            scope = 'north america',
            projection_type = 'azimuthal equal area',
            showland = True,
            landcolor = 'rgb(243, 243, 243)',
            countrycolor = 'rgb(204, 204, 204)',
        ),
    )
    fig.show()

#### Hotspots

In [50]:
# Fetch the hotspot list; the timeout keeps the cell from hanging forever on a
# stalled connection.
hr = requests.get("{}/hotspots".format(API), timeout=30)
# explicit check instead of assert, which is stripped when Python runs with -O
if hr.status_code != 200:
    raise requests.HTTPError(
        "GET {} returned HTTP {}".format(hr.url, hr.status_code), response=hr
    )
hotspots0 = hr.json()['data']

# Normalise every hotspot name to its lower-case dashed form. The dicts are
# updated in place, so hotspots0 and hotspots hold the same (renamed) objects.
hotspots = []
for h in hotspots0:
    h['name'] = dashed(h['name'])
    hotspots.append(h)

#### Witnesses (pickled cache file: 3033 hotspots @ height 198410)

In [51]:
# check if we have witnesses locally
witness_list = create_or_fetch_witness_list('files/witnesses.pickle')
witnesses = create_or_fetch_witness_dict(witness_list, 'files/witness_dict.pickle')
G = create_or_fetch_graph('files/witness_graph.gpickle')

##### Is the network connected?

In [52]:
# False here means G splits into more than one connected component.
nx.is_connected(G)

False

##### What is the average clustering coefficient?

In [53]:
# Average clustering coefficient over the nodes of G.
nx.algorithms.cluster.average_clustering(G)

0.4062333971727669

#### How many connected components are there in total? For example, a graph with three components:
<img src="files/3-component-graph.png" style="width:400px">

In [60]:
# Connected components of G, each one a set of nodes.
CC = list(nx.connected_components(G))
# Same components ordered from largest to smallest. This must sort CC: the
# lower-case name `cc` is never defined and raises NameError on a fresh kernel.
SCC = sorted(CC, key=len, reverse=True)
print('Total connected components (sub-graphs): {}'.format(len(SCC)))
print('Top 50 biggest sub graphs: \n{}'.format([len(i) for i in SCC[:50]]))

Total connected components (sub-graphs): 1222
Top 50 biggest sub graphs: 
[351, 286, 154, 111, 94, 82, 62, 55, 35, 33, 31, 27, 26, 22, 22, 20, 20, 20, 17, 14, 13, 12, 12, 10, 10, 10, 10, 10, 9, 7, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3]


In [69]:
# Draw a single component: SCC is sorted by size (largest first), so index 6 is
# the seventh-largest one. The 'boston' label only ends up in the figure title.
# To browse several components instead:
# for i in range(5, 10):
#     plot(G, SCC, i, 'foo')
plot(G, SCC, 6, 'boston')