### Imports

In [1]:
import requests
import pandas as pd
import pickle
import copy
import random

import networkx as nx
import matplotlib.pyplot as plt
import pydot
import plotly.graph_objects as go

from os import path
from networkx.drawing.nx_pydot import graphviz_layout
from dataclasses import dataclass
from joblib import Parallel, delayed

# Never truncate long cell values when displaying frames. None is the
# documented "no limit" value; the old -1 sentinel was deprecated in pandas 1.0
# and is rejected by current releases.
pd.set_option('display.max_colwidth', None)

### Constants

In [2]:
# Base URL of the explorer REST API; endpoint paths such as "/hotspots" and
# "/witnesses/<address>" are appended to it by the request helpers below.
API = "https://explorer.helium.foundation/api"

In [3]:
def dashed(name):
    ''' Lower-case a title-style name and turn every single space into a dash '''
    lowered = name.lower()
    return lowered.replace(" ", "-")

def get_hotspot(hotspots, name):
    ''' Return the first hotspot dict whose 'name' equals the given lower-dashed name.

    Raises StopIteration when no hotspot matches.
    '''
    for candidate in hotspots:
        if candidate['name'] == name:
            return candidate
    # Same exception next() raises on an exhausted generator
    raise StopIteration

In [4]:
# Download the complete hotspot listing a single time and normalise every
# hotspot name to its lower-dashed form (the entries are updated in place).
hr = requests.get(API + "/hotspots")
assert hr.status_code == 200
hotspots0 = hr.json()['data']
HOTSPOTS = []
for h in hotspots0:
    old_name = h['name']
    h.update(name=dashed(old_name))
    HOTSPOTS.append(h)

### Hotspots dataframe

In [5]:
# One row per hotspot; the helpers below look rows up by the lower-dashed 'name' column.
df = pd.DataFrame(HOTSPOTS)

### Dataframe helpers

In [6]:
def get_b58(hdf, name):
    ''' Look up the 'address' value of the first row in hdf whose name matches.

    Raises IndexError when no row has that name.
    '''
    matching_rows = hdf.loc[hdf['name'] == name]
    first_match = matching_rows.iloc[0]
    return first_match['address']

def get_loc_details(hdf, name):
    ''' Return (location, long_city, lat, lng) from the first row in hdf with the given name.

    Raises IndexError when no row has that name.
    '''
    first_match = hdf.loc[hdf['name'] == name].iloc[0]
    wanted = ('location', 'long_city', 'lat', 'lng')
    return tuple(first_match[field] for field in wanted)

### Classes

In [7]:
@dataclass
class Witness:
    ''' Dataclass for holding witness information.

    Two witnesses are equal when both name and address match; cnt is ignored
    for equality and hashing.
    '''
    name: str
    address: str
    cnt: int

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on a missing attribute.
        if not isinstance(other, Witness):
            return NotImplemented
        return self.name == other.name and self.address == other.address

    def __hash__(self):
        # __hash__ must return an int; returning the address string itself
        # made every hash(witness) call raise TypeError.
        return hash(self.address)

    def to_dict(self):
        ''' Return the witness as a plain dict with keys name, address and cnt '''
        return {
            'name': self.name,
            'address': self.address,
            'cnt': self.cnt
        }

@dataclass
class Node:
    ''' Class to represent a graph node.

    Equality is based on loc and name only. The hash is derived from loc,
    which is parsed as a hexadecimal string, so loc must be a valid hex string
    for the node to be usable as a dict key or graph node.
    '''
    loc: str
    name: str
    city: str
    lat: float
    lng: float

    # to make this class hashable
    def __eq__(self, other):
        # Comparing against a non-Node (None, a plain string, ...) should be
        # "not equal", not an AttributeError.
        if not isinstance(other, Node):
            return NotImplemented
        return self.loc == other.loc and self.name == other.name

    def __hash__(self):
        # Equal nodes share loc, so hashing on loc alone is consistent with __eq__
        return int(self.loc, 16)

    def to_dict(self):
        ''' Return the node as a plain dict with keys name, loc, lat, lng and city '''
        return {
            'name': self.name,
            'loc': self.loc,
            'lat': self.lat,
            'lng': self.lng,
            'city': self.city
        }

def new_node(hdf, name):
    ''' Build a Node for the named hotspot from its location details in hdf '''
    loc, city, lat, lng = get_loc_details(hdf, name)
    return Node(loc=loc, name=name, city=city, lat=lat, lng=lng)

### Functions

In [8]:
def get_witnesses(hdf, hname):
    ''' Fetch the witnesses of the named hotspot from the API.

    Returns {"name": hname, "witnesses": [Witness, ...]} where each witness
    count is the sum of the values in that witness's 'hist' mapping.
    '''
    address = get_b58(hdf, hname)
    response = requests.get("{}/witnesses/{}".format(API, address))
    assert response.status_code == 200
    found = [
        Witness(
            cnt=sum(entry['hist'].values()),
            name=dashed(entry['name']),
            address=entry['address'],
        )
        for entry in response.json()['data']
    ]
    return {"name": hname, "witnesses": found}

def create_or_fetch_weighted_witness_list(hdf, witness_list_pickle_path):
    ''' Return the witness list, loading it from a pickle cache when one exists.

    hdf: hotspots dataframe; one witness request is made per value of hdf.name
    witness_list_pickle_path: cache file to read from, or to create after fetching

    On a cache miss the witnesses are fetched with 8 parallel jobs and the
    result is pickled to witness_list_pickle_path.
    '''
    if path.exists(witness_list_pickle_path):
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # cache files this notebook wrote itself.
        with open(witness_list_pickle_path, 'rb') as handle:
            return pickle.load(handle)

    # Create the cache directory *before* the slow fetch so the result is not
    # thrown away by open() failing on a missing directory afterwards.
    import os
    cache_dir = path.dirname(witness_list_pickle_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    # make the witness fetching marginally faster by parallelizing requests
    witness_list = Parallel(n_jobs=8)(delayed(get_witnesses)(hdf, n) for n in hdf.name)
    with open(witness_list_pickle_path, 'wb') as handle:
        pickle.dump(witness_list, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return witness_list

def create_or_fetch_weighted_witness_dict(hdf, witness_list, witness_dict_pickle_path):
    ''' Return the witness dict, loading it from a pickle cache when one exists.

    hdf: hotspots dataframe, passed through to create_witness_dict
    witness_list: witness list, passed through to create_witness_dict
    witness_dict_pickle_path: cache file to read from, or to create after building

    On a cache miss the dict is built with create_witness_dict and pickled to
    witness_dict_pickle_path.
    '''
    if path.exists(witness_dict_pickle_path):
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # cache files this notebook wrote itself.
        with open(witness_dict_pickle_path, 'rb') as handle:
            return pickle.load(handle)

    # Create the cache directory up front so a missing directory fails (or is
    # fixed) before the dict is built rather than after.
    import os
    cache_dir = path.dirname(witness_dict_pickle_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    witness_dict = create_witness_dict(hdf, witness_list)
    with open(witness_dict_pickle_path, 'wb') as handle:
        pickle.dump(witness_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return witness_dict

def create_witness_dict(hdf, witness_list):
    ''' Map each hotspot Node to a list of (witness Node, count) pairs.

    Hotspots whose location is falsy are left out; hotspots without any
    witnesses map to an empty list.
    '''
    result = {}
    for entry in witness_list:
        hotspot_name = entry['name']
        hotspot_witnesses = entry['witnesses']
        loc, city, lat, lng = get_loc_details(hdf, hotspot_name)
        if not loc:
            # no location -> this hotspot is not part of the graph
            continue
        hotspot_node = Node(name=hotspot_name, loc=loc, city=city, lat=lat, lng=lng)
        pairs = []
        for witness in hotspot_witnesses:
            w_loc, w_city, w_lat, w_lng = get_loc_details(hdf, witness.name)
            witness_node = Node(name=witness.name, loc=w_loc, city=w_city, lat=w_lat, lng=w_lng)
            pairs.append((witness_node, witness.cnt))
        result[hotspot_node] = pairs
    return result

def _open_graph_pickle(graph_pickle_path, mode):
    ''' Open a graph pickle file, using gzip/bz2 for paths ending in .gz/.bz2 '''
    file_name = str(graph_pickle_path)
    if file_name.endswith('.gz'):
        import gzip
        return gzip.open(graph_pickle_path, mode)
    if file_name.endswith('.bz2'):
        import bz2
        return bz2.open(graph_pickle_path, mode)
    return open(graph_pickle_path, mode)

def create_or_fetch_graph(witness_dict, graph_pickle_path):
    ''' Return the weighted witness graph, loading it from a pickle cache when one exists.

    witness_dict: mapping of node -> list of (witness node, weight) pairs
    graph_pickle_path: cache file to read from, or to create after building

    The cache is read and written with the pickle module directly:
    nx.read_gpickle / nx.write_gpickle were removed in networkx 3.0, and the
    files they produced are ordinary pickles, so existing caches still load.
    '''
    if path.exists(graph_pickle_path):
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # cache files this notebook wrote itself.
        with _open_graph_pickle(graph_pickle_path, 'rb') as handle:
            return pickle.load(handle)

    G = nx.Graph()
    # Add every key first so hotspots without witnesses still appear as nodes
    G.add_nodes_from(witness_dict.keys())
    for k, v in witness_dict.items():
        G.add_weighted_edges_from([(k, t, w) for (t, w) in v])

    # Make sure the cache directory exists so the write cannot fail on it
    import os
    cache_dir = path.dirname(graph_pickle_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    with _open_graph_pickle(graph_pickle_path, 'wb') as handle:
        pickle.dump(G, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return G

def subgraph(big_graph, scc, rank=0):
    ''' Return the subgraph of big_graph induced by the component at index rank of scc '''
    component = scc[rank]
    return big_graph.subgraph(component)

def create_node_df(graph):
    ''' Build a dataframe with one row per graph node, taken from each node's to_dict() '''
    records = []
    for node in graph.nodes():
        records.append(node.to_dict())
    return pd.DataFrame.from_records(records)

def create_edges_df(graph):
    ''' Build a dataframe with one row per edge: endpoint names, endpoint coordinates and weight (as cnt) '''
    rows = [
        {
            'from': src.name,
            'to': dst.name,
            'start_lat': src.lat,
            'start_lng': src.lng,
            'end_lat': dst.lat,
            'end_lng': dst.lng,
            'cnt': attrs['weight'],
        }
        for src, dst, attrs in graph.edges(data=True)
    ]
    return pd.DataFrame(rows)

def plot(graph, city):
    ''' Draw the graph's nodes and weighted edges on an interactive Mapbox map.

    graph: graph accepted by create_node_df / create_edges_df
    city: label used in the figure title

    The Mapbox access token is read from the '.mapbox_token' file in the
    current working directory. Each edge is drawn as its own trace so that its
    opacity can reflect its count relative to the largest count in the graph.
    '''
    df_nodes = create_node_df(graph)
    df_edges = create_edges_df(graph)

    # No real centroid is computed: the map is simply centred on the first node
    rando_center = df_nodes.iloc[0]
    center_lat = rando_center['lat']
    center_lng = rando_center['lng']

    # Close the file promptly and drop the trailing newline editors tend to add
    with open(".mapbox_token") as token_file:
        mapbox_access_token = token_file.read().strip()

    fig = go.Figure()

    node_trace = go.Scattermapbox(
        lon = df_nodes.lng,
        lat = df_nodes.lat,
        textfont = {'size': 8},
        text = df_nodes.name.values,
        mode = "markers+text",
        textposition='top center',
        hoverinfo = 'text',
        marker = go.scattermapbox.Marker(size=15, color='red')
    )

    fig.add_trace(node_trace)

    # Largest count, computed once; an edgeless graph has no 'cnt' column at all
    max_cnt = float(df_edges['cnt'].max()) if len(df_edges) else 0.0

    for edge in df_edges.to_dict('records'):
        cnt = edge['cnt']
        edge_trace = go.Scattermapbox(
            lon = [edge['start_lng'], edge['end_lng']],
            lat = [edge['start_lat'], edge['end_lat']],
            mode = 'lines+text',
            hoverinfo = 'text',
            text = "cnt: {}<br>from:{}<br>to:{}".format(cnt, edge['from'], edge['to']),
            line = dict(width = 3, color = 'blue'),
            # If every count is zero there is nothing to scale by; draw fully opaque
            opacity = float(cnt) / max_cnt if max_cnt else 1.0
        )

        fig.add_trace(edge_trace)

    fig.update_layout(
        title_text = 'Connectivity for {}'.format(city),
        showlegend = False,
        mapbox = go.layout.Mapbox(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=go.layout.mapbox.Center(
                lat=center_lat,
                lon=center_lng
            ),
            pitch=0,
            zoom=9
        ),
    )
    fig.show()

### Do the thing here in order

In [9]:
# Run the pipeline in dependency order: witness list -> witness dict -> graph.
# Each step loads its pickle cache under files/ when present and builds it otherwise.
witness_list = create_or_fetch_weighted_witness_list(df, 'files/weighted_witness_list.pickle')
witness_dict = create_or_fetch_weighted_witness_dict(df, witness_list, 'files/weighted_witness_dict.pickle')
G = create_or_fetch_graph(witness_dict, 'files/weighted_witness_graph.gpickle')
# Keep an independent deep copy of G as the pristine reference; do NOT modify _G
_G = copy.deepcopy(G)

In [10]:
# Sanity check: the edges of G carry weights
nx.is_weighted(G)

True

In [11]:
# Is the whole graph a single connected component?
nx.is_connected(G)

False

In [12]:
# Average clustering coefficient of G
nx.algorithms.cluster.average_clustering(G)

0.4045180395173635

In [13]:
# Connected components of the pristine graph copy, largest first
CC = list(nx.connected_components(_G))
SCC = sorted(CC, key=len, reverse=True)
print('Total connected components (sub-graphs): {}'.format(len(SCC)))

sub_components = []
for i in SCC:
    # Label the component with the city of one arbitrarily chosen member.
    # Each component is a set, and random.sample() no longer accepts sets
    # (TypeError since Python 3.11), so pick from a tuple instead.
    rand_node = random.choice(tuple(i))
    sub_components.append({
        'city': rand_node.city,
        'cnt': len(i)
    })
rank_df = pd.DataFrame(sub_components)
TOP = 20
print('Top {} sub components'.format(TOP))
rank_df.head(TOP)

Total connected components (sub-graphs): 1226
Top 20 sub components


Unnamed: 0,city,cnt
0,Union City,350
1,Edgewater,287
2,Modesto,146
3,Austin,111
4,San Diego,95
5,Chicago,80
6,Boston,59
7,Baltimore,55
8,Turlock,35
9,Oakdale,33


In [14]:
# # _G: big graph, don't plot that
# # SCC: sorted sub-components
# # index = index from rank_df
# index = 12
# city = sub_components[index]['city']
# G = subgraph(_G, SCC, index)
# plot(G, city)

### Gephi dataframe

In [23]:
# Node table for the full (pristine) graph, one row per node
df_nodes = create_node_df(_G)

In [24]:
# Drop the columns that are not exported. errors='ignore' keeps this cell
# re-runnable: on a second run the columns are already gone and plain drop()
# would raise KeyError.
df_nodes = df_nodes.drop(columns=['loc', 'city'], errors='ignore')

In [25]:
# Rename the columns to label / latitude / longitude for the Gephi export
df_nodes = df_nodes.rename(columns={'lat': 'latitude', 'lng': 'longitude', 'name': 'label'})

In [26]:
# Display the resulting node table
df_nodes

Unnamed: 0,label,latitude,longitude
0,little-candy-jellyfish,33.162353,-95.945712
1,dandy-wooden-lemur,30.293851,-97.857293
2,long-eggshell-skunk,47.332995,-122.264164
3,witty-basil-panther,40.785371,-73.949920
4,rich-pineapple-dinosaur,41.896940,-87.632206
...,...,...,...
2976,sparkly-aqua-wren,37.875836,-122.291543
2977,mini-currant-lizard,37.786621,-122.392090
2978,prehistoric-mossy-snail,37.852768,-122.282166
2979,mythical-tin-hawk,37.852817,-122.281954
