## Create spatial weights matrix

Create weights up to order 5 and store the order to allow distance-weighting of the lag.

In [6]:
import geopandas as gpd
import numpy as np
from libpysal import graph
import pandas as pd
import h3

In [2]:
# Relative path to the root data folder; every read and write below is built from it.
data_folder = "../../../demoland_data"

Load the data

In [3]:
# Read the complete grid from Parquet. Its index values are passed straight to h3
# in the next step, so the index is presumably made of H3 cell ids.
data = gpd.read_parquet(f"{data_folder}/h3/grid_complete.parquet")

Build the default matrix: for every cell, all neighbours up to order 5, with the ring order stored as the weight.

In [4]:
%%time
neighbors = {}
weights = {}
for ix in data.index:
    rings = h3.hex_range_distances(ix, 5)
    for i, ring in enumerate(rings):
        if i == 0:
            neighbors[ix] = []
            weights[ix] = []
        else:
            neighbors[ix].extend(list(ring))
            weights[ix].extend([i] * len(ring))

CPU times: user 2min 3s, sys: 3.6 s, total: 2min 6s
Wall time: 2min 6s


In [7]:
# Build the graph from the two dictionaries created above: per focal cell, a list
# of neighbours and a parallel list of weights (the ring order of each neighbour).
matrix = graph.Graph.from_dicts(neighbors, weights)

Save to Parquet.

In [8]:
# Persist the order-weighted graph so it does not need to be rebuilt.
matrix.to_parquet(f"{data_folder}/h3/grid_adjacency_graph.parquet")

Transform the matrix into binary and inverse-distance-weighted versions.

In [9]:
# Binary version of the graph via the "b" transformation.
%time binary = matrix.transform("b")
# Inverse-distance version: each weight becomes the reciprocal of the stored ring order.
# NOTE(review): a zero weight in `matrix` would turn into inf here - confirm none exist.
%time inverse = graph.Graph(1 / matrix.adjacency)

CPU times: user 1min, sys: 5.15 s, total: 1min 5s
Wall time: 1min 5s
CPU times: user 38 s, sys: 3.74 s, total: 41.8 s
Wall time: 41.7 s


In [13]:
# Inspect the inverse-weighted adjacency: one weight per (focal, neighbor) pair,
# equal to the reciprocal of the ring order.
inverse._adjacency

focal            neighbor       
89187290337ffff  89187290edbffff    1.000000
                 89187290ed3ffff    0.500000
                 8918729016fffff    0.333333
                 891872903afffff    1.000000
                 891872903a7ffff    0.500000
                                      ...   
891946cb263ffff  891946c94dbffff    0.333333
                 891946c94d3ffff    0.500000
                 891946cb26fffff    1.000000
                 891973349a7ffff    0.500000
                 891946cb26bffff    1.000000
Name: weight, Length: 123282816, dtype: float64

Save to Parquet.

In [10]:
# Persist both derived graphs next to the order-weighted one.
binary.to_parquet(f"{data_folder}/h3/grid_adjacency_binary.parquet")
inverse.to_parquet(f"{data_folder}/h3/grid_adjacency_inverse.parquet")

## Get subsets of weights per distance

In [16]:
%%time
for i in range(1, 5):
    mask = inverse._adjacency < (1 / i)
    
    bin_adj = binary._adjacency.copy()
    # assign zeros to neighbors furhter away 
    bin_adj[mask] = 0
    # remove zeros but keep self-loops for isolates
    adj = bin_adj.reset_index(level=1)
    isolates = adj.index == adj.neighbor
    zeros = mask.values != isolates
    # create graph from adjusted adjacency
    graph.Graph(bin_adj[~zeros]).to_parquet(
        f"{data_folder}/h3/grid_adjacency_binary_k{i}.parquet"
    )
        
    inv_adj = inverse._adjacency.copy()
    # assign zeros to neighbors furhter away 
    inv_adj[mask] = 0
    # remove zeros but keep self-loops for isolates
    adj = inv_adj.reset_index(level=1)
    isolates = adj.index == adj.neighbor
    zeros = mask.values != isolates
    # create graph from adjusted adjacency
    graph.Graph(inv_adj[~zeros]).to_parquet(
        f"{data_folder}/h3/grid_adjacency_inverse_k{i}.parquet"
    )

CPU times: user 5min 34s, sys: 41.1 s, total: 6min 15s
Wall time: 6min 15s
