In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

adj_matrix = np.load("../gnn_package/data/preprocessed/test/small_adj_matrix.npy")
node_ids = np.load("../gnn_package/data/preprocessed/test/small_node_ids.npy")
# Open the JSON file in a context manager so the handle is closed as soon as
# the mapping has been parsed, rather than being left for the garbage collector.
with open("../gnn_package/data/urban_observatory/sensor_names.json") as names_file:
    names_dict = json.load(names_file)
# JSON object keys are always strings, so each node id is stringified for the lookup.
node_names = [names_dict[str(node_id)] for node_id in node_ids]
# Largest entry of the loaded matrix, shown as the cell output.
adj_matrix.max()

In [None]:
def compute_adjacency_matrix(
    adj_matrix: np.ndarray,
    sigma_squared: float = 0.1,
    epsilon: float = 0.95,
    distance_scale: float = 10000,
) -> np.ndarray:
    """
    Computes a weighted adjacency matrix from a distance matrix using a Gaussian kernel function.
    The function first normalizes distances, then applies a Gaussian decay and thresholds weak connections.

    Parameters:
    -----------
    adj_matrix : np.ndarray
        Input matrix of distances between nodes (despite the name, the values
        are treated as distances, not as adjacency weights).
    sigma_squared : float, default=0.1
        Variance parameter that controls the rate of weight decay with distance.
        Smaller values cause weights to decay more quickly, while larger values
        preserve stronger long-range connections.
    epsilon : float, default=0.95
        Threshold for keeping connections. Any connection with weight below epsilon
        is removed (set to 0). For small geographical areas, a lower value like 0.5
        may be more appropriate to ensure connectivity.
    distance_scale : float, default=10000
        Divisor applied to the raw distances before the kernel is evaluated.
        The default reproduces the previously hard-coded normalization.

    Returns:
    --------
    np.ndarray
        Weighted adjacency matrix where weights are computed using a Gaussian kernel
        function (e^(-d²/σ²)) and thresholded by epsilon. Self-connections (diagonal
        elements) are set to 0.

    Notes:
    ------
    - Distances are normalized by dividing by ``distance_scale`` before computation
    - The Gaussian kernel means weights decay exponentially with squared distance
    - Higher epsilon values lead to sparser graphs as more weak connections are removed
    """
    scaled = adj_matrix / distance_scale  # Normalize distances
    # Evaluate the Gaussian kernel once and reuse it for both the threshold
    # test and the returned weights (it was previously computed twice).
    kernel = np.exp(-(scaled * scaled) / sigma_squared)
    n = scaled.shape[0]
    off_diagonal = np.ones([n, n]) - np.identity(n)  # Ones everywhere except the diagonal
    # The boolean threshold mask zeroes weights below epsilon; the off-diagonal
    # mask then removes self-connections.
    return kernel * (kernel >= epsilon) * off_diagonal

In [None]:
# Build the thresholded Gaussian-kernel weights using the function's default
# sigma_squared and epsilon.
adj = compute_adjacency_matrix(adj_matrix)
# Largest weight in the first row, shown as the cell output; 0 here would mean
# the first node kept no connections after thresholding.
adj[0].max()

In [None]:
# Draw the weight matrix as a heat map with a colour bar.
fig = plt.figure(figsize=(8, 8))
# fignum=False makes matshow draw into the figure created above instead of
# opening a new one.
cax = plt.matshow(adj, fignum=False)
fig.colorbar(cax)
ax = cax.axes
ax.set_xlabel("Sensor station")
ax.set_ylabel("Sensor station")

In [None]:
def plot_graph(adj: np.ndarray) -> None:
    """
    Draw the graph described by a weighted adjacency matrix.

    Parameters:
    -----------
    adj : np.ndarray
        Matrix of edge weights; an edge is drawn between nodes i and j wherever
        adj[i, j] > 0. Nodes are labelled by their row/column index.

    Notes:
    ------
    - Every row index is added as a node before the edges, so nodes without
      any positive-weight connection still appear (as isolated points) instead
      of being dropped from the drawing.
    - The graph is undirected, so (i, j) and (j, i) collapse into a single edge.
    """
    plt.figure(figsize=(10, 5))
    rows, cols = np.nonzero(adj > 0)
    G = nx.Graph()
    # Register all nodes up front; building the graph from edges alone would
    # omit every node whose connections were all thresholded away.
    G.add_nodes_from(range(adj.shape[0]))
    G.add_edges_from(zip(rows.tolist(), cols.tolist()))
    nx.draw(G, with_labels=True)
    plt.show()


# Visualise which nodes remain connected in the thresholded weight matrix.
plot_graph(adj)

In [None]:
# Show the sensor names looked up for each node id.
node_names

In [None]:
from uoapi import APIClient

client = APIClient()

# node_names = node_names[:2]

# Collect one frame per sensor and concatenate once after the loop: calling
# pd.concat inside the loop re-copies the accumulated frame on every iteration.
sensor_frames = []
for name in node_names:
    response = client.get_individual_raw_sensor_data(name, last_n_days=365)
    # Only the first entry under "sensors" is used.
    data = response["sensors"][0]["data"]
    if len(data) == 0:
        # Nothing returned for this sensor in the requested window; skip it.
        continue
    print(f"Adding data for {name}")
    print(f"Data length: {len(data)}")
    print(f"Data keys: {data.keys()}")
    print(f"Data: {data['Walking']}")
    df = pd.DataFrame(data["Walking"])
    sensor_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# sensor returned data (matching the previous result in that case).
combined_df = pd.concat(sensor_frames, axis=1) if sensor_frames else pd.DataFrame()

In [None]:
# Display the side-by-side (axis=1) concatenation of the per-sensor "Walking" frames.
combined_df