# Brownian Motion

Two tasks: 
* What's the volatility of each node? (graph independent)
* What's the natural frequency of each node? (graph dependent)

Create simulation

In [1]:
import numpy as np
import pandas as pd
import networkx as nx

# Simulation parameters (module-level settings read by the simulation and plotting cells)
num_nodes = 100      # number of graph nodes / oscillators
dim = 2              # dimension of each node's signal
time_horizon = 10    # total simulated time
dt = 1e-2            # integration step
# round() rather than plain int(): the float quotient can land just below an
# integer (e.g. 0.3 / 0.1 == 2.999...), which int() would truncate by one step.
num_time_steps = int(round(time_horizon / dt))
beta_gain = 1        # amplitude of the periodic drift

def run_simultion(simulation_seed=1,target_seed=29,graph_seed=31,independent=True):
    """Simulate node-wise Brownian motions whose drift is driven by Kuramoto oscillators.

    A random Erdős–Rényi graph with random edge weights couples one oscillator
    per node. At every step the oscillator phases are advanced, a sinusoidal
    drift is derived from them, and each node's signal ``X`` is advanced with
    an Euler–Maruyama step using that drift and a volatility matrix shared by
    all nodes.

    Reads the module-level settings ``num_nodes``, ``dim``, ``time_horizon``,
    ``dt``, ``num_time_steps`` and ``beta_gain``.

    Side effect: re-seeds NumPy's global random generator (first with
    ``target_seed``, then with ``simulation_seed``).

    Parameters
    ----------
    simulation_seed : int
        Seed for initial phases, initial drift, initial signal, volatility and
        the Brownian increments.
    target_seed : int
        Seed for the natural frequencies (the regression target).
    graph_seed : int
        Seed for the graph topology and for its edge weights.
    independent : bool
        If True the volatility matrix is diagonal; otherwise it is a full
        ``dim x dim`` random matrix.

    Returns
    -------
    X_traj : numpy.ndarray, shape (num_nodes, num_time_steps, dim)
        Simulated signal of every node at every time step.
    natural_freq : numpy.ndarray, shape (num_nodes,)
        Natural frequency of every node.
    graph : networkx.Graph
        The coupling graph; every edge carries a ``'weight'`` attribute.
    """

    def kuramoto(graph, theta, omega, dt):
        """Advance the phases by one explicit Euler step of the degree-normalised Kuramoto model."""
        deg = dict(graph.degree())
        # The intrinsic-frequency term is applied exactly once per node. Adding
        # it inside the edge loop would scale it by the node degree and would
        # leave isolated nodes without any phase drift at all.
        dtheta = dt * omega
        for u, v, data in graph.edges(data=True):
            coupling = data['weight']
            dtheta[u] += dt * (1/deg[u]) * coupling * np.sin(theta[v] - theta[u])
            dtheta[v] += dt * (1/deg[v]) * coupling * np.sin(theta[u] - theta[v])
        return theta + dtheta

    def periodic_drift(beta, theta, omega,mu_0, t):
        """Sinusoidal drift around ``mu_0`` with amplitude ``beta``, frequency ``omega`` and phase ``theta``."""
        return mu_0 + beta*np.sin(omega*t + theta)

    # Create a graph
    graph = nx.erdos_renyi_graph(n=num_nodes,p=np.log(num_nodes)/num_nodes+0.01,seed=graph_seed)
    # Draw the edge weights from a dedicated generator seeded with graph_seed so
    # the weighted graph is reproducible; the global generator is not yet
    # seeded at this point and is left untouched here.
    weight_rng = np.random.RandomState(graph_seed)
    for u, v in graph.edges:
        graph[u][v]['weight'] = weight_rng.rand()

    # Natural frequency (target variable)
    np.random.seed(target_seed)
    natural_freq = np.random.rand(num_nodes)

    # Other oscillator parameters
    np.random.seed(simulation_seed)
    omega = np.stack(dim*[natural_freq],axis=1)  # Intrinsic frequencies, repeated along each dimension
    theta = 2 * np.pi * np.random.rand(num_nodes,dim)  # Initial phases
    beta =  beta_gain * np.ones((num_nodes,dim)) # amplitude
    mu_initial = np.random.rand(num_nodes,dim) # initial drift

    # Brownian motion parameters
    X = np.random.rand(num_nodes,dim) # signal
    drift = np.random.rand(num_nodes,dim) # drift
    if independent:
        sigma = np.diag(np.random.randn(dim)) # volatility (diagonal: independent components)
    else:
        sigma = np.random.randn(dim,dim) # volatility (full matrix: correlated components)

    # Trajectory buffers; theta_traj and mu_traj are recorded but not returned
    theta_traj = np.zeros((num_nodes,num_time_steps,dim))
    mu_traj = np.zeros((num_nodes,num_time_steps,dim))
    X_traj = np.zeros((num_nodes,num_time_steps, dim))

    # Time sequence
    tt = np.linspace(0, time_horizon, num_time_steps)

    for step,t in enumerate(tt):
        # Record the current state before advancing it
        theta_traj[:, step] = theta
        mu_traj[:,step] = drift
        X_traj[:,step,:] = X
        theta = kuramoto(graph, theta, omega,dt)
        drift = periodic_drift(beta, theta,omega, mu_initial, t)
        # Euler–Maruyama update: deterministic drift plus sqrt(dt)-scaled Gaussian noise
        X = X + dt * drift + np.sqrt(dt) * np.random.randn(num_nodes,dim) @ sigma

    return X_traj, natural_freq, graph

import torch
from pygsig.graph import CustomStaticGraphTemporalSignal,RandomNodeSplit
import torch_geometric.transforms as T
from pygsig.signature import SignatureFeatures

def get_sequence(X_traj,natural_freq,graph):
    """Wrap one simulated trajectory as a static-graph temporal signal.

    Parameters
    ----------
    X_traj : array indexed as ``X_traj[:, t, :]``
        Node signals; axis 1 is the snapshot (time) axis.
    natural_freq : array-like
        Per-node target, repeated unchanged for every snapshot.
    graph : networkx.Graph
        Graph whose edges carry a ``'weight'`` attribute.

    Returns
    -------
    CustomStaticGraphTemporalSignal
        One snapshot per time step, all sharing the same directed edge list
        and edge weights.
    """
    num_snapshots = X_traj.shape[1]

    # Both directions of every undirected edge appear in the edge list
    edge_table = nx.to_pandas_edgelist(graph.to_directed())
    endpoints = edge_table[['source','target']].values.T
    edge_index = torch.tensor(endpoints,dtype=torch.long)
    edge_weight = torch.tensor(edge_table['weight'].values,dtype=torch.float)

    features = []
    targets = []
    for t in range(num_snapshots):
        features.append(X_traj[:,t,:])
        # The target is constant in time; a column vector is built per snapshot
        targets.append(torch.tensor(natural_freq,dtype=torch.float).unsqueeze(-1))

    # Sequential Data
    return CustomStaticGraphTemporalSignal(
        edge_index=edge_index,
        edge_weight=edge_weight,
        features=features,
        targets=targets,
    )


AttributeError: module 'torch_geometric_temporal' has no attribute 'signals'

Make dataset consisting of multiple SDE trajectories

In [None]:
from pathlib import Path

from tqdm import tqdm

# Build one temporal graph signal per simulation run. Only the simulation seed
# varies; the target and graph seeds are held fixed across runs.
num_runs = 10
seq_dataset = []
for run in tqdm(range(num_runs)):
    X_traj, natural_freq, graph = run_simultion(simulation_seed=run,target_seed=29,graph_seed=31)
    seq_dataset.append(get_sequence(X_traj,natural_freq,graph))

# Create the output directory first: torch.save does not create missing parent
# directories, so on a fresh checkout the save would otherwise fail.
dataset_path = Path('datasets/brownian/brownian.pth')
dataset_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(seq_dataset, dataset_path)

Plot trajectories

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Time axis shared by all time-series plots
tt = np.linspace(0, time_horizon, num_time_steps)

# Coupling graph on a circular layout
fig_graph, ax_graph = plt.subplots(figsize=[4,4])
pos = nx.circular_layout(graph)
nx.draw_networkx_nodes(graph, pos, ax=ax_graph, node_color='black', node_size=1)
nx.draw_networkx_edges(graph, pos, ax=ax_graph, width=1, edge_color='black', alpha=0.3)

# One figure per signal component, one translucent line per node
for component in range(dim):
    fig_ts, ax_ts = plt.subplots()
    for node in range(num_nodes):
        ax_ts.plot(tt, X_traj[node, :, component], 'black', label=f'Node {node}', alpha=0.3)
    ax_ts.set_xlabel('$t$')
    ax_ts.set_ylabel(f'$X_{component+1}(t)$')
    plt.show()

# Paths in the (X_1, X_2) plane
fig_path, ax_path = plt.subplots()
for node in range(num_nodes):
    ax_path.plot(X_traj[node, :, 0], X_traj[node, :, 1], 'black', label=f'Node {node}', alpha=0.3)
ax_path.set_xlim(-10, 10)
ax_path.set_ylim(-10, 10)
ax_path.set_xlabel('$X_1(t)$')
ax_path.set_ylabel('$X_2(t)$')
plt.show()

Signature Feature Map + GNNs

In [None]:
import torch
import torch.nn as nn
from pygsig.models import GCNRegression, MLPRegression, ChebNetRegression
import signatory
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tqdm import tqdm
import numpy as np

num_epochs = 1000
num_splits = 4
signature_depth = 4
log_every = 10  # print the losses every `log_every` epochs

# Models operating on signature features: two ChebNets (different K) and an MLP
k_list = [2, 4]
num_signature_features = signatory.signature_channels(channels=2, depth=signature_depth)
models = []
models += [ChebNetRegression(num_channels=[num_signature_features, 64, 1], K=k) for k in k_list]
models += [MLPRegression(num_channels=[num_signature_features, 64, 64, 64, 1])]

# Turn every temporal sequence into one static data object carrying signature
# features and `num_splits` random node splits.
dataset = []
for seq in seq_dataset:
    signature_transform = SignatureFeatures(seq, sig_depth=signature_depth, normalize=True, log_signature=False)
    node_split = RandomNodeSplit(train_ratio=0.60, eval_ratio=0.10, num_splits=num_splits, seed=29)
    transform = T.Compose([signature_transform, node_split])
    dataset.append(transform(seq))

# One list of test errors per model, in the same order as `models`
mae_list = []
mse_list = []

for model in models:
    print(f'Model: {model._get_name()}')
    print(f"Number of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
    criterion = nn.MSELoss()

    mse = []
    mae = []

    with tqdm(total=len(dataset) * num_splits) as pbar:
        for split in range(num_splits):
            for data in dataset:
                model.reset_parameters()
                # A fresh optimizer per run: reusing one Adam instance would
                # carry its moment estimates over from the previous run even
                # though the parameters were just re-initialised.
                optimizer = torch.optim.Adam(model.parameters(), lr=1e-4,weight_decay=1e-5)
                train_mask, test_mask, eval_mask = data.train_mask[split], data.test_mask[split], data.eval_mask[split]
                for epoch in range(num_epochs):
                    # train
                    model.train()
                    optimizer.zero_grad()
                    out = model(data.x, data.edge_index)
                    train_loss = criterion(out[train_mask], data.y[train_mask])
                    train_loss.backward()
                    optimizer.step()

                    # evaluate (only when logging, since the value is used nowhere else)
                    if epoch % log_every == 0:
                        model.eval()
                        with torch.no_grad():
                            # Re-run the forward pass in eval mode; the training
                            # output above predates the optimizer step and was
                            # produced in training mode.
                            eval_out = model(data.x, data.edge_index)
                            eval_loss = criterion(eval_out[eval_mask], data.y[eval_mask])
                        # tqdm.write keeps the log lines from clobbering the progress bar
                        tqdm.write(f'Split {split}, Epoch {epoch}, Train MSE Loss: {train_loss.item():.4f}, Eval MSE Loss: {eval_loss.item():.4f}')

                # Test error of the trained model on the held-out nodes
                model.eval()
                with torch.no_grad():
                    y_pred = model(data.x, data.edge_index)
                    mse.append(mean_squared_error(data.y[test_mask], y_pred[test_mask]))
                    mae.append(mean_absolute_error(data.y[test_mask], y_pred[test_mask]))
                pbar.update(1)
    mse_list.append(mse)
    mae_list.append(mae)

    print(f' MSE: {np.mean(mse):.4f} ± {np.std(mse):.4f}, MAE: {np.mean(mae):.4f} ± {np.std(mae):.4f}')

Spatiotemporal GNNs

In [None]:
import torch
import torch.nn as nn
from pygsig.models import GConvGRURegression,GConvLSTMRegression
import signatory
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm



num_epochs = 500

k_max = 2
seq_models = [GConvGRURegression(num_channels = [2,64,1],K=k) for k in range(1,k_max+1)] + [GConvLSTMRegression(num_channels = [2,16,1],K=k) for k in range(1,k_max+1)]

# One list of test errors per sequence model, in the same order as `seq_models`
seq_mae_list = []
seq_mse_list = []

for model in seq_models:
    mse = []
    mae = []
    print(f'Model: {model._get_name()}')
    print(f"Number of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")

    # Recurrent models consume the raw temporal sequences (`seq_dataset`), not
    # the static signature data in `dataset`. Each sequence is paired with the
    # static data object derived from it so that both use the same node splits.
    for seq, data in tqdm(list(zip(seq_dataset, dataset))):
        # Standardise every snapshot's features across nodes (scaler is re-fit per snapshot)
        scaler = StandardScaler()
        seq.features = [scaler.fit_transform(snapshot.x) for snapshot in seq]
        seq.train_mask,seq.eval_mask,seq.test_mask = data.train_mask, data.eval_mask, data.test_mask

        # Training loop
        for split in tqdm(range(num_splits)):
            # Start every (sequence, split) run from fresh weights so nodes
            # held out in this split were not trained on in an earlier run.
            # NOTE(review): assumes the pygsig recurrent models expose
            # reset_parameters() like the static ones do - confirm.
            if hasattr(model, 'reset_parameters'):
                model.reset_parameters()
            criterion = nn.MSELoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=1e-3,weight_decay=1e-4)

            # The masks depend only on the split, not on the snapshot
            train_mask = seq.train_mask[split]
            test_mask = seq.test_mask[split]
            eval_mask = seq.eval_mask[split]

            for epoch in range(num_epochs):
                # One optimizer step per snapshot
                for snap_idx,snapshot in enumerate(seq):
                    model.train()
                    optimizer.zero_grad()
                    out = model(snapshot.x, snapshot.edge_index)
                    train_loss = criterion(out[train_mask], snapshot.y[train_mask])
                    train_loss.backward()
                    optimizer.step()

                # Evaluate once per epoch on the last snapshot; only this value is reported
                model.eval()
                with torch.no_grad():
                    out = model(snapshot.x, snapshot.edge_index)
                    eval_loss = criterion(out[eval_mask], snapshot.y[eval_mask])
                # tqdm.write keeps the log lines from clobbering the progress bars
                tqdm.write(f'Split {split}, Epoch {epoch}, Train MSE Loss: {train_loss.item():.4f}, Eval MSE Loss: {eval_loss.item():.4f}')

            # Test error on the held-out nodes, using the last snapshot
            model.eval()
            with torch.no_grad():
                y_pred = model(snapshot.x, snapshot.edge_index)
                mse.append(mean_squared_error(snapshot.y[test_mask],y_pred[test_mask]))
                mae.append(mean_absolute_error(snapshot.y[test_mask],y_pred[test_mask]))

    # Stored separately from mse_list / mae_list, which hold the
    # signature-model results (and later the baseline).
    seq_mse_list.append(mse)
    seq_mae_list.append(mae)

    print(f' MSE: {np.mean(mse):.4f} ± {np.std(mse):.4f}, MAE: {np.mean(mae):.4f} ± {np.std(mae):.4f} ')

Baseline

In [None]:
# Baseline: predict the mean training target for every node.
# Use the last dataset entry explicitly instead of a loop variable leaked from
# an earlier cell. Every run was generated with the same target_seed, so the
# targets are identical across entries; the node splits presumably are too
# (same split seed) - confirm against RandomNodeSplit.
baseline_data = dataset[-1]

mse_base = []
mae_base = []
with torch.no_grad():
    for split in range(num_splits):
        train_mask = baseline_data.train_mask[split]
        test_mask = baseline_data.test_mask[split]
        # Constant prediction with the same shape as the targets
        train_mean = torch.mean(baseline_data.y[train_mask]).item()
        y_pred = torch.full_like(baseline_data.y, train_mean)
        mse_base.append( mean_squared_error(baseline_data.y[test_mask],y_pred[test_mask]) )
        mae_base.append( mean_absolute_error(baseline_data.y[test_mask],y_pred[test_mask]) )

print('Model: Baseline')
print(f'Number of parameters: {0}')
print(f' MSE: {np.mean(mse_base):.4f} ± {np.std(mse_base):.4f}, MAE: {np.mean(mae_base):.4f} ± {np.std(mae_base):.4f} ')

# NOTE: re-running this cell appends the baseline again; re-run the training
# cell first to reset mse_list / mae_list.
mse_list.append(mse_base)
mae_list.append(mae_base)


Analyse results

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Set the seaborn theme
sns.set_theme(style="darkgrid")

# Labels must follow the order in which results were appended to mse_list:
# the ChebNet models, then the MLP, then the baseline. (No GAT models are
# trained in this notebook, so none are labelled here.)
model_names = [f'S-Cheb{k}' for k in k_list] + ['S-MLP', 'Base']
if len(model_names) != len(mse_list):
    raise ValueError(
        f'Expected {len(model_names)} result lists ({model_names}) but mse_list has {len(mse_list)}; '
        're-run the training and baseline cells once, in order.'
    )

fig, axes = plt.subplots(1, 1, figsize=(8, 6))
sns.boxplot(data=mse_list, ax=axes)
axes.set_ylabel('MSE', fontsize=10)  # Set the font size for the y-axis label
# Fix the tick positions before labelling them so labels and boxes stay aligned
axes.set_xticks(range(len(model_names)))
axes.set_xticklabels(model_names, fontsize=8)  # Set the font size for the x-axis tick labels
fig.suptitle('Error Predicting Natural Frequencies')
plt.show()
