# Linear in means model

The following notebook tests structural adversarial estimation of a linear-in-means peer-effects model of the form:
$$ y_i = a + b \cdot \frac{1}{|\mathcal{N}(i)|} \sum_{j \in \mathcal{N}(i)} x_j + \varepsilon_i $$

where $y_i$ is the scalar outcome of node $i$, $\mathcal{N}(i)$ is the set of $i$'s neighbours, and $x_j$ is some characteristic of node $j$ that is exogenous to the structural model.
Such a model can, of course, be estimated easily with cross-sectional OLS, but this experiment shows that our method works in the trivial case, and for a 2-parameter model we are able to visualize both the objective and the optimization path. The discriminator is a simple GNN with a single convolutional layer and a linear classifier, and we use ego sampling of depth 1 to create the training sample. The outer minimization of the adversarial objective is solved with surrogate optimization, using a GP as the surrogate model for expected improvement.

## Imports

In [1]:
import warnings
warnings.filterwarnings("ignore", message="An issue occurred while importing 'torch-sparse'")
warnings.filterwarnings("ignore", message="An issue occurred while importing 'torch-cluster'")

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

import sys
sys.path.append('/path/to/parent/directory/of/structural_gnn_lib')

from structural_gnn_lib import (
    AdversarialEstimator,
    GraphDataset,
    objective_function
)



## Test dataset 

In [2]:
# Experiment configuration; these constants are consumed by the execution cell.
N_NODES = 2500  # number of graph nodes (passed as `num_nodes` to create_test_graph_dataset)
N_SAMPLES = 2000  # passed as `m` to the objective-surface evaluation and to `estimate`
N_EPOCHS = 6  # passed as `num_epochs` to the objective-surface evaluation and to `estimate`
RESOLUTION = 10  # grid points per axis of the objective surface (RESOLUTION**2 evaluations)
P = 0.01  # edge probability `p` of the Erdos-Renyi test graph

def create_test_graph_dataset(
    num_nodes: int = 100,
    true_a: float = 1.0,
    true_b: float = 2.0,
    p: float = 0.01,
    seed: int = 42
) -> GraphDataset:
    """Simulate a linear-in-means dataset on an Erdos-Renyi random graph.

    Each node ``i`` receives a standard-normal characteristic ``x_i`` and an
    outcome ``y_i = true_a + true_b * mean(x_j for neighbours j) + noise``,
    with Gaussian noise of standard deviation 0.1. Nodes without neighbours
    use a neighbour mean of 0.0.

    Note: this reseeds the global NumPy, ``random`` and PyTorch generators
    with ``seed`` as a side effect.

    Args:
        num_nodes: Number of nodes in the graph.
        true_a: Intercept of the data-generating process.
        true_b: Coefficient on the mean neighbour characteristic.
        p: Edge probability of the Erdos-Renyi graph.
        seed: Seed used for the graph and for all global generators.

    Returns:
        A ``GraphDataset`` built from the characteristics ``X`` (n x 1), the
        outcomes ``Y`` (n x 1), the dense adjacency matrix ``A`` and the list
        of node indices ``N``.
    """
    # Seed every global generator, in the same order as always, for reproducibility.
    for reseed in (np.random.seed, random.seed, torch.manual_seed):
        reseed(seed)

    graph = nx.erdos_renyi_graph(n=num_nodes, p=p, seed=seed)
    A = nx.adjacency_matrix(graph).todense()
    X = np.random.randn(num_nodes, 1)

    Y = np.zeros((num_nodes, 1))
    for node in range(num_nodes):
        peers = list(graph.neighbors(node))
        # Isolated nodes have no peers to average over; fall back to 0.0.
        peer_mean = np.mean(X[peers], axis=0) if peers else 0.0
        # One noise draw per node, in node order, so the random stream is stable.
        noise = np.random.normal(0.0, 0.1)
        Y[node] = true_a + true_b * peer_mean + noise

    return GraphDataset(X=X, Y=Y, A=A, N=list(range(num_nodes)))

## Structural model mapping

In [3]:
from numba import njit, prange

@njit(parallel=True)
def linear_in_means_model(x, adjacency, theta):
    """
    Noise-free linear-in-means outcome, compiled by Numba with a parallel outer loop.

    For every node ``i`` the outcome is ``a + b * m_i``, where ``m_i`` is the
    average of ``x[j, 0]`` over all ``j`` with ``adjacency[i, j] > 0`` and
    ``m_i = 0.0`` when node ``i`` has no such ``j``.

    Parameters:
    -----------
    x : numpy.ndarray
        Node features (n × k); only the first column is used.
    adjacency : numpy.ndarray
        Adjacency matrix (n × n); any strictly positive entry counts as an edge.
    theta : tuple or list
        Parameters ``(a, b)``: intercept and peer-effect coefficient.

    Returns:
    --------
    numpy.ndarray
        Generated outcomes (n × 1), with the same dtype as ``x``.
    """
    intercept = theta[0]
    slope = theta[1]
    num_nodes = x.shape[0]
    y = np.zeros((num_nodes, 1), dtype=x.dtype)

    # Rows are independent of each other, so the outer loop runs in parallel.
    for node in prange(num_nodes):
        total = 0.0
        degree = 0

        for other in range(num_nodes):
            if adjacency[node, other] > 0:
                degree += 1
                total += x[other, 0]

        # Default covers isolated nodes and avoids a division by zero.
        peer_mean = 0.0
        if degree > 0:
            peer_mean = total / degree

        y[node, 0] = intercept + slope * peer_mean

    return y

In [4]:
def discriminator_factory(input_dim, hidden_dim=16, num_classes=2):
    """Build a fresh, untrained graph-level discriminator.

    The network applies one ``GCNConv`` layer followed by ReLU and dropout
    (p=0.2, active only in training mode), mean-pools node embeddings per
    graph in the batch, and maps the pooled vector through a linear layer.

    Args:
        input_dim: Number of input features per node.
        hidden_dim: Width of the graph-convolution output.
        num_classes: Number of outputs of the final linear layer.

    Returns:
        A new ``SimpleGNN`` module instance.
    """

    class SimpleGNN(nn.Module):
        """Single-layer GCN with mean pooling and a linear classification head."""

        def __init__(self, in_dim, hid_dim, num_cls):
            super().__init__()
            self.conv = GCNConv(in_dim, hid_dim)
            self.classifier = nn.Linear(hid_dim, num_cls)

        def forward(self, data):
            # Node-level embedding from the single graph convolution.
            hidden = F.relu(self.conv(data.x, data.edge_index))
            hidden = F.dropout(hidden, p=0.2, training=self.training)
            # Collapse node embeddings to one vector per graph in the batch.
            pooled = global_mean_pool(hidden, data.batch)
            return self.classifier(pooled)

    model = SimpleGNN(input_dim, hidden_dim, num_classes)
    return model

## Visualization utils 

In [5]:
def visualize_objective_surface(
    estimator,
    m,
    resolution,
    num_epochs,
    verbose=False,
    *,
    a_limits=(-3, 5),
    b_limits=(-1, 5),
    true_params=(1.0, 2.0),
):
    """Evaluate the adversarial objective on a 2-D parameter grid and plot it.

    ``objective_function`` is called once per grid point (``resolution ** 2``
    calls in total), so the run time grows quadratically with ``resolution``.
    The result is shown as a 3-D surface and as a contour plot, with markers
    for the true parameters and, when ``estimator.estimated_params`` is set,
    for the estimated parameters.

    Args:
        estimator: Object exposing ``ground_truth_generator``,
            ``synthetic_generator`` and ``discriminator_factory``; an optional
            ``estimated_params`` attribute (pair ``(a, b)``) is plotted if present.
        m: Forwarded to ``objective_function`` as ``m``.
        resolution: Number of grid points along each parameter axis.
        num_epochs: Forwarded to ``objective_function`` as ``num_epochs``.
        verbose: Forwarded to ``objective_function`` as ``verbose``.
        a_limits: ``(low, high)`` range of the grid for parameter ``a``.
        b_limits: ``(low, high)`` range of the grid for parameter ``b``.
        true_params: ``(a, b)`` pair marked as the true parameters, or ``None``
            to omit the marker.

    Returns:
        ``(Z, (A, B))`` where ``A`` and ``B`` are the meshgrid arrays and ``Z``
        holds the objective value at each grid point.
    """
    a_range = np.linspace(a_limits[0], a_limits[1], resolution)
    b_range = np.linspace(b_limits[0], b_limits[1], resolution)
    A, B = np.meshgrid(a_range, b_range)

    Z = np.zeros_like(A)

    with tqdm(total=A.size, desc="Evaluating objective surface") as pbar:
        # np.ndindex walks the grid row by row, i.e. in the same order as
        # nested ``for i`` / ``for j`` loops over the two axes.
        for idx in np.ndindex(*A.shape):
            Z[idx] = objective_function(
                [A[idx], B[idx]],
                estimator.ground_truth_generator,
                estimator.synthetic_generator,
                discriminator_factory=estimator.discriminator_factory,
                num_epochs=num_epochs,
                m=m,
                verbose=verbose,
            )
            pbar.update(1)

    fig = plt.figure(figsize=(12, 5))

    ax1 = fig.add_subplot(121, projection='3d')
    surf = ax1.plot_surface(A, B, Z, cmap='viridis', alpha=0.8)
    ax1.set_xlabel('Parameter a')
    ax1.set_ylabel('Parameter b')
    ax1.set_zlabel('Discriminator Accuracy')
    ax1.set_title('Objective Function Surface')

    ax2 = fig.add_subplot(122)
    contour = ax2.contour(A, B, Z, levels=20, cmap='viridis')
    ax2.clabel(contour, inline=True, fontsize=8)

    if true_params is not None:
        true_a, true_b = true_params
        # On the 3-D axis the marker sits on the floor of the surface (Z.min()).
        ax1.scatter([true_a], [true_b], [Z.min()], color='red', s=100, marker='*', label='True params')
        ax2.scatter([true_a], [true_b], color='red', s=100, marker='*', label='True params')

    # getattr with a default covers both "attribute missing" and "attribute is None".
    estimated = getattr(estimator, 'estimated_params', None)
    if estimated is not None:
        est_a, est_b = estimated
        ax1.scatter([est_a], [est_b], [Z.min()], color='orange', s=100, marker='^', label='Estimated params')
        ax2.scatter([est_a], [est_b], color='orange', s=100, marker='^', label='Estimated params')

    ax2.set_xlabel('Parameter a')
    ax2.set_ylabel('Parameter b')
    ax2.set_title('Objective Function Contours')

    # Show a legend only where labelled markers exist, so no empty-legend warning is raised.
    if true_params is not None or estimated is not None:
        ax1.legend()
        ax2.legend()

    plt.colorbar(surf, ax=ax1, shrink=0.5)
    plt.tight_layout()
    plt.show()

    return Z, (A, B)

## Execution

In [None]:
# End-to-end experiment: simulate data, build the estimator, plot the objective
# surface, run the adversarial estimation and report the parameter errors.
if __name__ == "__main__":
    print("Testing Adversarial Estimation for Linear-in-Means Model")
    print("=" * 60)
    # Ground-truth (a, b) used to simulate the data and to compute errors below.
    true_params = [1.0, 2.0]
    print("\n1. Generating test dataset...")
    test_data = create_test_graph_dataset(num_nodes=N_NODES, true_a=true_params[0], true_b=true_params[1], p=P)

    print("\n2. Creating adversarial estimator...")
    # One (low, high) pair per parameter — presumably the search bounds for
    # (a, b); confirm against AdversarialEstimator.
    estimator = AdversarialEstimator(
        ground_truth_data=test_data,
        structural_model=linear_in_means_model,
        initial_params=[0.0, 0.0],
        bounds=[(-100, 100), (-100, 100)],
        discriminator_factory=discriminator_factory,
    )

    print("\n3. Visualizing objective function surface...")
    # NOTE(review): this runs before estimation, so `estimator.estimated_params`
    # is only assigned afterwards (step 4) — the "Estimated params" marker can
    # appear here only if AdversarialEstimator already defines that attribute.
    visualize_objective_surface(
        estimator,
        m=N_SAMPLES,
        resolution=RESOLUTION,
        num_epochs=N_EPOCHS,
        verbose=True,
    )

    print("\n4. Running adversarial estimation...")
    result = estimator.estimate(m=N_SAMPLES, num_epochs=N_EPOCHS, verbose=True)
    # Accept either a plain dict or an object exposing the solution as `.x`.
    estimated_params = result['x'] if isinstance(result, dict) else result.x
    # Store the estimate on the estimator so a later visualization call can plot it.
    estimator.estimated_params = estimated_params

    print("\n5. Results:")
    print(f"   - True parameters: a={true_params[0]}, b={true_params[1]}")
    print(f"   - Estimated parameters: a={estimated_params[0]:.4f}, b={estimated_params[1]:.4f}")
    print(
        f"   - Estimation error: a_error={abs(estimated_params[0] - true_params[0]):.4f}, "
        f"b_error={abs(estimated_params[1] - true_params[1]):.4f}"
    )

    # NOTE(review): nothing visible in this cell draws a new figure after the
    # visualization step, so these calls may render an empty figure — confirm
    # whether `estimate` plots anything, otherwise they can be dropped.
    plt.tight_layout()
    plt.show()

Testing Adversarial Estimation for Linear-in-Means Model

1. Generating test dataset...

2. Creating adversarial estimator...

3. Visualizing objective function surface...


Evaluating objective surface:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 0, Loss: 0.5689
Epoch 1, Loss: 0.1922
Epoch 2, Loss: 0.0559
Epoch 3, Loss: 0.0178
Epoch 4, Loss: 0.0082
Epoch 5, Loss: 0.0050


Evaluating objective surface:   1%|          | 1/100 [00:05<09:25,  5.71s/it]

Test accuracy for theta=[-3.0, -1.0]: 1.0000
Epoch 0, Loss: 0.4607
Epoch 1, Loss: 0.1630
Epoch 2, Loss: 0.0413
Epoch 3, Loss: 0.0124
Epoch 4, Loss: 0.0058
Epoch 5, Loss: 0.0037


Evaluating objective surface:   2%|▏         | 2/100 [00:09<07:15,  4.44s/it]

Test accuracy for theta=[-2.111111111111111, -1.0]: 1.0000
Epoch 0, Loss: 0.5948
Epoch 1, Loss: 0.3089
Epoch 2, Loss: 0.1079
Epoch 3, Loss: 0.0324
Epoch 4, Loss: 0.0127
Epoch 5, Loss: 0.0070


Evaluating objective surface:   3%|▎         | 3/100 [00:12<06:33,  4.05s/it]

Test accuracy for theta=[-1.2222222222222223, -1.0]: 1.0000
Epoch 0, Loss: 0.5623
Epoch 1, Loss: 0.3170
Epoch 2, Loss: 0.1266
Epoch 3, Loss: 0.0420
Epoch 4, Loss: 0.0171
Epoch 5, Loss: 0.0094


Evaluating objective surface:   4%|▍         | 4/100 [00:16<06:07,  3.83s/it]

Test accuracy for theta=[-0.3333333333333335, -1.0]: 1.0000
Epoch 0, Loss: 0.6452
Epoch 1, Loss: 0.5440
Epoch 2, Loss: 0.4236
Epoch 3, Loss: 0.2998
Epoch 4, Loss: 0.1976
Epoch 5, Loss: 0.1280


Evaluating objective surface:   5%|▌         | 5/100 [00:19<05:52,  3.71s/it]

Test accuracy for theta=[0.5555555555555554, -1.0]: 0.9967
Epoch 0, Loss: 0.6847
Epoch 1, Loss: 0.6601
Epoch 2, Loss: 0.6214
Epoch 3, Loss: 0.5646
Epoch 4, Loss: 0.4980


Evaluating objective surface:   6%|▌         | 6/100 [00:23<05:46,  3.68s/it]

Epoch 5, Loss: 0.4257
Test accuracy for theta=[1.4444444444444446, -1.0]: 0.9250
Epoch 0, Loss: 0.6772
Epoch 1, Loss: 0.5585
Epoch 2, Loss: 0.4128
Epoch 3, Loss: 0.2572
Epoch 4, Loss: 0.1407
Epoch 5, Loss: 0.0736


Evaluating objective surface:   7%|▋         | 7/100 [00:26<05:36,  3.62s/it]

Test accuracy for theta=[2.333333333333333, -1.0]: 1.0000
Epoch 0, Loss: 0.6203
Epoch 1, Loss: 0.4824
Epoch 2, Loss: 0.3252
Epoch 3, Loss: 0.1666
Epoch 4, Loss: 0.0684
Epoch 5, Loss: 0.0288


Evaluating objective surface:   8%|▊         | 8/100 [00:30<05:27,  3.56s/it]

Test accuracy for theta=[3.2222222222222214, -1.0]: 1.0000
Epoch 0, Loss: 0.6402
Epoch 1, Loss: 0.4588
Epoch 2, Loss: 0.2588
Epoch 3, Loss: 0.1141
Epoch 4, Loss: 0.0471
Epoch 5, Loss: 0.0221


Evaluating objective surface:   9%|▉         | 9/100 [00:33<05:20,  3.53s/it]

Test accuracy for theta=[4.111111111111111, -1.0]: 1.0000
Epoch 0, Loss: 0.5060
Epoch 1, Loss: 0.3272
Epoch 2, Loss: 0.1693
Epoch 3, Loss: 0.0727
Epoch 4, Loss: 0.0314
Epoch 5, Loss: 0.0162


Evaluating objective surface:  10%|█         | 10/100 [00:37<05:20,  3.56s/it]

Test accuracy for theta=[5.0, -1.0]: 1.0000
Epoch 0, Loss: 0.4020
Epoch 1, Loss: 0.1415
Epoch 2, Loss: 0.0368
Epoch 3, Loss: 0.0109
Epoch 4, Loss: 0.0051
Epoch 5, Loss: 0.0032


Evaluating objective surface:  11%|█         | 11/100 [00:41<05:20,  3.60s/it]

Test accuracy for theta=[-3.0, -0.33333333333333337]: 1.0000
Epoch 0, Loss: 0.4873
Epoch 1, Loss: 0.1660
Epoch 2, Loss: 0.0430
Epoch 3, Loss: 0.0136
Epoch 4, Loss: 0.0064
Epoch 5, Loss: 0.0041


Evaluating objective surface:  12%|█▏        | 12/100 [00:44<05:20,  3.64s/it]

Test accuracy for theta=[-2.111111111111111, -0.33333333333333337]: 1.0000
Epoch 0, Loss: 0.6520
Epoch 1, Loss: 0.3765
Epoch 2, Loss: 0.1546
Epoch 3, Loss: 0.0498
Epoch 4, Loss: 0.0179


Evaluating objective surface:  13%|█▎        | 13/100 [00:48<05:19,  3.67s/it]

Epoch 5, Loss: 0.0089
Test accuracy for theta=[-1.2222222222222223, -0.33333333333333337]: 1.0000
Epoch 0, Loss: 0.6066
Epoch 1, Loss: 0.3727
Epoch 2, Loss: 0.1681
Epoch 3, Loss: 0.0600
Epoch 4, Loss: 0.0237


Evaluating objective surface:  14%|█▍        | 14/100 [00:52<05:25,  3.78s/it]

Epoch 5, Loss: 0.0123
Test accuracy for theta=[-0.3333333333333335, -0.33333333333333337]: 1.0000
Epoch 0, Loss: 0.6767
Epoch 1, Loss: 0.5952
Epoch 2, Loss: 0.4821
Epoch 3, Loss: 0.3441
Epoch 4, Loss: 0.2174


Evaluating objective surface:  15%|█▌        | 15/100 [00:56<05:31,  3.90s/it]

Epoch 5, Loss: 0.1294
Test accuracy for theta=[0.5555555555555554, -0.33333333333333337]: 0.9983
Epoch 0, Loss: 0.6862
Epoch 1, Loss: 0.6520
Epoch 2, Loss: 0.5944
Epoch 3, Loss: 0.5135
Epoch 4, Loss: 0.4163


Evaluating objective surface:  16%|█▌        | 16/100 [01:00<05:22,  3.84s/it]

Epoch 5, Loss: 0.3251
Test accuracy for theta=[1.4444444444444446, -0.33333333333333337]: 0.9842
