# Simulation and inference of degree-corrected Poisson RGG

$$P(G|D, \theta, k) = \prod_{i<j} \frac{(k_i k_j \mathcal{F}(D_{ij}, \theta))^{A_{ij}}}{A_{ij}!} \exp(- k_i k_j \mathcal{F}(D_{ij}, \theta))$$

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import sys
sys.path.append('../../utils')

In [None]:
from graphs import (get_independent_components_rgg, make_inter_vertex_distances, 
                    deg_corrected_poissonian_random_geometric_graph)

In [None]:
from plot import plot_multigraph
from networkx.linalg.graphmatrix import adjacency_matrix
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import math
import arviz as az

In [None]:
import nest_asyncio
nest_asyncio.apply()
import stan

In [None]:
SEED = 42  # seed used for both the NumPy RandomState and the Stan build
n = 10  # number of vertices (physicians) in the first, small simulation
dim = 2  # NOTE(review): not referenced in the visible notebook; presumably the embedding dimension, confirm

In [None]:
lambda_r_truth = 2


def exponential_kernel(dist, ki, kj):
    """Return the degree-corrected exponential connection kernel.

    Computes ``ki * kj * exp(-lambda_r_truth * dist)``, which has the form
    k_i k_j F(D_ij, theta) of the likelihood stated at the top of the
    notebook, with F an exponential decay in the distance.

    Args:
        dist: Distance between the two vertices.
        ki: Degree-correction factor of the first vertex.
        kj: Degree-correction factor of the second vertex.

    Returns:
        The product of both factors and the exponential distance decay.
    """
    # lambda_r_truth is looked up at call time, so rebinding the module-level
    # name later changes the kernel (same behaviour as the former lambda).
    return ki * kj * math.exp(-lambda_r_truth * dist)

In [None]:
# Ground-truth degree-correction ("KOLness") factor per vertex:
# every vertex gets 1, except vertex 0 which gets 10.
kolness_truth = np.ones(n)
kolness_truth[0] = 10

In [None]:
# Seeded generator handed to the graph simulation
r = np.random.RandomState(SEED)
# NOTE(review): the meaning of the second positional argument (1000) is defined
# by the helper in the `graphs` module, which is not visible here; confirm.
G_poisson = deg_corrected_poissonian_random_geometric_graph(
    n, 1000, kolness_truth, exponential_kernel, r, 
)

In [None]:
# Adjacency matrix of the simulated graph, converted to a dense array in one step
adj = adjacency_matrix(G_poisson).toarray()

In [None]:
# Draw the simulated graph; the helper's return value is kept in `ax`
ax = plot_multigraph(G_poisson)

In [None]:
# Inter-vertex distances of the simulated graph; passed to Stan as the 'distances' data
distances = make_inter_vertex_distances(G_poisson)

## Inference

In [None]:
from scipy.stats import expon

In [None]:
# Plot the Exponential density with rate 0.1 on [0, 40]. The Stan model uses
# exponential(0.1) as the prior for both lambda_r and kolness.
# scipy's expon takes scale = 1 / rate, hence scale = 1 / 0.1.
x = np.linspace(0,40)
pdf = expon.pdf(x, scale = 1./0.1)
plt.plot(x, pdf)

In [None]:
def make_dc_poisson_rgg_model(n_physicians):
    """Build the Stan program for the degree-corrected Poisson RGG.

    The number of physicians is written into the program text as the size of
    every array and as the loop bounds, so the returned source is specific to
    one network size.

    The generated program declares:
      * data: ``distances`` (non-negative reals) and ``adj`` (non-negative
        integers), both ``n_physicians`` x ``n_physicians``;
      * parameters: ``lambda_r`` and one ``kolness`` value per physician, all
        constrained to be non-negative;
      * transformed parameters: ``conn_kernel[i,j] = kolness[i] * kolness[j]
        * exp(-lambda_r * distances[i,j])`` for every pair (i, j);
      * model: exponential(0.1) priors on ``lambda_r`` and ``kolness`` and a
        Poisson likelihood for ``adj[i,j]`` over the pairs with ``j > i``.

    Args:
        n_physicians: Number of vertices; formatted into the array sizes and
            loop bounds of the program.

    Returns:
        The Stan program source as a string.
    """
    # Doubled braces render as the literal braces of the Stan program;
    # {size} is the only substitution field.
    template = """
    data {{
        array[{size}, {size}] real<lower=0> distances;
        array[{size}, {size}] int<lower=0> adj;                                
    }}
    parameters {{
        real<lower=0> lambda_r;        
        array[{size}] real<lower=0> kolness;
    }}
    transformed parameters {{
        array[{size}, {size}] real<lower=0> conn_kernel;
        for (i in 1:{size})
            for (j in 1:{size})
                conn_kernel[i,j] = kolness[i] * kolness[j] * exp(-lambda_r * distances[i,j]);  
    }}
    model {{        
        lambda_r ~ exponential(0.1);
        kolness ~ exponential(0.1);
        
        for (i in 1:{size})
            for (j in i+1:{size})        
                adj[i,j] ~ poisson(conn_kernel[i,j]);        
    }}
    """
    return template.format(size=n_physicians)

In [None]:
# Stan program text with every array size and loop bound fixed to the current n
poisson_rgg_model_dc = make_dc_poisson_rgg_model(n)

In [None]:
# Keys match the variable names declared in the data block of the Stan program
poisson_rgg_data = {'distances': distances, 'adj': adj}

In [None]:
# Build (compile) the Stan program with the data attached, using the shared seed
posterior = stan.build(poisson_rgg_model_dc, data=poisson_rgg_data, random_seed=SEED)

In [None]:
# Sample with 4 chains, requesting 1000 samples from each
fit = posterior.sample(num_chains=4, num_samples=1000)

In [None]:
# Convert the PyStan fit into an ArviZ object for summaries and plots
inferred = az.from_pystan(fit)

In [None]:
# Summary table restricted to lambda_r and kolness (conn_kernel is left out)
summary = az.summary(inferred, var_names = ['lambda_r', 'kolness'])
# Bare expression so the notebook renders the table
summary

In [None]:
# Trace plots of lambda_r and every kolness entry
az.plot_trace(inferred, var_names = ['lambda_r', 'kolness']);

In [None]:
# One {'ref_val': value} dict per vertex, built from the true KOLness values.
# The values are iterated directly; the position index was never used.
[{'ref_val': ki} for ki in kolness_truth]

In [None]:
# Posterior densities with the simulation truth as reference values:
# the list holds lambda_r_truth first, followed by one entry per kolness element.
az.plot_posterior(inferred, var_names = ['lambda_r', 'kolness'], ref_val = [lambda_r_truth] + list(kolness_truth));

In [None]:
# Scatter the posterior-mean KOLness of every vertex against the value used in
# the simulation.
fig, ax = plt.subplots()
ax.plot(kolness_truth, summary.loc[[f'kolness[{i}]' for i in range(n)], 'mean'], '.k')

# Red identity line: points on it have a posterior mean equal to the truth
x = np.linspace(0,max(kolness_truth))
ax.plot(x, x, '-r', label='identity')
ax.axis('square')

ax.set_xlabel('Truth')
ax.set_ylabel('Posterior mean')
# The identity line carries a label, so the legend has to be drawn for it to
# show up (the large-network version of this plot already does so).
ax.legend();

## Large numbers of physicians

In [None]:
from seaborn import heatmap

In [None]:
# Re-seed so the larger simulation starts from the same generator state
r = np.random.RandomState(SEED)

n = 100  # number of physicians
n_kol = 10  # number of KOLs

# The first n_kol physicians get a KOLness drawn uniformly from [3, 7);
# everyone else keeps the baseline value 1.
kolness_truth = np.concatenate([
    r.uniform(3, 7, size=n_kol),
    np.ones(n - n_kol),
])

G_poisson = deg_corrected_poissonian_random_geometric_graph(
    n,
    1000,
    kolness_truth,
    exponential_kernel,
    r,
)

# Stan inputs: dense adjacency matrix and inter-vertex distances
adj = adjacency_matrix(G_poisson).toarray()
distances = make_inter_vertex_distances(G_poisson)

In [None]:
# Bare expression so the notebook shows the simulated KOLness vector
kolness_truth

In [None]:
# Heatmap of the dense adjacency matrix
heatmap(adj);

In [None]:
# Regenerate the Stan program: its array sizes are fixed in the text, and n is now 100
poisson_rgg_model_dc = make_dc_poisson_rgg_model(n)

In [None]:
# Data for the larger network; keys match the Stan data block
poisson_rgg_data = {'distances': distances, 'adj': adj}

In [None]:
# Build (compile) the larger model with its data, using the shared seed
posterior = stan.build(poisson_rgg_model_dc, data=poisson_rgg_data, random_seed=SEED)

In [None]:
# Same sampler settings as the small example: 4 chains, 1000 samples each
fit = posterior.sample(num_chains=4, num_samples=1000)

In [None]:
# Convert the PyStan fit into an ArviZ object for summaries and plots
inferred = az.from_pystan(fit)

In [None]:
# Summary table for lambda_r and kolness; not displayed here, its 'mean' column
# feeds the truth-vs-posterior scatter plot at the end of the notebook
summary = az.summary(inferred, var_names = ['lambda_r', 'kolness'])

In [None]:
# Trace plot for lambda_r only (the kolness vector has n entries here)
az.plot_trace(inferred, var_names = ['lambda_r']);

In [None]:
# Posterior density of lambda_r with the simulation truth as reference value
az.plot_posterior(inferred, var_names = ['lambda_r'], ref_val = [lambda_r_truth]);

In [None]:
# Scatter the posterior-mean KOLness of every physician against the value used
# in the simulation.
fig, ax = plt.subplots()
ax.plot(kolness_truth, summary.loc[[f'kolness[{i}]' for i in range(n)], 'mean'], '.k')

# Red identity line: points on it have a posterior mean equal to the truth
x = np.linspace(0,max(kolness_truth))
ax.plot(x, x, '-r', label='identity')
ax.axis('square')

ax.set_xlabel('KOLness (truth)')
ax.set_ylabel('Inferred KOLness (Posterior mean)')
ax.legend();