# ML Estimation of the Partially Observed Bounded Confidence Model
In this notebook we estimate the parameter $\epsilon$ in a BC model with full observations on the opinions and partial observations on the edges.

The log-likelihood to be maximised is 

$\mathcal{L}(\epsilon) = \sum\limits_{j: s_j = 1} \log \kappa_{\epsilon}(\hat{e}_j) + \sum\limits_{j: s_j = 0} \log \sum\limits_{e \in V^2} (1 - \kappa_{\epsilon}(e)) P(e)$

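where $\kappa_{\epsilon}(e) = \sigma(\rho \cdot (\epsilon - | \Delta X |))$ is the probability that the interaction on edge $e$ is positive, $\Delta X$ is the difference between the opinions of the two endpoints of $e$, and $s = 1$ if the interaction is positive and $s = 0$ otherwise.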
$e$ are the edges (latent), and $\hat{e} = e$ if the edges are observed ($s = 1$) and $\hat{e} = \emptyset$ if the edges are hidden ($s = 0$).

We maximise $\mathcal{L}$ by minimising the negative log-likelihood $-\mathcal{L}$ with gradient descent.


In [1]:
import torch
import numpy as np
import torch.nn as nn

from scipy.special import expit as sigmoid
from scipy.special import logit
from time import time
from tqdm import tqdm
from operator import itemgetter 

import sys
sys.path += ["../src"]
from simulator_opinion_dynamics import kappa_from_epsilon
import simulator_opinion_dynamics as sod
from initialize_model import EarlyStopping,RandomizeEpsilon,choose_optimizer


In [2]:
class BC_Observe_positive_Estimation(nn.Module):
    """Likelihood model for epsilon when only positive interactions are observed.

    The single trainable parameter is ``theta``; epsilon is recovered as
    ``sigmoid(theta) / 2`` so that it always lies in (0, 0.5).

    Parameters
    ----------
    parameters0 : tuple
        ``(epsilon0, rho)``: the initial value of epsilon and the fixed
        steepness ``rho`` passed to ``kappa_from_epsilon``.
    X : torch.Tensor
        2-D tensor unpacked as ``(T, N)``.
    edges : torch.Tensor
        3-D tensor whose second dimension is the number of edges per time
        step. Entry 2 of the last axis is summed to count the positive
        interactions (presumably the 0/1 sign ``s`` -- confirm against
        ``sod.convert_edges_uvst``).
    sample_pairs : int
        Number of random node pairs drawn per time step in ``forward`` to
        estimate the probability of a negative interaction.
    """

    def __init__(self, parameters0, X, edges, sample_pairs=50):
        super().__init__()

        # epsilon0 is the initialization of epsilon
        epsilon0, rho = parameters0
        self.rho = rho

        # keep only the columns of (u, v, s, t) whose s row equals 1
        uvst = sod.convert_edges_uvst(edges)
        u, v, s, t = uvst[:, uvst[2, :] == 1]

        self.X = X
        # opinion differences of the observed positive interactions;
        # computed once here and reused by every forward pass
        self.diff_X = X[t, u] - X[t, v]

        # optimize theta, the logit of 2 * epsilon (this bounds epsilon in [0, 0.5])
        theta = torch.tensor([logit(2 * epsilon0)], requires_grad=True)
        self.theta = nn.Parameter(theta)

        _, self.edge_per_t, _ = edges.size()
        self.T, self.N = X.size()

        # sample_pairs is the number of pairs sampled at each time to compute
        # the sample mean of the probability of a negative interaction
        self.sample_pairs = sample_pairs
        # per time step: edges that are not positive interactions
        self.n_negative_interactions = self.edge_per_t - edges.sum(axis=1)[:, 2]

    def forward(self):
        """Return ``(kappa_pos, kappa_neg)`` for the current value of epsilon.

        ``kappa_pos`` is ``kappa_from_epsilon`` evaluated on the observed
        positive interactions. ``kappa_neg`` has one entry per time step
        (``T - 1`` entries) and is one minus the mean of ``kappa_from_epsilon``
        over ``sample_pairs`` randomly drawn node pairs.

        The pairs are drawn with NumPy's global random state, so the result
        is reproducible only if ``np.random.seed`` was set by the caller.
        """
        epsilon = torch.sigmoid(self.theta) / 2
        # probability of having the observed positive interactions
        kappa_pos = kappa_from_epsilon(epsilon, self.diff_X, self.rho)

        n_steps = self.T - 1
        # One row of N uniform draws per sampled pair. argpartition(2) moves the
        # indices of the two smallest draws to the front, which yields two
        # distinct node indices per row (this requires N >= 3).
        random_scores = np.random.rand(n_steps * self.sample_pairs, self.N)
        pairs = random_scores.argpartition(2, axis=1)[:, :2]
        u_sample, v_sample = torch.tensor(pairs.T)

        # index tensors of shape (T - 1, sample_pairs): row i selects nodes of X[i]
        u_index = u_sample.reshape(self.sample_pairs, n_steps).T
        v_index = v_sample.reshape(self.sample_pairs, n_steps).T
        diff_sample_X = torch.gather(self.X, 1, u_index) - torch.gather(self.X, 1, v_index)

        # probability of having one negative interaction (as sample mean over the sampled pairs)
        kappa_neg = 1 - kappa_from_epsilon(epsilon, diff_sample_X, self.rho).mean(axis=1)

        return kappa_pos, kappa_neg

    @staticmethod
    def neg_log_likelihood_function(kappa_pos, kappa_neg, n_negative_interactions, t_minibatch, t_pos_minibatch):
        """Negative log-likelihood over all time steps.

        ``t_minibatch`` and ``t_pos_minibatch`` are ignored; they are accepted
        so that both loss functions can be called with the same arguments.
        """
        log_likelihood_observed = torch.log(kappa_pos).sum()
        # each time step contributes its log-probability once per negative interaction
        log_likelihood_non_observed = (torch.log(kappa_neg) * n_negative_interactions).sum()

        return -log_likelihood_observed - log_likelihood_non_observed

    @staticmethod
    def neg_log_likelihood_function_minibatch(kappa_pos, kappa_neg, n_negative_interactions, t_minibatch, t_pos_minibatch):
        """Negative log-likelihood restricted to a minibatch of time steps.

        ``t_pos_minibatch`` indexes the entries of ``kappa_pos`` to keep and
        ``t_minibatch`` indexes the time steps of ``kappa_neg`` to keep.
        """
        log_likelihood_observed = torch.log(kappa_pos)[t_pos_minibatch].sum()
        weighted_log_neg = torch.log(kappa_neg) * n_negative_interactions
        log_likelihood_non_observed = weighted_log_neg[t_minibatch].sum()

        return -log_likelihood_observed - log_likelihood_non_observed


In [3]:
def list_of_t_indices(t, T):
    """Group the positions of ``t`` by the time step stored at each position.

    Used to look up, for every time step, which entries of a flat list of
    positive interactions belong to it.

    Parameters
    ----------
    t : sequence of int
        Time step of each interaction. May be a 1-D torch tensor, a NumPy
        array, or any plain sequence of integers.
    T : int
        Total number of time steps; the result has ``T - 1`` buckets.

    Returns
    -------
    list of list of int
        ``result[k]`` holds the positions ``i`` (in increasing order) for
        which ``t[i] == k``.
    """
    # tensors and arrays are converted once to plain Python ints
    steps = t.tolist() if hasattr(t, "tolist") else list(t)
    l_indices = [[] for _ in range(T - 1)]
    for ind, step in enumerate(steps):
        l_indices[step].append(ind)
    return l_indices
  


In [4]:
def gradient_descent_BC_observe_positive(X, edges, rho, num_epochs, sample_pairs=50,
                                         epsilon0=0.25, optimizer_name="adam",
                                         lr=0.05, hide_progress=True, minibatch_size=0, seed=None,
                                         early_stopping_kw=None):
    """Estimate epsilon by minimising the negative log-likelihood.

    Parameters
    ----------
    X, edges
        Opinions and edges, forwarded to ``BC_Observe_positive_Estimation``.
    rho
        Fixed steepness parameter of the model.
    num_epochs : int
        Maximum number of optimisation steps.
    sample_pairs : int
        Number of node pairs the model samples per time step.
    epsilon0 : float
        Initial value of epsilon.
    optimizer_name, lr
        Forwarded to ``choose_optimizer``.
    hide_progress : bool
        If true, the tqdm progress bar is disabled.
    minibatch_size : int
        ``0`` uses all time steps at every epoch; a positive value uses that
        many randomly chosen time steps per epoch.
    seed : int or None
        If given, seeds NumPy's global random state (used by the model's pair
        sampling) and a private torch generator for the minibatch selection.
    early_stopping_kw : dict or None
        Keyword arguments for ``EarlyStopping``; must contain ``"min_epochs"``.
        ``None`` selects the default settings listed in the body.

    Returns
    -------
    dict
        ``"epsilon"``: ``epsilon0`` followed by the estimate after each epoch;
        ``"loss"``: the loss of each epoch; ``"time"``: seconds spent in the
        optimisation loop.

    Raises
    ------
    ValueError
        If ``minibatch_size`` is negative.
    """
    if minibatch_size < 0:
        raise ValueError("minibatch_size must be >= 0, got {}".format(minibatch_size))
    if early_stopping_kw is None:
        # built per call so that the default is never shared between calls
        early_stopping_kw = {"patience": 20, "min_delta": 1e-5, "min_epochs": 20,
                             "long_run_delta": 1e-5, "long_run_diff": 10, "long_run_patience": 5}

    torch_generator = None
    if seed is not None:
        np.random.seed(seed)
        # private generator: reproducible minibatches without touching torch's global state
        torch_generator = torch.Generator().manual_seed(seed)

    uvst = sod.convert_edges_uvst(edges)

    model_class = BC_Observe_positive_Estimation
    model = model_class((epsilon0, rho), X, edges, sample_pairs=sample_pairs)
    if minibatch_size == 0:
        loss_function = model_class.neg_log_likelihood_function
    else:
        loss_function = model_class.neg_log_likelihood_function_minibatch

    T, N = X.shape

    t_pos = uvst[3, uvst[2, :] == 1]
    # indices of the positive interactions, for each time
    indices_t = list_of_t_indices(t_pos, T)

    early_stopping = EarlyStopping(**early_stopping_kw)
    optimizer = choose_optimizer(optimizer_name, lr, model)

    history = {"epsilon": [epsilon0], "loss": []}
    t_minibatch, t_pos_minibatch = None, None
    t0 = time()

    for epoch in tqdm(range(num_epochs), disable=hide_progress):

        if minibatch_size > 0:
            t_minibatch = torch.randperm(T - 1, generator=torch_generator)[:minibatch_size]
            # positions of the positive interactions in the selected time steps;
            # the explicit loop also handles a minibatch holding a single time step
            t_pos_minibatch = [ind for step in t_minibatch.tolist() for ind in indices_t[step]]

        kappa_pos, kappa_neg = model()
        loss = loss_function(kappa_pos, kappa_neg, model.n_negative_interactions, t_minibatch, t_pos_minibatch)

        loss.backward()
        optimizer.step()

        history["epsilon"].append(sigmoid(model.theta.item()) / 2)
        history["loss"].append(loss.item())

        optimizer.zero_grad()

        if epoch > early_stopping_kw["min_epochs"]:
            early_stopping(history["epsilon"][-3], history["epsilon"][-2], history["epsilon"][-1], epoch)
        if early_stopping.early_stop:
            break

    t1 = time()
    history["time"] = t1 - t0

    return history
    


In [5]:
# Simulation settings, one per line; all are passed to sod.simulate_BC below.
N = 100
T = 256
edge_per_t = 4
evidences_per_t = 4

# Parameters of the bounded confidence model used to generate the data.
epsilon = 0.35
mu = 0.1
rho = 16

# Generate the synthetic opinions, edges and evidences.
X, edges, evidences = sod.simulate_BC(
    N, T, edge_per_t, evidences_per_t, (epsilon, mu, rho)
)

In [11]:
# Run the estimation on the simulated data, starting from epsilon0 = 0.25
# with the Adam optimizer; the result is bound to `history`.
history = gradient_descent_BC_observe_positive(
    X,
    edges,
    rho,
    num_epochs=100,
    epsilon0=0.25,
    optimizer_name="adam",
    lr=0.05,
    seed=222,
)
    