# Comparison MLE vs MSM
In this notebook we set up the experiments for comparing the two methods.

In particular, for each simulation, we create a trajectory of an opinion dynamics model with some values of $\epsilon$ and $\mu$, and we estimate $\epsilon$ with MSM and with MLE.

In this way, we create the same conditions for comparing the two methods.

Note that FBCM, PBCM and NBCM were previously called simple_BC, BC_observed_positive and BC_with_evidences, respectively.
In general, PGABM refers to the MLE method.

In [3]:
import sys
sys.path += ["../src"]

from opinion_dynamics_models_estimation import simulate_BC
import pandas as pd
import opinion_dynamics_models_estimation as oe


from MSM_calibrator import calibrate_params_ABM
from MSM_simulators import FBCM_simulator,PBCM_simulator,NBCM_simulator
import json

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LogNorm, Normalize
import repeat_function
from scipy.special import expit as sigmoid
from time import time

Optimizer hyperparameters found in previous analysis.

In [4]:
def _load_hyperparams(path):
    """Read a JSON file of optimizer hyperparameters and return its content.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing is done, instead of being left open until garbage collection.
    """
    with open(path, 'r') as json_file:
        return json.load(json_file)


# One hyperparameter set per model variant; each is unpacked as keyword
# arguments of the MLE estimation call for the matching model.
hyperparams_simple_BC = _load_hyperparams("../data/hyperparams_simple_bc.json")
hyperparams_obs_pos_BC = _load_hyperparams("../data/hyperparams_obs_pos_bc.json")
hyperparams_evidence_BC = _load_hyperparams("../data/hyperparams_evidence_bc.json")
hyperparams_backfire_BC = _load_hyperparams("../data/hyperparams_backfire_bc.json")

Set up the experiments so that the estimates are repeated under the same conditions for MLE and MSM, returning the values of interest for the comparison (experiment inputs, estimation time and estimation performance).
These will be used to create a dataframe summarizing all the experiments.

In [5]:
def complete_comparison_simple_BC(N, T, edge_per_t, evidences_per_t = 1, rho = 16,
                                  calibration_batches = 400, ensemble_size = 1, seed = None):
    """Run one FBCM experiment and estimate epsilon with both MSM and MLE (PGABM).

    A single trajectory is simulated with ``simulate_BC`` using ``mu = 0.1``
    and an ``epsilon`` drawn uniformly from [0, 0.5); both estimators are then
    applied to that same trajectory so they are compared under identical
    conditions.

    Parameters
    ----------
    N, T, edge_per_t, evidences_per_t
        Forwarded to ``simulate_BC`` (``N`` is also passed to the MSM simulator).
    rho
        Passed to the simulation, the MSM simulator and the MLE estimation
        together with ``epsilon`` and ``mu``.
    calibration_batches, ensemble_size
        Forwarded to ``calibrate_params_ABM``.
    seed
        Value passed to ``np.random.seed`` before ``epsilon`` is drawn.

    Returns
    -------
    dict
        The true parameters, the MSM estimate with its time and errors, and
        the MLE estimate with its time, errors and number of epochs.
    """
    np.random.seed(seed)
    # mu is kept fixed; only epsilon is randomized across experiments.
    mu = 0.1
    epsilon = np.random.random() / 2

    X, edges, evidences = simulate_BC(N, T, edge_per_t, evidences_per_t, (epsilon, mu, rho))

    # FBCM_simulator is the name imported from MSM_simulators for the model
    # formerly called simple_BC (the old BC_simulator name is not defined here).
    # NOTE(review): assumes the constructor arguments are unchanged by the rename.
    simulator = FBCM_simulator(X[0], edges, N, mu, epsilon, rho)
    calibration_df, time_calibration = calibrate_params_ABM(simulator, calibration_batches = calibration_batches, ensemble_size = ensemble_size)

    mle_estimation = oe.estimation_BC_model_from_data(X, edges, evidences, (epsilon, mu, rho), **hyperparams_simple_BC)

    epsilon_msm = calibration_df["epsilon"][0]
    abs_error_msm = np.abs(epsilon_msm - epsilon)

    return {"real_epsilon": epsilon,
            "mu": mu,
            "rho": rho,
            "epsilon_msm": epsilon_msm,
            "time_msm": time_calibration,
            "calibration_batches": calibration_batches,
            "abs_error_msm": abs_error_msm,
            "rel_error_msm": abs_error_msm / epsilon,
            "ensemble_size": ensemble_size,
            "epsilon_pgabm": mle_estimation["epsilon_estimated"],
            "time_pgabm": mle_estimation["time"],
            "abs_error_pgabm": mle_estimation["distance_epsilon"],
            "rel_error_pgabm": mle_estimation["distance_epsilon"] / epsilon,
            "num_epochs": mle_estimation["num_epochs"]
           }


def complete_comparison_BC_observed_positive(N, T, edge_per_t, evidences_per_t = 1, rho = 16, calibration_batches = 400, ensemble_size = 1, seed = None):
    """Run one PBCM experiment and estimate epsilon with both MSM and MLE (PGABM).

    A single trajectory is simulated with ``simulate_BC`` using ``mu = 0.1``
    and an ``epsilon`` drawn uniformly from [0, 0.5); both estimators are then
    applied to that same trajectory so they are compared under identical
    conditions.

    Parameters
    ----------
    N, T, edge_per_t, evidences_per_t
        Forwarded to ``simulate_BC`` (``N`` is also passed to the MSM simulator).
    rho
        Passed to the simulation, the MSM simulator and the MLE estimation
        together with ``epsilon`` and ``mu``.
    calibration_batches, ensemble_size
        Forwarded to ``calibrate_params_ABM``.
    seed
        Value passed to ``np.random.seed`` before ``epsilon`` is drawn.

    Returns
    -------
    dict
        The true parameters, the MSM estimate with its time and errors, and
        the MLE estimate with its time, errors and number of epochs.
    """
    np.random.seed(seed)
    # mu is kept fixed; only epsilon is randomized across experiments.
    mu = 0.1
    epsilon = np.random.random() / 2

    X, edges, evidences = simulate_BC(N, T, edge_per_t, evidences_per_t, (epsilon, mu, rho))

    # PBCM_simulator is the name imported from MSM_simulators for the model
    # formerly called BC_observed_positive (the old
    # BC_simulator_positive_observations name is not defined here).
    # NOTE(review): assumes the constructor arguments are unchanged by the rename.
    simulator = PBCM_simulator(X[0], edges, N, mu, epsilon, rho)
    calibration_df, time_calibration = calibrate_params_ABM(simulator, calibration_batches = calibration_batches, ensemble_size = ensemble_size)

    mle_estimation = oe.estimation_BC_model_from_data(X, edges, evidences, (epsilon, mu, rho), **hyperparams_obs_pos_BC)

    epsilon_msm = calibration_df["epsilon"][0]
    abs_error_msm = np.abs(epsilon_msm - epsilon)

    return {"real_epsilon": epsilon,
            "mu": mu,
            "rho": rho,
            "epsilon_msm": epsilon_msm,
            "time_msm": time_calibration,
            "calibration_batches": calibration_batches,
            "abs_error_msm": abs_error_msm,
            "rel_error_msm": abs_error_msm / epsilon,
            "ensemble_size": ensemble_size,
            "epsilon_pgabm": mle_estimation["epsilon_estimated"],
            "time_pgabm": mle_estimation["time"],
            "abs_error_pgabm": mle_estimation["distance_epsilon"],
            "rel_error_pgabm": mle_estimation["distance_epsilon"] / epsilon,
            "num_epochs": mle_estimation["num_epochs"]
           }


def complete_comparison_BC_with_evidences(N, T, edge_per_t, evidences_per_t, rho = 16, calibration_batches = 400, ensemble_size = 1, seed = None):
    """Run one NBCM experiment and estimate epsilon with both MSM and MLE (PGABM).

    A single trajectory is simulated with ``simulate_BC`` using ``mu = 0.1``
    and an ``epsilon`` drawn uniformly from [0, 0.5); both estimators are then
    applied to the edges and evidences of that same simulation so they are
    compared under identical conditions.

    Parameters
    ----------
    N, T, edge_per_t, evidences_per_t
        Forwarded to ``simulate_BC`` (``N`` is also passed to the MSM simulator).
    rho
        Passed to the simulation, the MSM simulator and the MLE estimation
        together with ``epsilon`` and ``mu``.
    calibration_batches, ensemble_size
        Forwarded to ``calibrate_params_ABM``.
    seed
        Value passed to ``np.random.seed`` before ``epsilon`` is drawn.

    Returns
    -------
    dict
        The true parameters, the MSM estimate with its time and errors, the
        MLE estimate with its time, errors and number of epochs, plus the
        ``X0_r2``, ``X0_mae`` and ``X0_mse`` scores reported by the MLE
        estimation.
    """
    np.random.seed(seed)
    # mu is kept fixed; only epsilon is randomized across experiments.
    mu = 0.1
    epsilon = np.random.random() / 2

    X, edges, evidences = simulate_BC(N, T, edge_per_t, evidences_per_t, (epsilon, mu, rho))

    # NBCM_simulator is the name imported from MSM_simulators for the model
    # formerly called BC_with_evidences (the old BC_simulator_X_evidences name
    # is not defined here).
    # NOTE(review): assumes the constructor arguments are unchanged by the rename.
    simulator = NBCM_simulator(N, edges, evidences, mu, epsilon, rho = rho)
    calibration_df, time_calibration = calibrate_params_ABM(simulator, calibration_batches = calibration_batches, ensemble_size = ensemble_size)

    mle_estimation = oe.estimation_BC_model_from_data(X, edges, evidences, (epsilon, mu, rho), **hyperparams_evidence_BC)

    epsilon_msm = calibration_df["epsilon"][0]
    abs_error_msm = np.abs(epsilon_msm - epsilon)

    return {"real_epsilon": epsilon,
            "mu": mu,
            "rho": rho,
            "epsilon_msm": epsilon_msm,
            "time_msm": time_calibration,
            "calibration_batches": calibration_batches,
            "abs_error_msm": abs_error_msm,
            "rel_error_msm": abs_error_msm / epsilon,
            "ensemble_size": ensemble_size,
            "epsilon_pgabm": mle_estimation["epsilon_estimated"],
            "time_pgabm": mle_estimation["time"],
            "abs_error_pgabm": mle_estimation["distance_epsilon"],
            "rel_error_pgabm": mle_estimation["distance_epsilon"] / epsilon,
            "X0_r2": mle_estimation["X0_r2"],
            "X0_mae": mle_estimation["X0_mae"],
            "X0_mse": mle_estimation["X0_mse"],
            "num_epochs": mle_estimation["num_epochs"]
           }


In [6]:
# Column layout of the result tables, grouped as: experiment inputs,
# true parameters, MSM results, MLE (PGABM) results.

# Layout shared by the FBCM and PBCM tables.
compare_simple_cols = [
    "ensemble_size", "calibration_batches", "edge_per_t", "T",
    "real_epsilon", "mu", "rho",
    "epsilon_msm", "time_msm", "abs_error_msm", "rel_error_msm",
    "epsilon_pgabm", "time_pgabm", "abs_error_pgabm", "rel_error_pgabm",
]

# NBCM table: adds the number of evidences per step and the X0 R2 score.
compare_evidences_cols = [
    "ensemble_size", "calibration_batches", "edge_per_t", "evidences_per_t", "T",
    "real_epsilon", "mu", "rho",
    "epsilon_msm", "time_msm", "abs_error_msm", "rel_error_msm",
    "epsilon_pgabm", "time_pgabm", "abs_error_pgabm", "rel_error_pgabm",
    "X0_r2",
]

# Backfire table: every epsilon/mu related quantity comes in a plus and a minus version.
compare_backfire_cols = [
    "ensemble_size", "calibration_batches", "edge_per_t", "evidences_per_t", "T",
    "real_epsilon_plus", "real_epsilon_minus", "mu_plus", "mu_minus", "rho",
    "epsilon_plus_msm", "epsilon_minus_msm", "time_msm",
    "abs_error_msm_plus", "abs_error_msm_minus",
    "rel_error_msm_plus", "rel_error_msm_minus",
    "epsilon_plus_pgabm", "epsilon_minus_pgabm", "time_pgabm",
    "abs_error_pgabm_plus", "abs_error_pgabm_minus",
    "rel_error_pgabm_plus", "rel_error_pgabm_minus",
    "X0_r2",
]

#### FBCM

In [None]:
# Reference instant for the elapsed-time log printed at the start of each round.
t0 = time()

# Resume from the results saved so far. To start from scratch, use
# pd.DataFrame([], columns = compare_simple_cols) instead.
compare_simple_df = pd.read_csv("../data/compare_simple_bc_230803.csv")

for round_idx in range(70):
    # Progress log: round number and seconds elapsed since the cell started.
    print(round_idx, round(time() - t0, 1))

    # One FBCM comparison for every (edge_per_t, T) combination.
    fbcm_runs = repeat_function.rep_simulations(
        complete_comparison_simple_BC,
        {"rho": 16, "N": 100},
        {
            "ensemble_size": [1],
            "calibration_batches": [200],
            "edge_per_t": [1,4,16,64],
            "T": [16, 32, 64, 128, 256, 512],
        },
        repetitions = 1,
    )

    # Each run is a pair of dicts; merge them into a single row per run.
    fbcm_rows = [{**rep[0], **rep[1]} for rep in fbcm_runs]
    compare_simple_df = pd.concat([compare_simple_df, pd.DataFrame(fbcm_rows)])

    # Checkpoint after every round so that an interruption loses at most one round.
    compare_simple_df.to_csv(f"../data/compare_simple_bc_230805.csv", index = None)
    

#### PBCM

In [None]:
# Reference instant for the elapsed-time log printed at the start of each round.
t0 = time()

# Resume from the results saved so far. To start from scratch, use
# pd.DataFrame([], columns = compare_simple_cols) instead.
compare_pos_df = pd.read_csv(f"../data/compare_pos_bc_230803.csv")

for round_idx in range(50):
    # Progress log: round number and seconds elapsed since the cell started.
    print(round_idx, round(time() - t0, 1))

    # One PBCM comparison for every (edge_per_t, T) combination.
    pbcm_runs = repeat_function.rep_simulations(
        complete_comparison_BC_observed_positive,
        {"rho": 16, "N": 100},
        {
            "ensemble_size": [1],
            "calibration_batches": [200],
            "edge_per_t": [1,4,16,64],
            "T": [16, 32, 64, 128, 256, 512],
        },
        repetitions = 1,
    )

    # Each run is a pair of dicts; merge them into a single row per run.
    pbcm_rows = [{**rep[0], **rep[1]} for rep in pbcm_runs]
    compare_pos_df = pd.concat([compare_pos_df, pd.DataFrame(pbcm_rows)])

    # Checkpoint after every round so that an interruption loses at most one round.
    compare_pos_df.to_csv(f"../data/compare_pos_bc_230805.csv", index = None)
    

#### NBCM

In [None]:
# Reference instant for the elapsed-time log printed at the start of each round.
t0 = time()

# Resume from the results saved so far.
compare_evidences_df = pd.read_csv(f"../data/compare_evidences_bc_230803.csv")

for round_idx in range(50):
    # Progress log: round number and seconds elapsed since the cell started.
    print(round_idx, round(time() - t0, 1))

    # One NBCM comparison for every (edge_per_t, evidences_per_t, T) combination.
    nbcm_runs = repeat_function.rep_simulations(
        complete_comparison_BC_with_evidences,
        {"rho": 16, "N": 100},
        {
            "ensemble_size": [1],
            "calibration_batches": [200],
            "edge_per_t": [1,4,16,64],
            "evidences_per_t": [4,8,16],
            "T": [16, 32, 64, 128, 256, 512],
        },
        repetitions = 1,
    )

    # Each run is a pair of dicts; merge them into a single row per run.
    nbcm_rows = [{**rep[0], **rep[1]} for rep in nbcm_runs]
    compare_evidences_df = pd.concat([compare_evidences_df, pd.DataFrame(nbcm_rows)])

    # Checkpoint after every round so that an interruption loses at most one round.
    compare_evidences_df.to_csv(f"../data/compare_evidences_bc_230805.csv", index = None)
    

In [179]:
# Load the saved comparison tables (one per model) for the analysis.
# The files are read in the listed order and bound to the matching names.
(compare_simple_df,
 compare_pos_df,
 compare_evidences_df,
 compare_backfire_df) = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/compare_simple_bc_230726.csv",
        "../data/compare_pos_bc_230726.csv",
        "../data/compare_evidences_bc_230730.csv",
        "../data/compare_backfire_bc_230730.csv",
    )
]
