# Validation

This notebook contains the steps followed to build the validated networks, starting from the original retweet network.

In [1]:
import numpy as np
import networkx as nx
from NEMtropy import DirectedGraph
from NEMtropy.network_functions import build_adjacency_from_edgelist
from tqdm import tqdm
import matplotlib.pyplot as plt
from validation import Graph_validation

import graph_tool.all as gt

### Here we estimate the parameters of the CReMa model.

Let's load all the relevant information regarding the original graph.

In [2]:
# Load the original network from disk
G = gt.load_graph("Left.gt")

# Edge property map "weight" associated with each link
weights = G.edge_properties["weight"]

# Weighted adjacency matrix built from those edge weights
adj_weigh = gt.adjacency(G, weight=weights)


# Vertex property map "id" holding the identifier of each node
id_map = G.vertex_properties["id"]

# Identifiers of all vertices, listed in vertex-iteration order
ordered_ids = [id_map[vertex] for vertex in G.vertices()]

We prepare the quantities that will be passed to CReMa to estimate the parameters.

In [None]:
# Binary structure of the network: every non-zero weight is mapped to 1
adj_bin = adj_weigh.astype(bool).astype(int)

# Row sums and column sums of the weighted adjacency matrix, as flat 1-D arrays.
# NOTE(review): graph-tool documents that, for directed graphs, entry (i, j) of
# adjacency() refers to the edge j -> i. If adj_weigh comes straight from
# gt.adjacency, confirm that the row sums really are the out-strengths (and the
# column sums the in-strengths) under the convention the model expects.
out_strength = np.asarray(adj_weigh.sum(axis=1)).ravel()
in_strength = np.asarray(adj_weigh.sum(axis=0)).ravel()

# Initialise the graph instance with the strength sequence (out-strengths first,
# then in-strengths)
graph_weighted = DirectedGraph(strength_sequence=np.hstack((out_strength, in_strength)))

We use the internal solver to maximize the likelihood and estimate the parameters.

In [2]:
# Options for the CReMa likelihood maximisation.
# NOTE(review): `initial_values` is only assigned in a later cell of this
# notebook (from the saved parameter files), so this cell fails on a fresh
# kernel run top-to-bottom -- confirm the intended execution order.
# NOTE(review): max_steps=1 allows a single solver step; presumably the initial
# guess is already the converged solution -- confirm.
crema_solver_options = dict(
    model="crema",
    method="quasinewton",
    initial_guess=initial_values,
    adjacency=adj_bin,
    verbose=False,
    max_steps=1,
)
graph_weighted.solve_tool(**crema_solver_options)

In [13]:
# Keep the fitted in/out parameter vectors exposed by the solved graph object
b_in, b_out = graph_weighted.b_in, graph_weighted.b_out

Once the parameters are estimated, we can proceed with the validation of the links using the 'Graph_validation' function.

In [None]:
# Validate the links of the weighted network against the fitted parameters
validated_edges = Graph_validation(
    adj_weigh,
    ordered_ids,
    b_in,
    b_out,
)

As output, we have the list of all edges that have been determined as significant according to our model.

# Function To Validate Links

This is the function I built to validate the links; it works with sparse matrices in order to be faster.
As input it takes the list of ids built earlier, so that the output is the list of validated edges expressed with the real identifiers.

### Build the graph from the edge list and save it

In [None]:
# Load the stored retweet graph (plain string: the path has no placeholders)
graph_with_weight = gt.load_graph('/sdf/3stepmodel/retweet_network/retweet_full/Full_only_lr.gt')

In [None]:
# Elements of `ss` that are not contained in `W`.
# NOTE(review): neither `ss` nor `W` is defined in the visible part of this
# notebook -- confirm where they come from before relying on this cell.
missing = [candidate for candidate in ss if candidate not in W]

In [None]:
# Number of entries collected in `missing`
len(missing)

In [None]:
# Build a directed graph-tool graph from the validated edge list and save it.
# `validated_edges` is an iterable of (source_id, target_id, weight) triples.
graph = gt.Graph(directed=True)

# Add one vertex per distinct endpoint id, in order of first appearance so the
# vertex numbering is reproducible across runs.
vertex_map = {}
for source, target, _ in validated_edges:
    for node_id in (source, target):
        if node_id not in vertex_map:
            vertex_map[node_id] = graph.add_vertex()

# Insert edges and weights in a single call. Each row carries its own weight,
# so the weight is bound to the edge it belongs to. Pairing graph.edges() with
# the input list afterwards is unsafe: graph-tool does not guarantee that edge
# iteration follows insertion order.
weights = graph.new_edge_property("double")
edge_rows = [
    (int(vertex_map[source]), int(vertex_map[target]), float(weight))
    for source, target, weight in validated_edges
]
graph.add_edge_list(edge_rows, eprops=[weights])
graph.edge_properties["weight"] = weights

# Store the original identifier of every vertex as a string property
vertex_ids = graph.new_vertex_property("string")
for node_id, vertex in vertex_map.items():
    vertex_ids[vertex] = str(node_id)

graph.vertex_properties["id"] = vertex_ids

# NOTE(review): `path_to_save` is assigned in a later cell of this notebook;
# that cell must have been executed before this one.
graph.save(path_to_save + "Full_only_lr.gt")


In [None]:
# List the files currently present in the output directory of the validated graphs
ls /sdf/3stepmodel/ensamble/validated_PNAS/

In [None]:

# Build a networkx version of the validated graph
# (networkx is already imported in the first cell; the import is kept here so
# that this cell also runs on its own)
import networkx as nx

# The validated edge list: (source_id, target_id, weight) triples

# Create the directed graph.
# add_weighted_edges_from stores the third element of each triple as the
# "weight" edge attribute; add_edges_from would instead expect that third
# element to be an attribute dictionary and fail on a numeric weight.
# NOTE(review): this rebinds `G`, which earlier in the notebook refers to the
# graph-tool graph loaded from disk.
G = nx.DiGraph()
G.add_weighted_edges_from(validated_edges)

In [None]:
#nx.write_graphml(G, path_to_save + 'Right.graphml')

# Comparison between our validated graph and the edge list

In [None]:
# Input locations: original retweet graphs and stored model parameters
path_to_graphs = '/sdf/3stepmodel/retweet_network/retweet_full/'
path_to_parameters = '/sdf/3stepmodel/ensamble/dcm_parameters/'

# Previously validated graphs, used for comparison
path_to_validated_graphs = '/sdf/3stepmodel/ensamble/validated_graphs/'

# Output location for the graphs produced by this notebook
path_to_save = '/sdf/3stepmodel/ensamble/validated_PNAS/'

In [None]:
# Load the stored out- and in-parameter vectors for the "Left" network
b_out = np.load(path_to_parameters + 'Left_out.npy')
b_in = np.load(path_to_parameters + 'Left_in.npy')

# Initial guess for the solver: out-parameters first, then in-parameters
initial_values = np.concatenate([b_out, b_in])

In [None]:
#np.save(path_to_parameters + 'Full_only_lr_in.npy' , b_in)
#np.save(path_to_parameters + 'Full_only_lr_out.npy' , b_out)

In [None]:
# Load the previously validated graph used for the comparison
G_validated = gt.load_graph(f"{path_to_validated_graphs}Grafo_left.gt")

In [None]:
# Number of edges in the loaded validated graph
G_validated.num_edges()

In [None]:
# Remove the name bound to the loaded validated graph once it is no longer needed
del G_validated

In [None]:
# Check whether any stored entry of the matrix differs from 1
contains_values_other_than_one = (adj_weigh.data != 1).any()

print(
    "La matrice contiene valori diversi da 1."
    if contains_values_other_than_one
    else "La matrice contiene solo valori 1 (e implicitamente 0)."
)

In [None]:
# List the files currently present in the output directory of the validated graphs
ls /sdf/3stepmodel/ensamble/validated_PNAS/