# Exploring BFS

This notebook presents the results obtained using the BFS algorithm and showcases the characteristics of the skeleton. It includes metrics such as the number of nodes and links involved at each step, along with the percentage of connections transitioning from one actor to another. 

In [None]:
import pickle
import pandas as pd
import graph_tool.all as gt
from  utilities import build_CI_rank,get_classes

retweet_net = 'RETWEET NETWORK PATH'
bfs_path = 'BFS PATH'

validated_path = 'VALIDATED NETWORKs PATH'
validated_networks = ['Left.gt','Right.gt','Full.gt','Full_only_lr.gt']

def analyze_bfs_info(net_type,validated_path, threshold=1.0):

    print(f'Gathering infromations for the {net_type}\n')

    O = gt.load_graph(retweet_net + net_type.replace('Grafo_','').capitalize())
    V = gt.load_graph(validated_path + net_type)

    ov = O.num_vertices()
    oe = O.num_edges()

    vv = V.num_vertices()
    ve = V.num_edges()
    
    # Load data from the pickle file
    with open(bfs_path + net_type.replace('.gt','_bfs_nodes.txt'), 'rb') as file:
        loaded_object = pickle.load(file)

    
    # Calculate total number of nodes
    total_nodes=set()
    for i in loaded_object:
        for j in loaded_object[i]:
            total_nodes.update(loaded_object[i][j])

    total_nodes = len(total_nodes)
    
    # Read BFS info from CSV file
    bfs_info = pd.read_csv(bfs_path + net_type.replace('.gt','_bfs.csv'))

    # Calculate total connections and weights
    tot_connections = bfs_info.groupby('Steps').sum()['Connections'].sum()
    tot_weights = bfs_info.groupby('Steps').sum()['Weights'].sum()

    print('The retweet network contains ', ov ,' and ', oe, ' edges;')
    print('The validated network contains ', vv ,' and ', ve, ' edges;')
    print('The bfs skeleton contains ', total_nodes ,' and ', tot_connections, ' edges;\n')

    # Calculate percentages of connections and weights per step
    link_to_step = {}
    link_to_step_w = {}
    for step, o in bfs_info.groupby('Steps', as_index=False).sum()[['Connections', 'Steps', 'Weights']].iterrows():
        perc_connections = round(o['Connections'] / tot_connections * 100, 2)
        perc_weights = round(o['Weights'] / tot_weights * 100, 2)
        link_to_step[o['Steps']] = perc_connections
        link_to_step_w[o['Steps']] = perc_weights

    # Analyze BFS steps
    for step in bfs_info.Steps.unique():
        temp = bfs_info[bfs_info.Steps == step]
        sum_connections = temp['Connections'].sum()
        sum_weights = temp['Weights'].sum()
        if sum_connections==0:continue

        nodes_per_step = sum(len(loaded_object[step+1][l]) for l in loaded_object[step+1])
        print(f"Step {step} accounts for:\n",
              round(nodes_per_step / total_nodes, 2) * 100, "of the total nodes in the bfs;\n",
              link_to_step[step],'of the total links\n',
              link_to_step_w[step], 'of the total weights.\n')
        for cnt, line in temp.iterrows():
            cn = round(line['Connections'] / sum_connections * 100, 2)
            cn_w = round(line['Weights'] / sum_weights * 100, 2)
            if cn > threshold:
                print(f"Connections between {line['Sources']} and {line['Targets']} account for ",
                      cn, ' of the connections and ', cn_w, ' of the total weights')
        print('\n')


In [31]:
validated_networks

['Left.gt', 'Right.gt', 'Full.gt', 'Full_only_lr.gt']

In [32]:
to_check = validated_networks[0]

In [33]:
V = gt.load_graph(validated_path + to_check)

OL, OLI, I, A, S = get_classes(V, category='all_2', top=1000)

analyze_bfs_info(to_check,validated_path, threshold=1.0)

N° Influencers: 418
N° OL: 1085
N° OLI: 509
N° Sources: 193
N° Adopters: 934669
Number of nodes does not sum to the total!
Gathering infromations for the Left.gt

The retweet network contains  2515391  and  21397211  edges;
The validated network contains  936866  and  6536347  edges;
The bfs skeleton contains  710432  and  2027887  edges;

Step 0 accounts for:
 6.0 of the total nodes in the bfs;
 3.11 of the total links
 6.3 of the total weights.

Connections between S and A account for  98.75  of the connections and  98.76  of the total weights


Step 1 accounts for:
 48.0 of the total nodes in the bfs;
 73.7 of the total links
 70.19 of the total weights.

Connections between OL and A account for  11.81  of the connections and  11.08  of the total weights
Connections between OLI and A account for  24.15  of the connections and  24.22  of the total weights
Connections between I and A account for  23.71  of the connections and  23.59  of the total weights
Connections between A and A ac