# Week 5


Instructions (da cancellare) 
1) Write a function to compute the PageRank of the nodes in a graph (you are not allowed to use built-in functions from NetworkX).
2) Run it on your graph using = 0.15, save the total number of iterations, and check that your function returns the same results (in terms of the scores) as the built-in function from NetworkX.
3) Identify the node with the highest PageRank, compare it with the one you got from week 3, and give an interpretation to the possible difference.
4) Provide the cumulative distribution, compare it with the one you got from week 3, and comment the results.
5) Optional: Re-run the PageRank using = 0.5, save the total number of iterations, compare with the results (in terms of both the number of iterations and the cumulative distribution) obtained for = 0.15, and comment.
Hint: treat weighted graphs as unweighted & links in undirected graphs as doubly directed (each undirected link becomes a directed link in both directions). Additionally, make sure you run the PageRank algorithm on a connected graph.


In [6]:
import networkx as nx
import csv
#OUTPUT should be a DICTIONARY--> key: node, value: pagerank

# Load the graph from CSV files
def load_graph(nodes_file_path, edges_file_path):
    G = nx.Graph()
    with open(nodes_file_path, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            G.add_node(row['Id'], label=row['Label'])

    with open(edges_file_path, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            G.add_edge(row['Source'], row['Target'], weight=int(row['Weight']))
    return G

def adjacency_matrix_from_graph(graph):
    num_nodes = len(graph.nodes())
    adjacency_matrix = [[0] * num_nodes for _ in range(num_nodes)]

    for source, target in graph.edges():
        source_index = list(graph.nodes()).index(source)
        target_index = list(graph.nodes()).index(target)
        adjacency_matrix[source_index][target_index] = 1

    return adjacency_matrix

def calculate_pagerank(adjacency_matrix, damping_factor=0.15, num_iterations=100):
    num_nodes = len(adjacency_matrix)
    initial_pagerank = [1 / num_nodes] * num_nodes  # Initialize PageRank equally for all nodes

    for _ in range(num_iterations):
        new_pagerank = [0] * num_nodes
        for i in range(num_nodes):
            for j in range(num_nodes):
                if adjacency_matrix[j][i] == 1:
                    new_pagerank[i] += initial_pagerank[j] / sum(adjacency_matrix[j])
        for i in range(num_nodes):
            new_pagerank[i] = (1 - damping_factor) / num_nodes + damping_factor * new_pagerank[i]

        initial_pagerank = new_pagerank
    print(initial_pagerank)    
    return initial_pagerank


if __name__ == "__main__":
    nodes_file_path = '/Users/coding/Desktop/network_analysis/sna_titanic/Project/Graph/nodes.csv'
    edges_file_path = '/Users/coding/Desktop/network_analysis/sna_titanic/Project/Graph/edges.csv'

    # Load the graph
    graph = load_graph(nodes_file_path, edges_file_path)

    # Convert the graph to an adjacency matrix
    adjacency_matrix = adjacency_matrix_from_graph(graph)

    # Calculate PageRank
    pagerank = calculate_pagerank(adjacency_matrix)

    # Assign PageRank values to nodes
    for i, node in enumerate(graph.nodes()):
        graph.nodes[node]['pagerank'] = pagerank[i]


[0.012142857142857143, 0.012182453416149068, 0.01223054204805492, 0.012289722090222928, 0.012322714963731591, 0.012535928942946244, 0.012182453416149068, 0.01243436742418, 0.012804917951145117, 0.012996991720412293, 0.012803654820093156, 0.012605258745268052, 0.012443756027269572, 0.012990853395565723, 0.013076002476434246, 0.013009836527690586, 0.01291847070504122, 0.012743347979749346, 0.012647960876923856, 0.0131222594098085, 0.012496574344609119, 0.013738776107007, 0.013459915337393331, 0.013682995551781695, 0.014199856968025538, 0.013303184566875037, 0.01244153641386086, 0.01385314711684487, 0.01291748085340527, 0.013400241723345781, 0.013013892712810915, 0.013620583906613559, 0.01354083207813608, 0.01520579345014672, 0.012629751031534707, 0.012572273796539283, 0.01270375170645742, 0.012962761343086847, 0.014377626280451498, 0.016534270222519222, 0.015423602964728847, 0.01270375170645742, 0.012365190217391304, 0.012267200817098943, 0.013385884227488895, 0.012190945774762995, 0.014

In [8]:
nx.pagerank(graph, alpha=0.15,max_iter=100 )

{'5097316': 0.032498962451122146,
 '5098095': 0.026765155628607595,
 '5097875': 0.023202761824182416,
 '5098619': 0.018027306989774672,
 '5098545': 0.01678283049932719,
 '5101061': 0.01678196621713339,
 '5096969': 0.018978991450266564,
 '5097941': 0.01740659604073832,
 '5098606': 0.016848383698217822,
 '5097894': 0.01790299279854587,
 '5098041': 0.015592837923724753,
 '5096948': 0.015845053465810388,
 '5098128': 0.015988032467500732,
 '5098463': 0.014373896538199791,
 '5098583': 0.015359994397249964,
 '5101981': 0.014292016686679579,
 '5100134': 0.013936005318640775,
 '5097212': 0.014193566775713816,
 '5101285': 0.014124600466863202,
 '5101840': 0.013930916057362831,
 '5100235': 0.01323851891052862,
 '5098242': 0.013951587691363645,
 '5102690': 0.01354740403054238,
 '5099090': 0.014434391712700972,
 '5098717': 0.013270376585601994,
 '5097994': 0.013894826536231303,
 '5101041': 0.013876242670112298,
 '5098541': 0.013059263319082394,
 '5098267': 0.01255305429906515,
 '5099110': 0.0134451