### Load the data

In [1]:
import pandas as pd
import json
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import ijson
from tqdm import tqdm
from scipy.stats import percentileofscore
import ipywidgets as widgets
from IPython.display import display, clear_output
from tabulate import tabulate


Here we load the pre-built graphs from disk, because re-running the code that creates them every time would take a long time.

In [2]:
# Location of the saved citation graph (node-link JSON), relative to the working directory.
file_path = "citation_graph.json"

# JSON text is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default encoding, which is not UTF-8 on every system.
with open(file_path, 'r', encoding='utf-8') as json_file:
    graph_data = json.load(json_file)

# Rebuild the citation graph from its node-link representation.
G = nx.node_link_graph(graph_data)

In [3]:
# Location of the saved collaboration graph (node-link JSON), relative to the working directory.
file_path = "collaboration_graph.json"

# JSON text is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default encoding, which is not UTF-8 on every system.
with open(file_path, 'r', encoding='utf-8') as json_file:
    graph_data = json.load(json_file)

# Rebuild the collaboration graph from its node-link representation.
N = nx.node_link_graph(graph_data)

# 2 - Controlling System

## 2.1 - Backend 

### Functionality 1 - Graph's features

Here we create the functionality 1. In each step, we compute one of the metrics requested and then print them as result.

In [4]:
def graph_features(graph, graph_name, dense_threshold=0.5):
    """Build a plain-text report of the basic structural features of a graph.

    Args:
        graph: networkx graph to summarise.
        graph_name: label used in the report header.
        dense_threshold: density above which the graph is labelled "Dense".
            This is a heuristic cut-off; 0.5 means "more than half of the
            possible edges are present" for a simple graph.

    Returns:
        str: multi-line report with node and edge counts, density, the degree
        of every node, the average degree, the hubs (nodes whose degree is
        strictly above the 95th percentile) and the Dense/Sparse label.
    """
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    density = nx.density(graph)

    # Compute the degree mapping once and reuse it for the distribution and the hubs.
    degree_by_node = dict(nx.degree(graph))
    degree_distribution = list(degree_by_node.values())

    if degree_distribution:
        average_degree = np.mean(degree_distribution)
        percentile_95 = np.percentile(degree_distribution, 95)
        hubs = [node for node, degree in degree_by_node.items() if degree > percentile_95]
    else:
        # Empty graph: mean and percentile are undefined, so report neutral values.
        average_degree = 0.0
        hubs = []

    # The previous cut-off, edges / (nodes * (nodes - 1)), is the directed density
    # itself (and half the undirected one), so the label never depended on the
    # data; it also divided by zero for graphs with fewer than two nodes.
    is_dense = "Dense" if density > dense_threshold else "Sparse"

    report = f"Graph Features Report for '{graph_name}':\n"
    report += f"Number of Nodes: {num_nodes}\n"
    report += f"Number of Edges: {num_edges}\n"
    report += f"Graph Density: {density}\n"
    report += f"Degree Distribution: {degree_distribution}\n"
    report += f"Average Degree: {average_degree}\n"
    report += f"Graph Hubs: {hubs}\n"
    report += f"Graph Density Status: {is_dense}\n"

    return report

# Build and show the report for the citation graph.
# To inspect the collaboration graph instead, pass N and "Collaboration Graph".
graph_result = graph_features(graph=G, graph_name="Citation Graph")
print(graph_result)

Graph Features Report for 'Citation Graph':
Number of Nodes: 10000
Number of Edges: 25807
Graph Density: 0.0002580958095809581
Degree Distribution: [4, 1, 2, 1, 6, 8, 5, 2, 1, 2, 10, 2, 4, 2, 4, 0, 2, 5, 2, 0, 1, 6, 1, 3, 1, 3, 1, 0, 5, 1, 1, 1, 5, 3, 3, 0, 4, 3, 2, 4, 1, 2, 0, 0, 3, 3, 4, 11, 7, 3, 12, 5, 15, 0, 2, 1, 5, 1, 0, 10, 4, 2, 3, 12, 4, 2, 6, 1, 0, 6, 3, 4, 8, 0, 0, 4, 2, 4, 6, 4, 3, 1, 2, 2, 3, 0, 1, 3, 2, 1, 3, 0, 0, 3, 0, 0, 1, 2, 3, 4, 1, 5, 0, 3, 1, 0, 11, 0, 7, 5, 3, 6, 0, 15, 6, 3, 3, 5, 0, 2, 2, 7, 3, 9, 5, 1, 4, 1, 2, 3, 0, 2, 0, 4, 3, 0, 1, 1, 1, 0, 1, 8, 2, 4, 0, 9, 4, 0, 5, 5, 4, 2, 3, 1, 48, 3, 5, 1, 8, 2, 0, 2, 7, 2, 4, 1, 1, 0, 4, 5, 17, 6, 3, 15, 4, 8, 2, 9, 6, 12, 6, 3, 5, 1, 1, 2, 3, 0, 1, 3, 7, 10, 1, 1, 2, 1, 3, 0, 3, 4, 4, 2, 2, 5, 0, 3, 1, 8, 5, 1, 3, 2, 13, 3, 5, 1, 9, 6, 2, 1, 23, 0, 5, 0, 3, 4, 1, 2, 2, 3, 3, 10, 7, 3, 30, 0, 1, 4, 0, 0, 11, 0, 1, 2, 5, 12, 1, 7, 1, 12, 0, 1, 1, 3, 9, 1, 2, 0, 0, 4, 0, 1, 8, 5, 1, 2, 6, 14, 0, 20, 8, 4, 4, 5, 2, 27, 

### Functionality 2 - Nodes' contribution

Here we implement Functionality 2. At each step we compute one of the requested metrics, and then print them all as the result.  <br>
For this step, we use the networkx implemented functions.

In [5]:
def centrality(graph, node, graph_name):
    """Report four centrality measures of one node as a formatted table.

    Args:
        graph: networkx graph that should contain ``node``.
        node: identifier of the node to evaluate.
        graph_name: label used in the report header.

    Returns:
        str: header line followed by a ``fancy_grid`` table with the node's
        betweenness, PageRank, closeness centrality and degree centrality.

    Raises:
        KeyError: if ``node`` is not in ``graph``.
    """
    # Fail fast with a clear message: previously a missing node only surfaced as
    # a bare KeyError after the whole-graph betweenness computation had finished.
    if node not in graph:
        raise KeyError(f"Node {node!r} is not in '{graph_name}'")

    betweenness = nx.betweenness_centrality(graph)[node]
    pagerank = nx.pagerank(graph)[node]
    # u=node asks networkx for this node's value only instead of every node's.
    closeness_centrality = nx.closeness_centrality(graph, u=node)
    degree_centrality = nx.degree_centrality(graph)[node]

    data = [
        ["Betweenness", betweenness],
        ["Pagerank", pagerank],
        ["Closeness Centrality", closeness_centrality],
        ["Degree Centrality", degree_centrality]
    ]

    table = tabulate(data, headers=["Feature", "Value"], tablefmt="fancy_grid")
    report = f"Graph Features Report for '{graph_name}':\n{table}"

    return report

# Example run on the citation graph.
# Alternative kept from development: node 156762113 with
# centrality(N, node, "Collaboration Graph") (presumably a collaboration-graph node).
node = 2893933380
result = centrality(graph=G, node=node, graph_name="Citation Graph")
print(result)

Graph Features Report for 'Citation Graph':
╒══════════════════════╤═════════════╕
│ Feature              │       Value │
╞══════════════════════╪═════════════╡
│ Betweenness          │ 9.35727e-06 │
├──────────────────────┼─────────────┤
│ Pagerank             │ 7.83614e-05 │
├──────────────────────┼─────────────┤
│ Closeness Centrality │ 0.00040004  │
├──────────────────────┼─────────────┤
│ Degree Centrality    │ 0.0010001   │
╘══════════════════════╧═════════════╛


### Functionality 3 - Shortest ordered walk

## 2.2 - Frontend 

### Visualization 1 - Visualize graph features

<code style="background:orange;color:black">**Introduction:**</code> <br>
The main idea here is to present an interactive menu where the user can decide the steps to take. <br>
In few words, we take the function created before (with only one modification that is needed for visualisation reasons), <br>
and the user is able to navigate a menu choosing the graphs and the features they want to inspect. <br>

The system opens by asking the user whether they want to enter the Visualisation System or exit. <br>
If entered, user will be able to choose the graph or return back to the home. Once the graph is chosen, the user will be able to retrieve the feature they want. <br>
The features are as requested: <br>
- Table containing general information <br>
- Table that lists the graph's hubs <br>
- Plot of citations received (Citation graph only) <br>
- Plot of citations given (Citation graph only) <br>
- Plot number of collaborators per author (only collaboration graph) <br>
Please, note that not all features are visible for both graphs as some are specific only for one of them and vice-versa. <br>

Below you will see also screenshots of every step.

In [6]:
# Front-end version of graph_features: the same metrics as the backend function
# (without the raw degree list), rendered as a table. Running this cell replaces
# the earlier graph_features definition in the kernel namespace.
def graph_features(graph, graph_name, dense_threshold=0.5):
    """Build a tabular report of the basic structural features of a graph.

    Args:
        graph: networkx graph to summarise.
        graph_name: label used in the report header.
        dense_threshold: density above which the graph is labelled "Dense".
            This is a heuristic cut-off; 0.5 means "more than half of the
            possible edges are present" for a simple graph.

    Returns:
        str: header line followed by a ``fancy_grid`` table with node and edge
        counts, density, average degree, the hubs (nodes whose degree is
        strictly above the 95th percentile) and the Dense/Sparse label.
    """
    # Hubs are broken over several lines so the table stays readable.
    hubs_per_row = 8

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    density = nx.density(graph)

    # Compute the degree mapping once and reuse it for the average and the hubs.
    degree_by_node = dict(nx.degree(graph))
    degree_distribution = list(degree_by_node.values())

    if degree_distribution:
        average_degree = np.mean(degree_distribution)
        percentile_95 = np.percentile(degree_distribution, 95)
        hubs = [node for node, degree in degree_by_node.items() if degree > percentile_95]
    else:
        # Empty graph: mean and percentile are undefined, so report neutral values.
        average_degree = 0.0
        hubs = []

    # The previous cut-off, edges / (nodes * (nodes - 1)), is the directed density
    # itself (and half the undirected one), so the label never depended on the
    # data; it also divided by zero for graphs with fewer than two nodes.
    is_dense = "Dense" if density > dense_threshold else "Sparse"

    if hubs:
        hub_labels = [str(node) for node in hubs]
        hub_rows = [
            ", ".join(hub_labels[start:start + hubs_per_row])
            for start in range(0, len(hub_labels), hubs_per_row)
        ]
        hubs_cell = ",\n".join(hub_rows)
    else:
        hubs_cell = "(none)"

    data = [
        ["Number of Nodes", num_nodes],
        ["Number of Edges", num_edges],
        ["Graph Density", density],
        ["Average Degree", average_degree],
        ["Graph Hubs", hubs_cell],
        ["Graph Density Status", is_dense],
    ]

    table = tabulate(data, headers=["Feature", "Value"], tablefmt="fancy_grid")
    report = f"Graph Features Report for '{graph_name}':\n{table}"

    return report

In [None]:
###############VISUALISATION##############################
#here I create my menu and visualisation system

# Home page widgets: one button enters the visualisation system, the other leaves it.
btn_visualization_system = widgets.Button(
    description="Visualization System",
    button_style='info',
)
btn_exit = widgets.Button(
    description="Exit",
    button_style='danger',
)

# Handler of the Exit button.
def exit_clicked(b):
    """Clear the cell output and print an exit confirmation.

    Args:
        b: the clicked button, as passed by the click callback (unused).
    """
    clear_output(wait=True)
    print("System exited")
    # Extension point: extra cleanup could be added after the message.

# Show the home page (used at start-up and by every Return button).
def display_buttons(b=None):
    """Clear the cell output and display the two home-page buttons.

    Args:
        b: optional clicked button; accepted so the function can also be
            used directly as a click callback (unused).
    """
    clear_output(wait=True)
    home_page = (btn_visualization_system, btn_exit)
    display(*home_page)

# Second page: choice of the graph to inspect.
def visualization_system_clicked(b):
    """Replace the home page with the graph-selection page.

    Three buttons are shown: one per graph (each opens the feature menu for
    that graph through ``display_graph_options``) and a Return button that
    goes back to the home page without leaving the system.

    Args:
        b: the clicked button, as passed by the click callback (unused).
    """
    clear_output(wait=True)
    print("Select the graph you want to investigate:")

    def open_collaboration(_):
        clear_output(wait=True)
        display_graph_options(N, "Collaboration Graph")

    def open_citation(_):
        clear_output(wait=True)
        display_graph_options(G, "Citation Graph")

    def go_home(_):
        # Back to the home page; the system stays open.
        display_buttons()

    # (label, style, click handler) for every button of this page, in display order.
    page_spec = (
        ("Collaboration Graph", 'primary', open_collaboration),
        ("Citation Graph", 'success', open_citation),
        ("Return", 'warning', go_home),
    )

    page_buttons = []
    for label, style, handler in page_spec:
        button = widgets.Button(description=label, button_style=style)
        button.on_click(handler)
        page_buttons.append(button)

    display(*page_buttons)

# Third page: feature menu for the graph the user selected.
def display_graph_options(graph, graph_name):
    """Show the feature menu for the selected graph.

    Args:
        graph: graph used by the "General Information" and "Graph's Hubs"
            features.
        graph_name: display name of the graph. "Citation Graph" hides the
            collaborations plot button; "Collaboration Graph" hides the two
            citation plot buttons.

    Note:
        The citation plots always read the module-level citation graph ``G``
        and the collaborations plot always reads the module-level
        collaboration graph ``N``, independently of ``graph``.
    """
    clear_output(wait=True)
    print(f"Graph Selected: {graph_name}. Select the feature you want to investigate:")

    # Buttons of this page (some are hidden below, depending on the graph).
    btn_table = widgets.Button(description="General Information", button_style='primary')
    btn_hubs = widgets.Button(description="Graph's Hubs", button_style='primary')
    btn_citation_received = widgets.Button(description="Citations Received Plot", button_style='primary')
    btn_citation_gived = widgets.Button(description="Citations Gived Plot", button_style='primary')
    btn_collaborations = widgets.Button(description="Collaborations Plot", button_style='primary')
    btn_return = widgets.Button(description="Return", button_style='warning')

    def table_clicked(b):
        """Print the general-information table of the selected graph."""
        clear_output(wait=True)
        print(graph_features(graph, graph_name))

    def hubs_clicked(b):
        """Print the hubs: nodes whose degree is strictly above the 95th percentile."""
        clear_output(wait=True)
        degree_by_node = dict(nx.degree(graph))
        if degree_by_node:
            # Same hub definition as graph_features. The previous filter
            # (degree > 0) listed every non-isolated node as a hub.
            percentile_95 = np.percentile(list(degree_by_node.values()), 95)
            hubs = [node for node, degree in degree_by_node.items() if degree > percentile_95]
        else:
            hubs = []
        print(f"Graph Hubs for '{graph_name}':\n{hubs}")

    def citation_received_clicked(b):
        """Plot the histogram of citations received (in-degree) in the citation graph."""
        # Reminder for the user that this feature is graph-specific.
        print("\033[91;1mWARNING: THIS FEATURE IS AVAILABLE ONLY FOR THE CITATION GRAPH!!!\033[0m")
        in_degrees = dict(G.in_degree())
        in_degree_values = list(in_degrees.values())

        plt.figure(figsize=(16, 6))
        plt.hist(in_degree_values, bins=100, color='red', alpha=0.7)
        plt.title('Citations Received by Papers')
        plt.xlabel('In-Degree (Number of Citations Received)')
        plt.ylabel('Frequency')
        plt.grid(True)
        # Fixed viewing window; values above 70 are not shown.
        plt.xlim(0,70)
        plt.show()

    def citation_gived_clicked(b):
        """Plot the histogram of citations given (out-degree) in the citation graph."""
        # Reminder for the user that this feature is graph-specific.
        print("\033[91;1mWARNING: THIS FEATURE IS AVAILABLE ONLY FOR THE CITATION GRAPH!!!\033[0m")
        out_degrees = dict(G.out_degree())
        out_degree_values = list(out_degrees.values())

        plt.figure(figsize=(16, 6))
        plt.hist(out_degree_values, bins="auto", color='r', alpha=0.7)
        plt.title('Citations Given by Papers')
        plt.xlabel('Out-Degree (Number of Citations Given)')
        plt.ylabel('Frequency')
        plt.grid(True)
        # Fixed viewing window; values above 43 are not shown.
        plt.xlim(0,43)
        plt.xticks(np.arange(0, 43))
        plt.show()

    def collaborations_clicked(b):
        """Plot the histogram of the degrees of the 50 highest-degree authors."""
        # Reminder for the user that this feature is graph-specific.
        print("\033[91;1mWARNING: THIS FEATURE IS AVAILABLE ONLY FOR THE COLLABORATION GRAPH!!!\033[0m")
        degrees = dict(N.degree())
        # Only the top 50 nodes by degree are plotted, as the graph is large.
        top_nodes = sorted(degrees, key=degrees.get, reverse=True)[:50]

        plt.figure(figsize=(16, 6))
        plt.hist([degrees[node] for node in top_nodes], bins=100, color='red', alpha=0.7)
        plt.title('N collaborations for Top 50 Authors')
        plt.xlabel('N')
        plt.ylabel('Frequency')
        plt.grid(True)
        # Fixed viewing window (240-260); presumably tuned to this dataset.
        plt.xticks(np.arange(235, 261))
        plt.xlim(240,260)
        plt.show()

    def return_clicked(b):
        """Go back to the home page without leaving the system."""
        display_buttons()

    # Hide the buttons that do not apply to the selected graph.
    if graph_name == "Citation Graph":
        btn_collaborations.layout.visibility = 'hidden'
    if graph_name == "Collaboration Graph":
        btn_citation_gived.layout.visibility = 'hidden'
        btn_citation_received.layout.visibility = 'hidden'

    # Attach every handler to its button.
    btn_table.on_click(table_clicked)
    btn_hubs.on_click(hubs_clicked)
    btn_citation_received.on_click(citation_received_clicked)
    btn_citation_gived.on_click(citation_gived_clicked)
    btn_collaborations.on_click(collaborations_clicked)
    btn_return.on_click(return_clicked)

    display(btn_table, btn_hubs, btn_citation_received, btn_citation_gived, btn_collaborations, btn_return)

# Attach the handlers of the two home-page buttons.
btn_visualization_system.on_click(visualization_system_clicked)
btn_exit.on_click(exit_clicked)

# Start the system on the home page.
display_buttons()

Graph Features Report for 'Collaboration Graph':
╒══════════════════════╤═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════

---

<code style="background:red;color:black">**Note:**</code> <br>
Here you can see step by step how the system is displayed:

---

<style>
    .container {
        display: flex;
        justify-content: center;
    }
    .item {
        margin: 5px;
        text-align: center; 
    }
    .item img {
        display: block;
        margin: 0;
    }
</style>

<div class="container">
    <div class="item">
        <img src="/images/home.png" width=150 height=100>
        <p><strong>Homepage of the system</strong></p>
    </div>
    <div class="item">
        <img src="/images/graphs.png" width=250 height=200>
        <p><strong>The user now has to choose the graph</strong></p>
    </div>
    <div class="item">
        <img src="/images/Collaboration_graph.png" width=350 height=300>
        <p><strong>Collaboration Graph</strong></p>
    </div>
    <div class="item">
        <img src="/images/Citation_graph.png" width=350 height=300>
        <p><strong>Citation Graph</strong></p>
    </div>
</div>

### Visualization 2 - Visualize the node's contribution

<code style="background:orange;color:black">**Introduction:**</code> <br>
As before, we create an interactive menu for the user. <br>
It is very similar to the previous one but this time after choosing the graph, you are asked to input a number (our node of interest) and the system <br>
will return the result from the function previously created.

In [None]:
# Home page widgets of the node-contribution system: enter or exit.
btn_os_selection = widgets.Button(
    description="Visualization Systems",
    button_style='primary',
)
btn_return_main = widgets.Button(
    description="Exit",
    button_style='danger',
)

# Entering the system opens the graph-selection page.
def os_selection_clicked(b):
    """Clear the output and display the graph-selection page.

    The page shows the two graph buttons, the Return button and the Exit button.

    Args:
        b: the clicked button, as passed by the click callback (unused).
    """
    clear_output(wait=True)
    selection_page = (
        btn_citation_graph,
        btn_collaboration_graph,
        btn_return_sub,
        btn_return_main,
    )
    display(*selection_page)

# Handler of the Exit button.
def return_main_clicked(b):
    """Clear the cell output and print an exit confirmation.

    Args:
        b: the clicked button, as passed by the click callback (unused).
    """
    clear_output(wait=True)
    print("System exited")

# Attach the handlers of the two home-page buttons.
btn_return_main.on_click(return_main_clicked)
btn_os_selection.on_click(os_selection_clicked)

# Show the home page. The widgets and handlers of the following pages are
# defined further down in this cell; they are only looked up when a button is clicked.
display(btn_os_selection, btn_return_main)


# Widgets of the graph-selection page.
btn_citation_graph = widgets.Button(
    description="Citation Graph",
    button_style='primary',
)
btn_collaboration_graph = widgets.Button(
    description="Collaboration Graph",
    button_style='primary',
)
btn_return_sub = widgets.Button(
    description="Return",
    button_style='warning',
)

# Lower-cased label of the graph button the user clicked; None until a graph is chosen.
selected_graph_type = None

# Shared handler of both graph buttons: the clicked button's label tells us
# which graph the centrality function must later be run on.
def graph_selection_clicked_sub(b):
    """Remember the chosen graph and show the node input form.

    Args:
        b: the clicked graph button; its lower-cased ``description`` is
            stored in the module-level ``selected_graph_type``.
    """
    global selected_graph_type
    clear_output(wait=True)
    chosen_label = b.description
    selected_graph_type = chosen_label.lower()
    display(input_node, btn_return_result)

# Handler of the Return button: go back to the home page.
def return_sub_clicked(b):
    """Clear the output and display the home-page buttons again.

    Args:
        b: the clicked button, as passed by the click callback (unused).
    """
    clear_output(wait=True)
    home_page = (btn_os_selection, btn_return_main)
    display(*home_page)

# Attach the handlers of the graph-selection page; both graph buttons share one handler.
btn_return_sub.on_click(return_sub_clicked)
btn_citation_graph.on_click(graph_selection_clicked_sub)
btn_collaboration_graph.on_click(graph_selection_clicked_sub)

# Widgets of the input page: integer field for the node id and a submit button.
input_node = widgets.IntText(
    value=1,
    description='Node Number:',
    style={'description_width': 'initial'},
)
btn_return_result = widgets.Button(
    description="Submit",
    button_style='warning',
)

# Handler of the Submit button: validate the request, ask the user for patience,
# then run the centrality function on the graph that was selected.
def return_result_clicked(b):
    """Compute and print the centrality report for the submitted node.

    Reads the node from ``input_node`` and the graph choice from the
    module-level ``selected_graph_type``. Nothing is computed when no known
    graph is selected or when the node is not in the chosen graph; in the
    latter case the input form is shown again so the user can retry.

    Args:
        b: the clicked button, as passed by the click callback (unused).
    """
    clear_output(wait=True)
    node = input_node.value

    # Resolve the graph lazily so only the selected one has to exist.
    if selected_graph_type == "citation graph":
        graph, graph_name = G, "Citation Graph"
    elif selected_graph_type == "collaboration graph":
        graph, graph_name = N, "Collaboration Graph"
    else:
        # Previously this fell through and failed on the unbound `result`.
        print("No graph selected. Please go back and choose a graph first.")
        return

    # Check the node before starting the expensive whole-graph computations;
    # the default input value is usually not a node of the graph.
    if node not in graph:
        print(f"Node {node} is not in the {selected_graph_type}. Please enter a valid node.")
        display(input_node, btn_return_result)
        return

    print("Retrieving results; this could take some minutes, please wait...")
    result = centrality(graph, node, graph_name)

    print(f"Result for node {node} in the {selected_graph_type}:")
    print(result)

# Run the centrality report handler when the user presses Submit.
btn_return_result.on_click(return_result_clicked)

Retrieving results; this could take some minutes, please wait...
Result for node 2893933380 in the citation graph:
Graph Features Report for 'Citation Graph':
╒══════════════════════╤═════════════╕
│ Feature              │       Value │
╞══════════════════════╪═════════════╡
│ Betweenness          │ 9.35727e-06 │
├──────────────────────┼─────────────┤
│ Pagerank             │ 7.83614e-05 │
├──────────────────────┼─────────────┤
│ Closeness Centrality │ 0.00040004  │
├──────────────────────┼─────────────┤
│ Degree Centrality    │ 0.0010001   │
╘══════════════════════╧═════════════╛


---

<code style="background:red;color:black">**Note:**</code> <br>
The layout is identical to the previous one: you have a homepage and you select the graph, but this time you are asked to input <br> 
a number (node) as below. After, the output will be shown.

---

<style>
    .container {
        display: flex;
        justify-content: center;
    }
    .item {
        margin: 5px;
        text-align: left; 
    }
    .item img {
        display: block;
        margin: 0;
    }
</style>

<div class="container">
    <div class="item">
        <img src="https://github.com/camillabrigandi/ADM-HW5/blob/main/images/submit.png?raw=true" width=400 height=125 />
        <p><strong>Here the user has to input the node</strong></p>
    </div>
</div>

### Visualization 3 - Visualize the shortest-ordered route