In [2]:
#LIBRARIES

import sys
import os

import pandas as pd
import numpy as np

import networkx as nx
import matplotlib.pyplot as plt


# CONFIGURATION
#
# Root of the local `jrc-egd/LLM` checkout. The default is the machine-specific
# location this notebook was written against; set the JRC_EGD_LLM_ROOT
# environment variable to run from a different checkout without editing
# this cell.
PROJECT_ROOT = os.environ.get(
    'JRC_EGD_LLM_ROOT',
    '/Users/giorgiobolchi2/Documents/GitHub/jrc-egd/LLM/',
)

# Put the project root at the front of the module search path.
# The guard keeps the cell idempotent: re-running it does not stack
# duplicate entries at the head of sys.path.
if sys.path[:1] != [PROJECT_ROOT]:
    sys.path.insert(0, PROJECT_ROOT)


#DATA

# Excel workbook read below, located relative to PROJECT_ROOT so the root is
# defined in exactly one place.
data_path = os.path.join(PROJECT_ROOT, 'Data/Outputs/0320/0320_network.xlsx')

# Fail early with an actionable message instead of a bare pandas error.
if not os.path.isfile(data_path):
    raise FileNotFoundError(
        f"Network workbook not found at {data_path!r}; "
        "set JRC_EGD_LLM_ROOT to the root of your jrc-egd/LLM checkout."
    )

# Only the 'reviewed' sheet of the workbook is loaded.
data = pd.read_excel(data_path, sheet_name='reviewed')


In [8]:
# Build a directed graph with one edge per row of `data`, pointing from the
# row's 'source_subtheme' to its 'impact_subtheme' and carrying the row's
# 'impact_type' as an edge attribute.
G = nx.DiGraph()

# Walk the three columns in parallel instead of using DataFrame.iterrows(),
# which builds a full Series object for every row. Rows are still visited in
# order, so node insertion order is unchanged, and a repeated
# (source, impact) pair ends up with the 'impact_type' of its last occurrence.
G.add_edges_from(
    (source, target, {'impact_type': impact_type})
    for source, target, impact_type in zip(
        data['source_subtheme'], data['impact_subtheme'], data['impact_type']
    )
)

# Summary of the resulting graph: node/edge counts followed by the full views
print(f'''Nodes ({len(G.nodes())}): {G.nodes()}''')
print(f'''Edges ({len(G.edges())}): {G.edges()}''')


Nodes (54): ['Biodiversity Protection & Conservation', 'Food quality', 'Food affordability', 'Critical Raw Materials - Extraction & Import', 'Improve Water Quality', 'Improve Soils Health', 'Pesticides Reduction', 'Transport Logistics', 'Renewable Energy', 'Biodiversity Protection & Conservation - Fisheries', 'Biofuels', 'Energy Efficiency', 'Biodiversity Protection & Conservation - Monitoring', 'Improve Air Quality', 'Waste Reduction - Plastic & Packaging', 'Biodiversity Protection & Conservation - Urban Nature', 'Urban Mobility', 'Energy Efficiency - Buildings', 'GHG Removal', 'Renewable Energy - Hydrogen Production', 'Circularity/Recycling - Critical Raw Materials - Batteries Recycling', 'Terrestrial Ecosystems Restoration - Agricultural Ecosystems', 'GHG Reduction - Transports', 'Circularity/Recycling', 'Net-Zero Technology - Road Vehicles', 'Competitive Agriculture', 'Terrestrial Ecosystems Restoration', 'GHG Reduction - Buildings', 'Terrestrial Ecosystems Restoration - Forests', 

In [10]:
# Centrality scores for G. Each call returns a {node: score} mapping.
in_degree_centrality = nx.in_degree_centrality(G)
out_degree_centrality = nx.out_degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)
closeness_centrality = nx.closeness_centrality(G)
# Eigenvector centrality is allowed up to 1000 iterations
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)
pagerank = nx.pagerank(G)
katz_centrality = nx.katz_centrality(G, alpha=0.1, beta=1.0)

# Column header -> score mapping, listed in the order the columns appear
score_columns = {
    'In-Degree Centrality': in_degree_centrality,
    'Out-Degree Centrality': out_degree_centrality,
    'Betweenness Centrality': betweenness_centrality,
    'Closeness Centrality': closeness_centrality,
    'Eigenvector Centrality': eigenvector_centrality,
    'PageRank': pagerank,
    'Katz Centrality': katz_centrality,
}

# One row per node, in the graph's own node order; every score column is
# looked up against that same order so the rows stay aligned.
node_order = list(G.nodes)
centrality_table = {'Node': node_order}
for header, scores in score_columns.items():
    centrality_table[header] = [scores[node] for node in node_order]

# Rank nodes from highest to lowest betweenness centrality
centrality_df = pd.DataFrame(centrality_table).sort_values(
    by='Betweenness Centrality', ascending=False
)

# The ten highest-ranked nodes
top_10_central_nodes = centrality_df.iloc[:10]

# Print the table
print(top_10_central_nodes)

                          Node  In-Degree Centrality  Out-Degree Centrality  \
4        Improve Water Quality              0.301887               0.264151   
5         Improve Soils Health              0.264151               0.226415   
22  GHG Reduction - Transports              0.207547               0.169811   
13         Improve Air Quality              0.433962               0.132075   
25     Competitive Agriculture              0.188679               0.150943   
2           Food affordability              0.264151               0.094340   
1                 Food quality              0.245283               0.150943   
45               GHG Reduction              0.132075               0.245283   
36          Climate Resilience              0.132075               0.113208   
10                    Biofuels              0.113208               0.169811   

    Betweenness Centrality  Closeness Centrality  Eigenvector Centrality  \
4                 0.153976              0.540816      

In [3]:
# Load the karate club example graph that ships with NetworkX.
# NOTE(review): this rebinds `G`, the same name the subtheme network is built
# under earlier in the notebook; re-run that cell before reusing `G` for it.
G = nx.karate_club_graph()

# Statistics of the graph
print(f"Number of nodes: {len(G.nodes())}")
print(f"Number of edges: {len(G.edges())}")
average_clustering = nx.average_clustering(G)
print(f"Average Clustering Coefficient: {average_clustering}")
diameter = nx.diameter(G)
print(f"Graph Diameter: {diameter}")
density = nx.density(G)
print(f"Graph Density: {density}")
# Reference values for the prints above (floats rounded to 2 decimals):
# Number of nodes: 34
# Number of edges: 78
# Average Clustering Coefficient: 0.57
# Graph Diameter: 5
# Graph Density: 0.14

# Plotting graph with Matplotlib and Networkx.
# Node positions are computed once with a fixed seed: without an explicit
# `pos`, draw_networkx falls back to an unseeded spring layout, so the figure
# would look different on every run.
LAYOUT_SEED = 42
karate_layout = nx.spring_layout(G, seed=LAYOUT_SEED)

plt.figure(figsize=(7,5))
nx.draw_networkx(G, pos=karate_layout, with_labels=True, font_weight='bold')
plt.show()

Number of nodes: 34
Number of edges: 78
Average Clustering Coefficient: 0.5706384782076823
Graph Diameter: 5
Graph Density: 0.13903743315508021


: 