In [8]:
#import necessary packages
import networkx as nx
import numpy as np
from community import community_louvain #pip install python-louvain package
import leidenalg as la
import igraph as ig
import pandas as pd
import matplotlib.pyplot as plt

<h1>Read and format the data</h1>

In [19]:
# Load the exported Web of Science file
wos_data_orig = pd.read_csv('data/wos_export.csv')

#remove NANs from the Cited References Column
wos_data = wos_data_orig[wos_data_orig['Cited References'].notnull()]

# Extract cited references and reformat in to a list of sources (cited paper) and targets (citing paper) **need to check this is correct behaviour**
cited_references = []
for index, row in wos_data.iterrows():
    refs = row['Cited References'].split('; ')
    for ref in refs:
        cited_references.append({
            'source': row['UT (Unique WOS ID)'],
            'target': ref
        })

# Create a DataFrame
citation_data = pd.DataFrame(cited_references)

# Save to a new CSV file
citation_data.to_csv('citation_data.csv', index=False)

<h1>Perform the analysis</h1>

In [22]:
#read in data
citation_data = pd.read_csv('citation_data_fake.csv')

# Create a directed graph
G = nx.DiGraph()
G.add_edges_from(citation_data[['source', 'target']].values)

# Convert the NetworkX graph to an iGraph object
g = ig.Graph.TupleList(G.edges(), directed=True)

# Perform Leiden community detection
partition = la.find_partition(g, la.ModularityVertexPartition, n_iterations=10)

# Get the community assignments
community_ids = partition.membership

# Create a dictionary mapping nodes to community IDs
node_community = {node: community_id for node, community_id in zip(G.nodes(), community_ids)}

# Calculate community metrics (e.g., size, density)
community_metrics = {}
for community_id in set(community_ids):
    community_nodes = [node for node, cid in node_community.items() if cid == community_id]
    community_graph = G.subgraph(community_nodes)
    community_metrics[community_id] = {
        'size': len(community_nodes),
        'density': nx.density(community_graph)
    }

# Print community metrics
for community_id, metrics in community_metrics.items():
    print(f"Community {community_id}: size={metrics['size']}, density={metrics['density']}")

# Visualize the community structure
pos = nx.spring_layout(G)
colors = [community_ids[node_community[node]] for node in G.nodes()]
print(colors)

options = {
    'node_size': 10,
    'width': 0.1,
}

nx.draw_networkx(G, pos, node_color=colors, with_labels=False,cmap='viridis', **options)

plt.savefig("output.png")
plt.show()


Community 0: size=7, density=1.0238095238095237
Community 1: size=6, density=0.13333333333333333
Community 2: size=5, density=0.3
Community 3: size=4, density=0.25
Community 4: size=4, density=0.5


TypeError: 'int' object is not subscriptable