In [None]:
pip install networkx pyvis sentence-transformers community arxiv neo4j


Collecting pyvis
  Downloading pyvis-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting jedi>=0.16 (from ipython>=5.3.0->pyvis)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence

# Load the data and build the first version of the graph

In [None]:
import json
import networkx as nx
import numpy as np
from pyvis.network import Network
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the papers.json
# The encoding is given explicitly: abstracts contain non-ASCII characters (e.g. "≠"),
# and the platform default encoding used by open() is not UTF-8 everywhere.
with open('papers.json', 'r', encoding='utf-8') as f:
    papers = json.load(f)

# Fixed node colors, keyed by the tag names that assign_tag() returns
TAG_COLORS = {
    "Quantum Computing": "#1f78b4",
    "Circuit Complexity": "#33a02c",
    "Proof Complexity": "#6a3d9a",
    "Cryptography": "#ff7f00",
    "General P vs NP": "#b2b2b2"
}

# Very basic tag assignment based on keywords
def assign_tag(abstract):
    """Return the topic tag for an abstract using case-insensitive keyword matching.

    Rules are tried in priority order and the first rule with a matching
    keyword wins, so an abstract mentioning both "quantum" and "circuit" is
    tagged "Quantum Computing". Keywords are matched as substrings.

    Args:
        abstract: Abstract text of a paper (a str).

    Returns:
        One of "Quantum Computing", "Circuit Complexity", "Proof Complexity",
        "Cryptography", or the fallback "General P vs NP".
    """
    text = abstract.lower()
    # (keywords, tag) pairs, highest priority first
    rules = (
        (("quantum",), "Quantum Computing"),
        (("circuit",), "Circuit Complexity"),
        (("proof",), "Proof Complexity"),
        (("crypto", "encryption"), "Cryptography"),
    )
    for keywords, tag in rules:
        if any(keyword in text for keyword in keywords):
            return tag
    return "General P vs NP"

# Column-wise views of the paper records; only `abstracts` is embedded below
abstracts, titles, urls = [], [], []
for record in papers:
    abstracts.append(record["abstract"])
    titles.append(record["concept"])
    urls.append(record["paper_url"])

# Sentence-embedding model used to turn each abstract into a vector
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(abstracts)

# Pairwise cosine similarity between abstracts; entry [i, j] compares paper i with paper j
similarity_matrix = cosine_similarity(embeddings)

def _node_attributes(paper):
    """Build the attribute dict stored on a paper's node; tag and color come from the abstract."""
    tag = assign_tag(paper["abstract"])
    return {
        "label": paper["concept"],
        "abstract": paper["abstract"],
        "tag": tag,
        "url": paper["paper_url"],
        "color": TAG_COLORS[tag],
    }

# Build the graph: one node per paper, keyed by its position in `papers`
G = nx.Graph()
G.add_nodes_from((idx, _node_attributes(paper)) for idx, paper in enumerate(papers))

# Connect every unordered pair (i < j) whose similarity is strictly above the threshold
threshold = 0.7
n_papers = len(papers)
G.add_edges_from(
    (i, j, {"weight": float(similarity_matrix[i, j])})
    for i in range(n_papers)
    for j in range(i + 1, n_papers)
    if similarity_matrix[i, j] > threshold
)

# Visualize the graph
net = Network(notebook=False, width="100%", height="750px", bgcolor="#ffffff", font_color="black")

# Set some physics for better layout
net.barnes_hut()

# Nodes are added one by one (instead of via net.from_nx) so that each carries
# a hover title built from its abstract and tag, plus the href/target options.
for node, data in G.nodes(data=True):
    net.add_node(node,
                 label=data['label'],
                 title=f"<b>Abstract:</b> {data['abstract']}<br><b>Tag:</b> {data['tag']}",
                 color=data['color'],
                 shape='dot',
                 size=15,
                 href=data['url'],
                 target='_blank')

# Edge thickness follows the cosine similarity stored as the edge weight
for source, target, data in G.edges(data=True):
    net.add_edge(source, target, value=data['weight'])

# No net.from_nx(G) here: the two loops above already put every node and edge
# of G into `net`, so a second load adds nothing.
# NOTE(review): pyvis's from_nx also appears to write into the attribute dicts of
# the graph it is given (default node 'size', edge 'weight' moved to a display
# key) - confirm against the installed pyvis version.

# Save the graph
net.save_graph('papers_graph_final.html')

print("✅ Graph created and saved as 'papers_graph_final.html'!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Graph created and saved as 'papers_graph_final.html'!


# Identify concept hubs by looking at papers that tackle similar concepts

In [None]:
# Identify Concept Hubs
# A node counts as a hub when its neighbors carry at least this many distinct tags.
# NOTE: with the value 1, every node that has at least one edge qualifies.
# Raise it to 2 to keep only nodes whose neighbors span more than one field.
MIN_NEIGHBOR_TAGS = 1
HUB_NODE_SIZE = 25       # Bigger size for visibility
DEFAULT_NODE_SIZE = 10   # Explicit size for every non-hub node

hub_nodes = [
    node
    for node in G.nodes()
    if len({G.nodes[neighbor]['tag'] for neighbor in G.neighbors(node)}) >= MIN_NEIGHBOR_TAGS
]

# Update Node Appearance
for node in hub_nodes:
    G.nodes[node]['color'] = '#e31a1c'  # Bright Red for hubs
    G.nodes[node]['size'] = HUB_NODE_SIZE

# Give all remaining nodes an explicit size so that later cells can read
# data['size'] without depending on a side effect of the visualization library.
for node in G.nodes():
    G.nodes[node].setdefault('size', DEFAULT_NODE_SIZE)

# ✅ Now just recreate the network directly from G
net = Network(notebook=False, width="100%", height="750px", bgcolor="#ffffff", font_color="black")
net.barnes_hut()

# 🎯 This automatically pulls all updated node and edge data!
# A copy is passed so that G itself is left untouched.
# NOTE(review): pyvis's from_nx appears to modify the attribute dicts of the graph
# it receives (edge 'weight' is moved to a display key) - confirm against the
# installed pyvis version. G.copy() creates fresh attribute dicts, so G keeps its weights.
net.from_nx(G.copy())

# Save the updated graph
net.save_graph('papers_graph_with_hubs.html')

print(f"✅ Found {len(hub_nodes)} Concept Hubs! Saved as 'papers_graph_with_hubs.html'")



✅ Found 7 Concept Hubs! Saved as 'papers_graph_with_hubs.html'


In [None]:
# Degree centrality of every node in the graph
centrality = nx.degree_centrality(G)

# Keep only the scores of the hub nodes
hub_centrality = {}
for hub in hub_nodes:
    hub_centrality[hub] = centrality[hub]

# Five highest-scoring hubs as (node, score) pairs, best first
# (ties keep their order from hub_nodes because the sort is stable)
ranked_hubs = sorted(hub_centrality, key=hub_centrality.get, reverse=True)
top_hubs = [(hub, hub_centrality[hub]) for hub in ranked_hubs[:5]]

# Identify the top 5 most influential concept hubs

In [None]:
def _score_of(entry):
    """Return the centrality score from a (node, score) pair."""
    return entry[1]

# Degree centrality for the whole graph, then restricted to the hub nodes
centrality = nx.degree_centrality(G)
hub_centrality = {node: centrality[node] for node in hub_nodes}

# Rank the hubs by score, highest first, and keep the first five
ranked_hubs = list(hub_centrality.items())
ranked_hubs.sort(key=_score_of, reverse=True)
top_hubs = ranked_hubs[:5]

# Print a short report card for each of the selected hubs
print("\n🏆 Top 5 Most Influential Concept Hubs:")
for rank, (node, score) in enumerate(top_hubs, 1):
    attrs = G.nodes[node]
    report_lines = [
        f"\n{rank}. {attrs['label']}",
        f"   🔗 URL: {attrs['url']}",
        f"   🏷️ Tag: {attrs['tag']}",
        f"   ⭐ Centrality Score: {score:.4f}",
        f"   📜 Abstract: {attrs['abstract'][:300]}...",
    ]
    print("\n".join(report_lines))



🏆 Top 5 Most Influential Concept Hubs:

1. SAT Solvers: Theory and Practice
   🔗 URL: https://sat2011.cs.helsinki.fi/eduschool/material/marques-silva.pdf
   🏷️ Tag: General P vs NP
   ⭐ Centrality Score: 0.1250
   📜 Abstract: Explains advances in SAT solvers and how practical solving often outpaces worst-case complexity predictions. Discusses implications for understanding NP-completeness in practice....

2. The Complexity of Theorem-Proving Procedures
   🔗 URL: https://www.cs.toronto.edu/~sacook/homepage/1971.pdf
   🏷️ Tag: General P vs NP
   ⭐ Centrality Score: 0.0625
   📜 Abstract: Introduces the concept of NP-completeness and establishes SAT as NP-complete. Provides the foundational link between search problems and computational complexity....

3. Relativization and the P=NP Question
   🔗 URL: https://eccc.weizmann.ac.il/report/1994/011/
   🏷️ Tag: General P vs NP
   ⭐ Centrality Score: 0.0625
   📜 Abstract: Discusses the concept of relativization, showing that both P=NP and P≠NP 

# Explore arXiv to identify papers that are connected to these concept hubs

In [None]:
import arxiv

# Function to search arXiv for related papers
def search_arxiv_related(keywords, max_results=5):
    """Search arXiv for papers matching all of the given keywords.

    The keywords are joined with ' AND ' into a single query and the results
    are requested in descending order of relevance.

    Args:
        keywords: Iterable of keyword strings.
        max_results: Maximum number of results to request.

    Returns:
        A list of arxiv result objects; an empty list when no keywords are
        given (no request is made in that case).
    """
    query = ' AND '.join(keywords)
    if not query:
        # Nothing to search for; avoid sending an empty query to the API.
        return []

    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
        sort_order=arxiv.SortOrder.Descending
    )
    # Search.results() is deprecated in the arxiv package; results are fetched
    # through a Client instead.
    return list(arxiv.Client().results(search))

# Expand graph from top hubs
for node, _ in top_hubs:
    hub_paper = G.nodes[node]

    # Extract keywords from title (simple split, can be smarter later)
    keywords = hub_paper['label'].split()[:5]  # First 5 words as rough keywords

    related_papers = search_arxiv_related(keywords, max_results=5)

    for related in related_papers:
        new_id = related.entry_id

        if new_id not in G:
            G.add_node(new_id,
                       label=related.title,
                       abstract=related.summary,
                       url=related.entry_id,
                       tag="Expanded",
                       color="#d9d9d9",  # Light gray for expansions
                       size=12)

            G.add_edge(node, new_id, weight=0.5)  # Weaker weight for new links

print(f"✅ Graph expanded with {len(related_papers)} new nodes per hub!")


  return list(search.results())


✅ Graph expanded with 5 new nodes per hub!


In [None]:
import arxiv
import networkx as nx
import json
from arxiv import Client


# Prepare the graph data for JSON output
DEFAULT_NODE_SIZE = 10  # Used for nodes that never had a 'size' attribute assigned

graph_data = {
    # One record per node; 'size' falls back to the default instead of raising KeyError
    "nodes": [
        {
            "id": node,
            "label": data['label'],
            "abstract": data['abstract'],
            "url": data['url'],
            "tag": data['tag'],
            "color": data['color'],
            "size": data.get('size', DEFAULT_NODE_SIZE),
        }
        for node, data in G.nodes(data=True)
    ],
    # One record per edge
    "edges": [
        {
            "source": source,
            "target": target,
            "weight": data.get('weight', 0.5),  # Default weight is 0.5 if 'weight' is missing
        }
        for source, target, data in G.edges(data=True)
    ],
}

# Save the graph as a JSON file
with open("graph_expanded.json", "w") as f:
    json.dump(graph_data, f, indent=4)

print("✅ Expanded graph saved in 'graph_expanded.json'!")



✅ Expanded graph saved in 'graph_expanded.json'!


# Load the generated graph into Neo4j

In [None]:
import json
import os

from neo4j import GraphDatabase

# Neo4j connection details are read from the environment so that credentials
# never have to be typed into (and saved with) the notebook.
# Set NEO4J_URI, NEO4J_USERNAME and NEO4J_PASSWORD before running this cell;
# each value falls back to an empty string when its variable is not set.
uri = os.environ.get("NEO4J_URI", "")
username = os.environ.get("NEO4J_USERNAME", "")
password = os.environ.get("NEO4J_PASSWORD", "")

# Initialize Neo4j driver
driver = GraphDatabase.driver(uri, auth=(username, password))




In [None]:
# Read the exported graph (a dict with "nodes" and "edges" lists) back from disk
with open('graph_expanded.json') as graph_file:
    graph_data = json.load(graph_file)

# Function to create nodes and edges in Neo4j
def create_graph(tx, graph_data):
    """Write the nodes and edges of graph_data to Neo4j through transaction `tx`.

    Each node becomes (or updates) a :Paper node identified by its `id`. Each
    edge becomes (or updates) a single :CONNECTED relationship from the source
    paper to the target paper, with the weight stored as a property.

    Args:
        tx: Object with a `run(query, **parameters)` method (a Neo4j transaction).
        graph_data: Dict with a "nodes" list (dicts with id, label, abstract,
            url, tag, color, size) and an "edges" list (dicts with source,
            target, weight).
    """
    node_query = """
        MERGE (p:Paper {id: $id})
        SET p.label = $label, p.abstract = $abstract, p.url = $url, p.tag = $tag, p.color = $color, p.size = $size
        """
    # The relationship is merged on its endpoints and type only, and the weight
    # is set afterwards. Putting the weight inside the MERGE pattern would
    # create a second relationship whenever the weight differs from a previous load.
    edge_query = """
        MATCH (p1:Paper {id: $source}), (p2:Paper {id: $target})
        MERGE (p1)-[r:CONNECTED]->(p2)
        SET r.weight = $weight
        """

    # Create nodes
    for node in graph_data['nodes']:
        tx.run(node_query, id=node['id'], label=node['label'], abstract=node['abstract'], url=node['url'],
               tag=node['tag'], color=node['color'], size=node['size'])

    # Create relationships (edges)
    for edge in graph_data['edges']:
        tx.run(edge_query, source=edge['source'], target=edge['target'], weight=edge['weight'])

# Function to execute graph creation
def load_to_neo4j(graph_data):
    """Run create_graph on graph_data inside one write transaction of the module-level driver.

    Args:
        graph_data: Dict with "nodes" and "edges" lists, passed through to create_graph.
    """
    with driver.session() as session:
        # write_transaction is deprecated in the Neo4j Python driver in favour of
        # execute_write; fall back to the old name for sessions that lack the new one.
        run_write = getattr(session, "execute_write", None) or session.write_transaction
        run_write(create_graph, graph_data)

# Load the graph into Neo4j: load_to_neo4j opens a session on the module-level
# driver and runs create_graph on the graph_data read from graph_expanded.json.
load_to_neo4j(graph_data)

print("✅ Graph successfully loaded into Neo4j!")

  session.write_transaction(create_graph, graph_data)


✅ Graph successfully loaded into Neo4j!
