The final library and code used here were arrived at after experimentation.

Experimenting with different approaches:

In [1]:
pip install networkx matplotlib py2neo pyvis


Defaulting to user installation because normal site-packages is not writeable
Collecting py2neo
  Downloading py2neo-2021.2.4-py2.py3-none-any.whl.metadata (9.9 kB)
Collecting pyvis
  Downloading pyvis-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting interchange~=2021.0.4 (from py2neo)
  Downloading interchange-2021.0.4-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting monotonic (from py2neo)
  Downloading monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting pansi>=2020.7.3 (from py2neo)
  Downloading pansi-2024.11.0-py2.py3-none-any.whl.metadata (3.1 kB)
Collecting jsonpickle>=1.4.1 (from pyvis)
  Downloading jsonpickle-4.0.2-py3-none-any.whl.metadata (8.2 kB)
Downloading py2neo-2021.2.4-py2.py3-none-any.whl (177 kB)
Downloading pyvis-0.3.2-py3-none-any.whl (756 kB)
   ---------------------------------------- 0.0/756.0 kB ? eta -:--:--
   --------------------------------------- 756.0/756.0 kB 10.5 MB/s eta 0:00:00
Downloading interchange-2021.0.4-py2.py3-none-any.whl (28

In [29]:
import json
from pyvis.network import Network
import networkx as nx


In [45]:
def produce_KG(json_filename):
    """Build an interactive knowledge graph from a dataset JSON file.

    Reads ``../../dataset/{json_filename}.json``, creates one circular node
    per question (with the wrapped reasoning text as its hover title) and one
    square node per distinct confidence level (red), sentiment (blue) and
    inefficiency (green), links every question to its categories, and writes
    the result as a PyVis HTML page to
    ``../../dataset/{json_filename}_KG.html``.

    Entries that are missing a required key, have a field of an unusable
    type, or contain ``None`` where a node id is needed are reported with
    ``Passing: index = <i>`` and skipped entirely, so they contribute neither
    nodes nor edges.

    Args:
        json_filename: Base name of the dataset file, without directory and
            without the ``.json`` extension.

    Returns:
        None. The graph is written to disk and a confirmation is printed.

    Raises:
        OSError: If the dataset file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(f"../../dataset/{json_filename}.json", "r", encoding="utf-8") as file:
        data = json.load(file)

    # Create a NetworkX graph
    G = nx.Graph()

    # Distinct category values seen across all valid entries
    confidence_levels = set()
    sentiments = set()
    inefficiencies = set()

    for index, entry in enumerate(data):
        # Validate the whole entry up front so that a bad record is skipped
        # as a unit instead of being half-added or silently reusing the
        # previous record's values.
        try:
            q_id = entry["question_id"]
            confidence = entry["confidence_level"]
            sentiment = entry["sentiment"]
            reasoning = entry["reasoning"]
            entry_inefficiencies = list(entry["inefficiencies"])

            # Wrap reasoning into 80-character lines for the hover tooltip
            formatted_reasoning = "\n".join(
                reasoning[i:i + 80] for i in range(0, len(reasoning), 80)
            )

            node_ids = [q_id, confidence, sentiment, *entry_inefficiencies]
            if any(node_id is None for node_id in node_ids):
                # NetworkX rejects None as a node; treat it as a bad record.
                raise ValueError("None cannot be used as a graph node")
            for node_id in node_ids:
                hash(node_id)  # graph nodes and set members must be hashable
        except (KeyError, TypeError, ValueError):
            print(f'Passing: index = {index}')
            continue

        # Record the categories this question belongs to
        confidence_levels.add(confidence)
        sentiments.add(sentiment)
        inefficiencies.update(entry_inefficiencies)

        # Add question node with formatted reasoning
        G.add_node(q_id, label=f"Q{q_id}", shape="circle", title=formatted_reasoning)

        # Connect question to confidence level, sentiment, and inefficiencies
        G.add_edge(q_id, confidence)
        G.add_edge(q_id, sentiment)
        for ineff in entry_inefficiencies:
            G.add_edge(q_id, ineff)

    # Style the category nodes (add_node on an existing node updates its attributes)
    for conf in confidence_levels:
        G.add_node(conf, label=conf, color="red", shape="square")

    for sent in sentiments:
        G.add_node(sent, label=sent, color="blue", shape="square")

    for ineff in inefficiencies:
        G.add_node(ineff, label=ineff, color="green", shape="square")

    # Create a PyVis network
    net = Network(notebook=True, directed=False)

    # Enable physics to improve layout
    net.toggle_physics(True)

    # Convert to PyVis format
    net.from_nx(G)

    # Save the graph next to the dataset
    net.save_graph(f"../../dataset/{json_filename}_KG.html")
    print(f"Graph saved as {json_filename}_KG.html")

In [47]:
# Build and save the knowledge graph for the 'Inefficient_reasoning_bter_normalized' dataset file.
produce_KG('Inefficient_reasoning_bter_normalized')

Graph saved as Inefficient_reasoning_bter_normalized_KG.html


In [None]:
# Build and save the knowledge graph for the 'Inefficient_reasoning_bter' dataset file
# (presumably the non-normalized variant of the file used above — TODO confirm).
produce_KG('Inefficient_reasoning_bter')

net.save_graph("/Users/bisman/Documents/ECS 260/Project github/CodeRefineAI/llm/RQ1/my_interactive_graph.html")
/Users/bisman/Documents/ECS 260/Project github/CodeRefineAI/dataset/RQ1KG/Inefficient_reasoning_bter.json

Tested a better layout of the graph, building on the working code above:

In [48]:
import json
from pyvis.network import Network
import networkx as nx

# Build the same question/category knowledge graph as above, but with tuned
# physics options and with the category nodes seeded into three columns
# (confidence, sentiment, inefficiency) so the layout starts out structured.

# Load the dataset from JSON file (JSON is UTF-8 by specification, so do not
# depend on the platform default encoding)
with open('../../dataset/Inefficient_reasoning_bter_normalized.json', "r", encoding="utf-8") as file:
    data = json.load(file)

# Create a NetworkX graph
G = nx.Graph()

# Distinct category values seen across all entries
confidence_levels = set()
sentiments = set()
inefficiencies = set()

# Add nodes for each question and their respective attributes
for entry in data:
    q_id = entry["question_id"]
    confidence = entry["confidence_level"]
    sentiment = entry["sentiment"]
    reasoning = entry["reasoning"]
    entry_inefficiencies = entry["inefficiencies"]

    # Wrap reasoning into 80-character lines for the hover tooltip
    formatted_reasoning = "\n".join(reasoning[i:i+80] for i in range(0, len(reasoning), 80))

    # Record the categories this question belongs to
    confidence_levels.add(confidence)
    sentiments.add(sentiment)
    inefficiencies.update(entry_inefficiencies)

    # Add question node with formatted reasoning
    G.add_node(q_id, label=f"Q{q_id}", shape="circle", title=formatted_reasoning)

    # Connect the question to each of its category nodes
    G.add_edge(q_id, confidence)
    G.add_edge(q_id, sentiment)
    for ineff in entry_inefficiencies:
        G.add_edge(q_id, ineff)

# Style the category nodes with distinct colors for clarity
# (add_node on an existing node updates its attributes)
for conf in confidence_levels:
    G.add_node(conf, label=conf, color="red", shape="square", size=25)

for sent in sentiments:
    G.add_node(sent, label=sent, color="blue", shape="square", size=25)

for ineff in inefficiencies:
    G.add_node(ineff, label=ineff, color="green", shape="square", size=25)

# Create a PyVis network
net = Network(notebook=True, directed=False)

# Enable physics for better layout but make it more structured.
# This must stay before set_options(), which replaces the options object.
net.toggle_physics(True)

# Convert the graph to PyVis format
net.from_nx(G)

# Adjust the physics layout to organize nodes better.
# avoidOverlap is documented by vis-network as a value in the range 0..1,
# so the maximum (1) is used instead of an out-of-range 1.2.
net.set_options("""
var options = {
  "nodes": {
    "shape": "circle",
    "size": 15
  },
  "edges": {
    "smooth": {
      "type": "continuous"
    },
    "width": 1
  },
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -5000,
      "centralGravity": 0.2,
      "springLength": 200,
      "springConstant": 0.05,
      "damping": 0.15,
      "avoidOverlap": 1
    },
    "minVelocity": 0.75
  }
}
""")

# Top of each category column, keyed by the category's node color
category_positions = {
    "red": {"x": 0, "y": 100},  # Confidence category
    "blue": {"x": 200, "y": 100},  # Sentiment category
    "green": {"x": 400, "y": 100}  # Inefficiency category
}

# Vertical distance between consecutive nodes of the same column
category_row_spacing = 100

# Seed the category nodes column by column. Nodes of one category are spread
# downwards instead of all sharing a single point, and are ordered by their
# string form so the layout is the same on every run. A value that appears in
# more than one category keeps the first column it was placed in
# (confidence, then sentiment, then inefficiency).
placed_nodes = set()
for members, color in (
    (confidence_levels, "red"),
    (sentiments, "blue"),
    (inefficiencies, "green"),
):
    column_top = category_positions[color]
    column_nodes = sorted((m for m in members if m not in placed_nodes), key=str)
    for row, node in enumerate(column_nodes):
        pyvis_node = net.get_node(node)
        pyvis_node["x"] = column_top["x"]
        pyvis_node["y"] = column_top["y"] + row * category_row_spacing
        placed_nodes.add(node)

# Save the graph
output_file = "../../dataset/structured_interactive_graph.html"
net.save_graph(output_file)
print(f"Structured graph saved as {output_file}")


Structured graph saved as ../../dataset/structured_interactive_graph.html
