In [None]:
%pip install dotenv networkx ipycytoscape ipywidgets

In [161]:
import json
import os
import re
import warnings

import ipycytoscape
import ipywidgets
import networkx as nx
import pandas as pd
import requests
from dotenv import load_dotenv

# Notebook-wide display and warning configuration (requires `warnings` and `pd` from the import block).
# Silence DeprecationWarning so library deprecation notices do not clutter cell output.
warnings.filterwarnings('ignore', category=DeprecationWarning)
# Abbreviate any table longer than 15 rows. pandas' default is 60, so this shows
# FEWER rows than the default and keeps rendered tables short.
pd.set_option('display.max_rows', 15)
# Allow up to 150 characters per cell (pandas' default is 50) so chunk text stays readable.
pd.set_option('display.max_colwidth', 150)

print("Libraries imported successfully.")

Libraries imported successfully.


In [121]:
# Populate the process environment from the local .env file.
load_dotenv()
# Credential placed in the Authorization header of each OpenRouter request (None when unset).
Bearer_key = os.environ.get("OPENROUTER_API_KEY")
# Identifier of the OpenRouter model used for triple extraction.
model_name = "qwen/qwen3-235b-a22b:free"

In [103]:
# Load every JSON chunk file from the knowledge-base directory into memory.
kb_chunks_dir = './kb_chunks'
all_chunks = []

# os.listdir() returns entries in arbitrary order; sorting the names makes the
# content of `all_chunks` (and any slice taken from it later) reproducible across runs.
for json_filename in sorted(os.listdir(kb_chunks_dir)):
    if not json_filename.endswith('.json'):
        continue  # ignore anything that is not a chunk file
    json_path = os.path.join(kb_chunks_dir, json_filename)
    with open(json_path, 'r', encoding='utf-8') as f:
        chunk_data = json.load(f)
    all_chunks.append(chunk_data)

print(f"Processed {len(all_chunks)} chunks.")

Processed 447 chunks.


## Define Input Text


In [104]:
# Work on the first 12 loaded chunks only, indexed and ordered by their chunk_id.
chunks_df = (
    pd.DataFrame(all_chunks[:12])
    .set_index('chunk_id')
    .sort_index()
)
# Whitespace-delimited word count of each chunk's text.
chunks_df['word_count'] = chunks_df['content'].apply(lambda x: len(x.split()))
# reset_index() puts 'chunk_id' back into every record; to_dict(orient='records')
# alone discards the index, which would lose each chunk's identity downstream.
chunks = chunks_df.reset_index().to_dict(orient='records')
display(chunks_df[['word_count', 'content']])

Unnamed: 0_level_0,word_count,content
chunk_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,45,"About Asthma\nKey points\nAsthma is a disease that affects your lungs.\nIt is one of the most common long-term diseases of children, but adults ca..."
2,38,"Symptoms\nAsthma causes wheezing, breathlessness, chest tightness, and coughing at night or early in the morning. If you have asthma, you have it..."
3,88,"An asthma attack may include coughing, chest tightness, wheezing, and trouble breathing. The attack happens in your body's airways, which are the ..."
4,55,"You can control your asthma by knowing the warning signs of an asthma attack, staying away from things that cause an attack, and following your do..."
5,71,"During a checkup, a doctor may ask if you cough a lot, especially at night. He or she may also ask whether your breathing problems are worse after..."
6,22,. The doctor may ask about your home and whether you have missed school or work or have trouble doing certain things.
7,74,"The doctor may also do a breathing test, called spirometry, to find out how well your lungs are working. Spirometry tests how much air you breathe..."
8,86,You can breathe in some medicines and take other medicines as a pill. Asthma medicines can be used in several ways -- as reliever therapy or as co...
9,63,Resources and tools\nAsthma information in other languages:\nArabic\nChinese\nFrench\nGerman\nHaitian\nHindi\nKorean\nMongolian\nMyanmar (Burmese)...
10,76,The checklist is organized into a Core Assessment plus two appendices (Dust Mite Module and Mold and Moisture Module). The Core Assessment can be ...


In [105]:
def parse_llm_output(llm_output, chunk_id):
    """Parse the model's CSV reply into a list of triple dictionaries.

    The reply is scanned for the first line whose fields are
    ``subject,predicate,object`` (case-insensitive, spaces around commas
    allowed). Lines before it, such as a Markdown code fence, are ignored.
    Every following line is read as one CSV row, so a double-quoted value may
    contain commas.

    Args:
        llm_output: Raw text returned by the model. Anything that is not a
            string (e.g. ``None``) yields an empty result.
        chunk_id: Identifier stored under the ``"chunk"`` key of every triple.

    Returns:
        A list of dicts with keys ``"subject"``, ``"predicate"``, ``"object"``
        and ``"chunk"``. Rows that do not have exactly three non-empty fields
        are skipped. An empty list is returned when no header line is found.
    """
    import csv  # local import keeps this cell runnable on its own

    if not isinstance(llm_output, str):
        return []

    expected_header = ["subject", "predicate", "object"]

    def split_fields(line):
        """Split one CSV line into stripped fields, honouring double quotes."""
        try:
            fields = next(csv.reader([line], skipinitialspace=True), [])
        except csv.Error:
            return []
        return [field.strip() for field in fields]

    json_lines = []
    header_seen = False
    # splitlines() also copes with Windows line endings, which a plain split('\n') does not.
    for raw_line in llm_output.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        fields = split_fields(line)
        if not header_seen:
            # Everything up to and including the header carries no data.
            header_seen = [field.lower() for field in fields] == expected_header
            continue

        # Keep only complete rows: exactly three parts, none of them empty.
        if len(fields) == 3 and all(fields):
            subject, predicate, obj = fields
            json_lines.append({"subject": subject, "predicate": predicate, "object": obj, "chunk": chunk_id})

    return json_lines

## Define the LLM Prompt for Extraction

In [106]:
# System prompt: fixes the model's role and the kind of facts it should extract.
system_prompt = """You are an AI specialized in extracting factual information from medical and health-related texts, particularly in the respiratory health domain. 
Your task is to identify and extract factual Subject-Predicate-Object (SPO) triples only from the given text, focusing on health-related information such as diseases, symptoms, treatments, and causes.
Ensure that the output is clear and strictly formatted as requested."""

# User prompt template: `{text_chunk}` is filled in with one chunk's text per request.
# Rule 8 tells the model how to write values that contain commas, so that every
# row still has exactly three CSV fields.
user_prompt_template = """Please extract Subject-Predicate-Object (S-P-O) triples from the text below.

VERY IMPORTANT RULES:
1. Output Format: Respond ONLY with CSV format, without any explanations.
2. Header: The CSV must start with subject,predicate,object.
3. Concise Predicates: Keep the 'predicate' value concise (1-3 words, ideally 1-2). Use verbs or short verb phrases 
4. Lowercase: ALL values for 'subject', 'predicate', and 'object' MUST be lowercase.
5. Pronoun Resolution: Replace pronouns (she, he, it, her, etc.) with the specific lowercase entity name they refer to based on the text context.
6. Specificity: Capture specific details.
7. Completeness: Extract all distinct factual relationships mentioned.
8. Commas: If a value itself contains a comma, enclose ONLY that value in double quotes so that every row has exactly three CSV fields.

Text to Process:
{text_chunk}

Your CSV Output (MUST start directly with the header 'subject,predicate,object'):
"""

## LLM Interaction - Extracting Triples (Chunk by Chunk)


In [187]:
# Accumulators for the whole run: every parsed triple, and the number of chunks
# for which the API call or the parsing produced nothing usable.
all_extracted_triples = []
failed_chunks = 0

OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# Without a timeout a stalled connection would block the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 120

# OpenRouter expects "Authorization: Bearer <key>". Accept the configured value
# with or without the "Bearer " prefix so both .env styles work.
api_key = (Bearer_key or "").strip()
if not api_key:
    print("Warning: OPENROUTER_API_KEY is not set; the API will reject every request.")
authorization_header = api_key if api_key.lower().startswith("bearer ") else f"Bearer {api_key}".strip()

for chunk_index, chunk_info in enumerate(chunks):
    chunk_text = chunk_info['content']
    # Prefer the chunk's own id for provenance; fall back to its position in the list.
    chunk_id = chunk_info.get('chunk_id', chunk_index)
    print(f"--- Processing Chunk {chunk_index+1}/{len(chunks)} ---")

    # Format user prompt with chunk
    formatted_user_prompt = user_prompt_template.format(text_chunk=chunk_text)

    # Send API request; a network failure counts as a failed chunk instead of aborting the run.
    try:
        response = requests.post(
            url=OPENROUTER_URL,
            headers={
                "Authorization": authorization_header,
                "Content-Type": "application/json",
            },
            json={
                "model": model_name,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": formatted_user_prompt}
                ],
            },
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        print(f"Error: Request could not be completed: {exc}")
        failed_chunks += 1
        continue

    if response.status_code != 200:
        print(f"Error: Request failed with status code {response.status_code}")
        print("Raw response:", response.text)
        failed_chunks += 1
        continue

    try:
        outputs = response.json()
    except ValueError:
        print("Error: Response body is not valid JSON.")
        print("Raw response:", response.text)
        failed_chunks += 1
        continue

    # The API can report an error inside a 200 response (e.g. rate limiting).
    if 'error' in outputs:
        error = outputs['error']
        print(error.get('message', error) if isinstance(error, dict) else error)
        failed_chunks += 1
        continue

    try:
        content = outputs['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        content = None
    if not isinstance(content, str):
        print("Error: Response did not contain a text completion.")
        failed_chunks += 1
        continue

    chunk_triples = parse_llm_output(content, chunk_id)
    if not chunk_triples:
        # Count parsing failures too, as the summary cell reports "failed API call or parsing".
        print("Warning: No valid triples could be parsed from the model output.")
        failed_chunks += 1
        continue
    all_extracted_triples.extend(chunk_triples)

--- Processing Chunk 1/12 ---
Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day
--- Processing Chunk 2/12 ---
Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day
--- Processing Chunk 3/12 ---
Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day
--- Processing Chunk 4/12 ---
Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day
--- Processing Chunk 5/12 ---
Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day
--- Processing Chunk 6/12 ---
Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day
--- Processing Chunk 7/12 ---
Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day
--- Processing Chunk 8/12 ---
Rate limit exceeded: free-models-per-day. Add 10 credits to 

### Extraction Summary (After Processing All Chunks)


In [189]:
# Report how the extraction run went, using the counters filled in by the extraction loop.
print("\n--- Overall Extraction Summary ---\n")
print(f"Total chunks defined: {len(chunks)}\n")
# The loop visits every chunk, so the number attempted equals the number defined.
processed_chunks = len(chunks)
print(f"Chunks processed (attempted): {processed_chunks}") # Chunks we looped through
print(f"Total valid triples extracted across all processed chunks: {len(all_extracted_triples)}")
print(f"Number of chunks that failed API call or parsing: {failed_chunks}")


--- Overall Extraction Summary ---

Total chunks defined: 12")

Chunks processed (attempted): 12
Total valid triples extracted across all processed chunks: 0
Number of chunks that failed API call or parsing: 12


In [147]:
# Hand-written list of triples. This assignment REPLACES whatever the extraction
# loop stored in `all_extracted_triples`, so every later cell is built from this
# fixed list. Each entry uses the same keys the parser produces
# (subject / predicate / object / chunk), and every entry is tagged with chunk 1.
# NOTE(review): presumably a stand-in because the API calls above were
# rate-limited -- confirm, and remove once the live extraction succeeds.
all_extracted_triples= [
  {"subject": "asthma", "predicate": "is a", "object": "disease", "chunk": 1},
  {"subject": "asthma", "predicate": "affects", "object": "lungs", "chunk": 1},
  {"subject": "asthma", "predicate": "is", "object": "one of the most common long-term diseases of children", "chunk": 1},
  {"subject": "adults", "predicate": "can have", "object": "asthma", "chunk": 1},
  {"subject": "asthma", "predicate": "causes", "object": "wheezing, breathlessness, chest tightness, coughing at night or early in the morning", "chunk": 1},
  {"subject": "asthma attack", "predicate": "may include", "object": "coughing, chest tightness, wheezing, trouble breathing", "chunk": 1},
  {"subject": "asthma attack", "predicate": "happens in", "object": "airways", "chunk": 1},
  {"subject": "airways", "predicate": "carry", "object": "air to lungs", "chunk": 1},
  {"subject": "sides of the airways", "predicate": "swell", "object": "during asthma attack", "chunk": 1},
  {"subject": "airways", "predicate": "shrink", "object": "during asthma attack", "chunk": 1},
  {"subject": "mucous", "predicate": "clogs up", "object": "airways", "chunk": 1},
  {"subject": "you", "predicate": "can control", "object": "asthma by knowing warning signs, avoiding triggers, following doctor's advice", "chunk": 1},
  {"subject": "doctor", "predicate": "can help", "object": "you find out if you have asthma", "chunk": 1},
  {"subject": "doctor", "predicate": "may ask about", "object": "coughing, breathing problems, chest tightness, wheezing, colds lasting more than 10 days, family history of asthma or allergies", "chunk": 1},
  {"subject": "doctor", "predicate": "may ask about", "object": "home conditions, missed school or work, trouble doing things", "chunk": 1},
  {"subject": "doctor", "predicate": "may do", "object": "breathing test called spirometry", "chunk": 1},
  {"subject": "spirometry", "predicate": "tests", "object": "how much air you breathe out before and after asthma medicine", "chunk": 1},
  {"subject": "asthma", "predicate": "can be controlled by", "object": "taking medicine as prescribed, avoiding triggers", "chunk": 1},
  {"subject": "asthma medicines", "predicate": "can be used as", "object": "reliever therapy, controller therapy", "chunk": 1},
  {"subject": "controller medicines", "predicate": "help", "object": "you have fewer and milder asthma attacks", "chunk": 1},
  {"subject": "asthma medicines", "predicate": "can have", "object": "side effects", "chunk": 1},
  {"subject": "side effects", "predicate": "are", "object": "mostly mild and temporary", "chunk": 1},
  {"subject": "home visitor", "predicate": "can help find", "object": "common asthma triggers in homes", "chunk": 1},
  {"subject": "removing asthma triggers in the home", "predicate": "can improve", "object": "health", "chunk": 1},
  {"subject": "core assessment", "predicate": "is used for", "object": "all types of housing and climates", "chunk": 1},
  {"subject": "dust mite module and mold and moisture module", "predicate": "can be used if", "object": "dust mites or mold/moisture issues are suspected", "chunk": 1},
  {"subject": "you", "predicate": "may have", "object": "questions about managing asthma during and after emergency", "chunk": 1},
  {"subject": "you", "predicate": "should follow", "object": "asthma action plan", "chunk": 1},
  {"subject": "you", "predicate": "should take", "object": "all asthma medication as prescribed", "chunk": 1},
  {"subject": "you", "predicate": "should not change", "object": "asthma treatment plan without talking to healthcare provider", "chunk": 1},
  {"subject": "you", "predicate": "should have", "object": "at least 30 days of non-prescription medications and supplies", "chunk": 1},
  {"subject": "you", "predicate": "should know", "object": "how to use inhaler", "chunk": 1},
  {"subject": "you", "predicate": "should avoid", "object": "asthma triggers", "chunk": 1},
  {"subject": "you", "predicate": "should take", "object": "everyday precautions", "chunk": 1}
]

## Normalize and De-duplicate Triples

In [149]:
# Initialize lists and tracking variables
normalized_triples = []
seen_triples = set() # Tracks (subject, predicate, object) tuples
original_count = len(all_extracted_triples)
empty_removed_count = 0
duplicates_removed_count = 0

print(f"Starting normalization and de-duplication of {original_count} triples...")

Starting normalization and de-duplication of 34 triples...


In [151]:
print("Processing triples (showing first 5):")

# Start from a clean slate so that re-running this cell gives the same result
# instead of treating every triple as a duplicate of the previous run.
normalized_triples = []
seen_triples = set()
empty_removed_count = 0
duplicates_removed_count = 0


def _clean_component(value):
    """Lowercase, trim and collapse inner whitespace; non-string values become ''."""
    if not isinstance(value, str):
        return ''
    return re.sub(r'\s+', ' ', value).strip().lower()


for i, t in enumerate(all_extracted_triples):
    show_status = i < 5  # only narrate the first few triples
    # All three components get the same normalization, so that e.g. a doubled
    # space cannot turn one entity into two different graph nodes.
    s, p, o = (_clean_component(t.get(k)) for k in ('subject', 'predicate', 'object'))

    if not (s and p and o):
        empty_removed_count += 1
        if show_status:
            print(f"\n#{i+1}: Invalid - Skipped")
        continue

    key = (s, p, o)
    if key in seen_triples:
        duplicates_removed_count += 1
        if show_status:
            print(f"#{i+1}: Duplicate - Skipped")
        continue

    seen_triples.add(key)
    normalized_triples.append({'subject': s, 'predicate': p, 'object': o, 'source_chunk': t.get('chunk', '?')})
    if show_status:
        print(f"#{i+1}: {key}\nStatus: Kept")

print(f"Done. Total: {len(all_extracted_triples)}, Kept: {len(normalized_triples)}, Duplicates: {duplicates_removed_count}, Empty: {empty_removed_count}")

Processing triples (showing first 5):
#1: ('asthma', 'is a', 'disease')
Status: Kept
#2: ('asthma', 'affects', 'lungs')
Status: Kept
#3: ('asthma', 'is', 'one of the most common long-term diseases of children')
Status: Kept
#4: ('adults', 'can have', 'asthma')
Status: Kept
#5: ('asthma', 'causes', 'wheezing, breathlessness, chest tightness, coughing at night or early in the morning')
Status: Kept
Done. Total: 34, Kept: 34, Duplicates: 0, Empty: 0


In [157]:
# --- Recap of the clean-up pass ---
final_count = len(normalized_triples)

print(f"--- Normalization & De-duplication Summary ---\n")
print(f"Original extracted triple count: {original_count}")
print(f"Triples removed (empty/invalid components): {empty_removed_count}")
print(f"Duplicate triples removed: {duplicates_removed_count}")
print(f"Final unique, normalized triple count: {final_count}")
print("-" * 25)

# Render the surviving triples as a table (pandas abbreviates it per the display options).
print("\n--- Final Normalized Triples ---")
if not normalized_triples:
    print("No valid triples remain after normalization.")
else:
    normalized_df = pd.DataFrame(normalized_triples)
    display(normalized_df)

--- Normalization & De-duplication Summary ---

Original extracted triple count: 34
Triples removed (empty/invalid components): 0
Duplicate triples removed: 0
Final unique, normalized triple count: 34
-------------------------

--- Final Normalized Triples ---


Unnamed: 0,subject,predicate,object,source_chunk
0,asthma,is a,disease,1
1,asthma,affects,lungs,1
2,asthma,is,one of the most common long-term diseases of children,1
3,adults,can have,asthma,1
4,asthma,causes,"wheezing, breathlessness, chest tightness, coughing at night or early in the morning",1
...,...,...,...,...
29,you,should not change,asthma treatment plan without talking to healthcare provider,1
30,you,should have,at least 30 days of non-prescription medications and supplies,1
31,you,should know,how to use inhaler,1
32,you,should avoid,asthma triggers,1


## Build the Knowledge Graph with NetworkX

In [163]:
# Create an empty directed graph
knowledge_graph = nx.DiGraph()

print("Initialized an empty NetworkX DiGraph.")
# Visualize the initial empty graph state
print("--- Initial Graph Info ---")
try:
    # Try the newer method first
    print(nx.info(knowledge_graph))
except AttributeError:
    # Fallback for different NetworkX versions
    print(f"Type: {type(knowledge_graph).__name__}")
    print(f"Number of nodes: {knowledge_graph.number_of_nodes()}")
    print(f"Number of edges: {knowledge_graph.number_of_edges()}")
print("-" * 25)

Initialized an empty NetworkX DiGraph.
--- Initial Graph Info ---
Type: DiGraph
Number of nodes: 0
Number of edges: 0
-------------------------


In [165]:
print("Adding triples to the NetworkX graph...")

added_edges_count = 0
update_interval = 10 # Print a progress snapshot after every N triples

if not normalized_triples:
    print("Warning: No normalized triples to add to the graph.")
else:
    total_triples = len(normalized_triples)
    for i, triple in enumerate(normalized_triples):
        subject_node = triple['subject']
        object_node = triple['object']
        predicate_label = triple['predicate']

        # Add the directed edge with the predicate as a 'label' attribute.
        # A DiGraph holds one edge per (subject, object) pair, so a later triple
        # with the same pair replaces the earlier label.
        knowledge_graph.add_edge(subject_node, object_node, label=predicate_label)
        added_edges_count += 1

        # --- Show graph growth at each interval and after the last triple ---
        if (i + 1) % update_interval == 0 or (i + 1) == total_triples:
            print(f"\n--- Graph Info after adding Triple #{i+1} --- ({subject_node} -> {object_node})")
            # nx.info() is not available in current NetworkX releases (removed
            # in 3.0), so the basic facts are printed directly.
            print(f"Type: {type(knowledge_graph).__name__}")
            print(f"Number of nodes: {knowledge_graph.number_of_nodes()}")
            print(f"Number of edges: {knowledge_graph.number_of_edges()}")
            # For very large graphs, printing info too often can be slow. Adjust interval.

print(f"\nFinished adding triples. Processed {added_edges_count} edges.")

Adding triples to the NetworkX graph...

--- Graph Info after adding Triple #10 --- (airways -> during asthma attack)
Type: DiGraph
Number of nodes: 12
Number of edges: 10

--- Graph Info after adding Triple #20 --- (controller medicines -> you have fewer and milder asthma attacks)
Type: DiGraph
Number of nodes: 27
Number of edges: 20

--- Graph Info after adding Triple #30 --- (you -> asthma treatment plan without talking to healthcare provider)
Type: DiGraph
Number of nodes: 41
Number of edges: 30

--- Graph Info after adding Triple #34 --- (you -> everyday precautions)
Type: DiGraph
Number of nodes: 45
Number of edges: 34

Finished adding triples. Processed 34 edges.


In [169]:
number_of_displays = 5  # how many sample nodes / edges to show

# --- Headline numbers for the finished graph ---
num_nodes = knowledge_graph.number_of_nodes()
num_edges = knowledge_graph.number_of_edges()

print(f"\n--- Final NetworkX Graph Summary ---\n")
print(f"Total unique nodes (entities): {num_nodes}")
print(f"Total unique edges (relationships): {num_edges}")

# Fewer edges in the graph than were added means some subject/object pairs repeated.
if isinstance(knowledge_graph, nx.DiGraph) and num_edges != added_edges_count:
     print(f"Note: Added {added_edges_count} edges, but graph has {num_edges}. DiGraph overwrites edges with same source/target. Use MultiDiGraph if multiple edges needed.")

if num_nodes == 0:
    print("Graph is empty, cannot calculate metrics.")
else:
    try:
        density = nx.density(knowledge_graph)
        print(f"Graph density: {density:.4f}") # How connected the graph is
        # Weak connectivity ignores edge direction.
        if not nx.is_weakly_connected(knowledge_graph):
            num_components = nx.number_weakly_connected_components(knowledge_graph)
            print(f"The graph has {num_components} weakly connected components.")
        else:
            print("The graph is weakly connected (all nodes reachable ignoring direction).")
    except Exception as e:
        print(f"Could not calculate some graph metrics: {e}") # Metrics are optional; report and carry on
print("-" * 25)

# --- A few nodes, in insertion order ---
print(f"\n--- Sample Nodes (First {number_of_displays}) ---")
if num_nodes == 0:
    print("Graph has no nodes.")
else:
    nodes_sample = list(knowledge_graph.nodes())[:number_of_displays]
    display(pd.DataFrame(nodes_sample, columns=['Node Sample']))

# --- A few edges together with their predicate labels ---
print(f"\n--- Sample Edges (First {number_of_displays} with Labels) ---")
if num_edges == 0:
    print("Graph has no edges.")
else:
    edges_sample = [
        {'Source': u, 'Target': v, 'Label': data.get('label', 'N/A')}
        for u, v, data in list(knowledge_graph.edges(data=True))[:number_of_displays]
    ]
    display(pd.DataFrame(edges_sample))
print("-" * 25)


--- Final NetworkX Graph Summary ---

Total unique nodes (entities): 45
Total unique edges (relationships): 34
Graph density: 0.0172
The graph has 11 weakly connected components.
-------------------------

--- Sample Nodes (First 5) ---


Unnamed: 0,Node Sample
0,asthma
1,disease
2,lungs
3,one of the most common long-term diseases of children
4,adults



--- Sample Edges (First 5 with Labels) ---


Unnamed: 0,Source,Target,Label
0,asthma,disease,is a
1,asthma,lungs,affects
2,asthma,one of the most common long-term diseases of children,is
3,asthma,"wheezing, breathlessness, chest tightness, coughing at night or early in the morning",causes
4,asthma,"taking medicine as prescribed, avoiding triggers",can be controlled by


-------------------------


## Visualize the Graph Interactively with ipycytoscape

In [171]:
print("Preparing interactive visualization...")

# --- Decide whether there is anything worth rendering ---
# `can_visualize` is read by the conversion and widget-creation cells below.
graph_available = 'knowledge_graph' in locals() and isinstance(knowledge_graph, nx.Graph)

if not graph_available:
    can_visualize = False
    print("Error: 'knowledge_graph' not found or is not a NetworkX graph.")
elif knowledge_graph.number_of_nodes() == 0:
    can_visualize = False
    print("NetworkX Graph is empty. Cannot visualize.")
else:
    can_visualize = True
    print(f"Graph seems valid for visualization ({knowledge_graph.number_of_nodes()} nodes, {knowledge_graph.number_of_edges()} edges).")

Preparing interactive visualization...
Graph seems valid for visualization (45 nodes, 34 edges).


### 1. Convert NetworkX Data to Cytoscape Format

In [197]:
cytoscape_nodes = []
cytoscape_edges = []

if not can_visualize:
     print("Skipping data conversion as graph is not valid for visualization.")
     cytoscape_graph_data = {'nodes': [], 'edges': []}
else:
    print("Converting nodes...")
    # Node size grows with degree, relative to the best-connected node.
    node_degrees = dict(knowledge_graph.degree())
    max_degree = max(node_degrees.values()) if node_degrees else 1

    for node_id in knowledge_graph.nodes():
        node_name = str(node_id)  # Cytoscape ids must be strings
        degree = node_degrees.get(node_id, 0)
        # 15 is the base size; up to 50 more is added for the highest-degree node.
        if max_degree > 0:
            node_size = 15 + (degree / max_degree) * 50
        else:
            node_size = 15

        node_data = {
            'id': node_name,
            'label': node_name.replace(' ', '\n'),  # one word per line
            'degree': degree,
            'size': node_size,  # read by the stylesheet
            'tooltip_text': f"Entity: {node_name}\nDegree: {degree}",
        }
        cytoscape_nodes.append({'data': node_data})
    print(f"Converted {len(cytoscape_nodes)} nodes.")

    print("Converting edges...")
    for edge_number, (u, v, data) in enumerate(knowledge_graph.edges(data=True)):
        predicate_label = data.get('label', '')
        edge_data = {
            'id': f"edge_{edge_number}",  # unique, position-based edge id
            'source': str(u),
            'target': str(v),
            'label': predicate_label,
            'tooltip_text': f"Relationship: {predicate_label}",
        }
        cytoscape_edges.append({'data': edge_data})
    edge_count = len(cytoscape_edges)
    print(f"Converted {len(cytoscape_edges)} edges.")

    # Final structure handed to the widget.
    cytoscape_graph_data = {'nodes': cytoscape_nodes, 'edges': cytoscape_edges}
    print("-" * 25)

Converting nodes...
Converted 45 nodes.
Converting edges...
Converted 34 edges.
-------------------------


### 2. Create and Configure the Cytoscape Widget

In [175]:
# Build the widget and load the converted graph into it; otherwise leave
# `cyto_widget` as None so the following cells skip themselves.
if not can_visualize:
    print("Skipping widget creation.")
    cyto_widget = None
else:
    print("Creating ipycytoscape widget...")
    cyto_widget = ipycytoscape.CytoscapeWidget()
    print("Widget created.")

    print("Loading graph data into widget...")
    cyto_widget.graph.add_graph_from_json(cytoscape_graph_data, directed=True)
    print("Data loaded.")

Creating ipycytoscape widget...
Widget created.
Loading graph data into widget...
Data loaded.


### 3. Define Visual Style


In [177]:
# Build the Cytoscape stylesheet (a list of selector/style rules), apply it to the
# widget, set a 'cose' layout, and append one extra rule per high-degree node.
# The whole cell is skipped when no widget was created.
if cyto_widget:
    print("Defining enhanced colorful and interactive visual style...")
    # Base stylesheet: default node/edge look plus selected / hover variants.
    visual_style = [
        {
            # Default look of every node; label and size come from the node's data fields.
            'selector': 'node',
            'style': {
                'label': 'data(label)',
                'width': 'data(size)',
                'height': 'data(size)',
                'background-color': '#3498db',  # Bright blue
                'background-opacity': 0.9,
                'color': '#ffffff',             # White text
                'font-size': '12px',
                'font-weight': 'bold',
                'text-valign': 'center',
                'text-halign': 'center',
                'text-wrap': 'wrap',
                'text-max-width': '100px',
                'text-outline-width': 2,
                'text-outline-color': '#2980b9',  # Matching outline
                'text-outline-opacity': 0.7,
                'border-width': 3,
                'border-color': '#1abc9c',      # Turquoise border
                'border-opacity': 0.9,
                'shape': 'ellipse',
                'transition-property': 'background-color, border-color, border-width, width, height',
                'transition-duration': '0.3s',
                # NOTE(review): confirm 'tooltip-text' is a style property the widget honours.
                'tooltip-text': 'data(tooltip_text)'
            }
        },
        {
            'selector': 'node:selected',
            'style': {
                'background-color': '#e74c3c',  # Pomegranate red
                'border-width': 4,
                'border-color': '#c0392b',
                'text-outline-color': '#e74c3c',
                # NOTE(review): verify that arithmetic inside a data() mapping is
                # accepted; if not, these two size values have no effect.
                'width': 'data(size) * 1.2',    # Enlarge selected nodes
                'height': 'data(size) * 1.2'
            }
        },
        {
            # NOTE(review): confirm the bundled Cytoscape.js recognises the :hover pseudo-selector.
            'selector': 'node:hover',
            'style': {
                'background-color': '#9b59b6',  # Purple on hover
                'border-width': 4,
                'border-color': '#8e44ad',
                'cursor': 'pointer',
                'z-index': 999
            }
        },
        {
            # Default look of every edge; the predicate is drawn as the edge label.
            'selector': 'edge',
            'style': {
                'label': 'data(label)',
                'width': 2.5,
                'curve-style': 'bezier',
                'line-color': '#2ecc71',         # Green
                'line-opacity': 0.8,
                'target-arrow-color': '#27ae60',
                'target-arrow-shape': 'triangle',
                'arrow-scale': 1.5,
                'font-size': '10px',
                'font-weight': 'normal',
                'color': '#2c3e50',
                'text-background-opacity': 0.9,
                'text-background-color': '#ecf0f1',
                'text-background-shape': 'roundrectangle',
                'text-background-padding': '3px',
                'text-rotation': 'autorotate',
                'edge-text-rotation': 'autorotate',
                'transition-property': 'line-color, width, target-arrow-color',
                'transition-duration': '0.3s',
                'tooltip-text': 'data(tooltip_text)'
            }
        },
        {
            'selector': 'edge:selected',
            'style': {
                'line-color': '#f39c12',         # Yellow-orange
                'target-arrow-color': '#d35400',
                'width': 4,
                'text-background-color': '#f1c40f',
                'color': '#ffffff',               # White text
                'z-index': 998
            }
        },
        {
            'selector': 'edge:hover',
            'style': {
                'line-color': '#e67e22',         # Orange on hover
                'width': 3.5,
                'cursor': 'pointer',
                'target-arrow-color': '#d35400',
                'z-index': 997
            }
        },
        {
            # Rule for elements carrying the 'center-node' class.
            # NOTE(review): nothing in this cell assigns that class -- confirm it is used.
            'selector': '.center-node',
            'style': {
                'background-color': '#16a085',    # Teal
                'background-opacity': 1,
                'border-width': 4,
                'border-color': '#1abc9c',        # Turquoise border
                'border-opacity': 1
            }
        }
    ]
    
    print("Setting enhanced visual style on widget...")
    cyto_widget.set_style(visual_style)
    
    # Set an animated 'cose' layout. The "Set Layout Algorithm" cell that follows
    # calls set_layout again with different parameters.
    cyto_widget.set_layout(name='cose', 
                          nodeRepulsion=5000, 
                          nodeOverlap=40, 
                          idealEdgeLength=120, 
                          edgeElasticity=200, 
                          nestingFactor=6, 
                          gravity=90, 
                          numIter=2500,
                          animate=True,
                          animationDuration=1000,
                          initialTemp=300,
                          coolingFactor=0.95)
    
    # Collect the ids of hub nodes: those whose stored 'degree' is greater than 10.
    if len(cyto_widget.graph.nodes) > 0:
        main_nodes = [node.data['id'] for node in cyto_widget.graph.nodes 
                     if node.data.get('degree', 0) > 10]
        
        # Append one id-specific rule per hub node (flat purple fill, larger font).
        for i, node_id in enumerate(main_nodes):
            # Later rules are appended after the generic 'node' rule in the list.
            center_style = {
                'selector': f'node[id = "{node_id}"]',
                'style': {
                    'background-color': '#9b59b6',   # Purple
                    'background-opacity': 0.95,
                    'border-width': 4,
                    'border-color': '#8e44ad',      # Darker purple border
                    'border-opacity': 1,
                    'text-outline-width': 3,
                    'text-outline-color': '#8e44ad',
                    'font-size': '14px'
                }
            }
            visual_style.append(center_style)
        
        # Re-apply the stylesheet so the appended hub rules take effect.
        cyto_widget.set_style(visual_style)
    
    print("Enhanced colorful and interactive style applied successfully.")
else:
    print("Skipping style definition.")

Defining enhanced colorful and interactive visual style...
Setting enhanced visual style on widget...
Enhanced colorful and interactive style applied successfully.


### 4. Set Layout Algorithm

In [179]:
if not cyto_widget:
     print("Skipping layout setting.")
else:
    print("Setting layout algorithm ('cose')...")
    # Parameters for the 'cose' layout, passed to the widget unchanged.
    cose_layout_options = {
        'name': 'cose',
        'animate': True,
        # Spacing / force parameters
        'nodeRepulsion': 4000,
        'nodeOverlap': 40,
        'idealEdgeLength': 120,
        'edgeElasticity': 150,
        'nestingFactor': 5,
        'gravity': 100,
        # Iteration / cooling schedule
        'numIter': 1500,
        'initialTemp': 200,
        'coolingFactor': 0.95,
        'minTemp': 1.0,
    }
    cyto_widget.set_layout(**cose_layout_options)
    print("Layout set. The graph will arrange itself when displayed.")

Setting layout algorithm ('cose')...
Layout set. The graph will arrange itself when displayed.


### 5. Display the Interactive Graph

In [181]:
# Render the widget as this cell's output, or say why there is nothing to show.
if not cyto_widget:
    print("No widget to display.")
else:
    print("Displaying interactive graph widget below...")
    print("Interact: Zoom (scroll), Pan (drag background), Move Nodes (drag nodes), Hover for details.")
    display(cyto_widget)

# Closing banner for the visualization section
separator = "-" * 25
print("\n" + separator + "\nEnd of Visualization Step." + "\n" + separator)

Displaying interactive graph widget below...
Interact: Zoom (scroll), Pan (drag background), Move Nodes (drag nodes), Hover for details.


CytoscapeWidget(cytoscape_layout={'name': 'cose', 'nodeRepulsion': 4000, 'nodeOverlap': 40, 'idealEdgeLength':…


-------------------------
End of Visualization Step.
-------------------------


**Potential Improvements and Further Exploration:**
*   **Run Full Loop:** Execute the LLM extraction and normalization across *all* chunks for a complete graph.
*   **Advanced Normalization:** Implement entity linking or relationship clustering.
*   **Error Handling:** Add retries for LLM calls, better handling of persistent chunk failures.
*   **Prompt Tuning:** Experiment with different models, prompts, and parameters.
*   **Evaluation:** Assess the quality of extracted triples (Precision/Recall).
*   **Richer Visualization:** Use node types for colors/shapes, add community detection coloring, implement more interactive features using ipycytoscape callbacks.
*   **Graph Analysis:** Apply `networkx` algorithms (centrality, paths, etc.).
*   **Persistence:** Store results in a graph database (Neo4j, etc.).