# Summary

This notebook contains steps to

*   Use Stanford CoreNLP to extract entities and relationships from the Anatomy textbook
*   Setup Neo4J Graph DB locally and load the entities and relationships
*   Given a question, extract all the relationships for the entities in that question
*   Connect to OpenAI
*   Construct a Prompt. The Prompt should include the question as well as the entity relationships that were extracted








# Install the StanfordCoreNLP Dependencies

In [None]:
! pip install stanfordcorenlp
! pip install pycorenlp

Collecting stanfordcorenlp
  Using cached stanfordcorenlp-3.9.1.1-py2.py3-none-any.whl.metadata (1.3 kB)
Using cached stanfordcorenlp-3.9.1.1-py2.py3-none-any.whl (5.7 kB)
Installing collected packages: stanfordcorenlp
Successfully installed stanfordcorenlp-3.9.1.1
Collecting pycorenlp
  Using cached pycorenlp-0.3.0.tar.gz (1.3 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pycorenlp
  Building wheel for pycorenlp (setup.py): started
  Building wheel for pycorenlp (setup.py): finished with status 'done'
  Created wheel for pycorenlp: filename=pycorenlp-0.3.0-py3-none-any.whl size=2129 sha256=f794966ee0977459456cd3ccdda7921f9a2695e706ebb2721b10767149496acf
  Stored in directory: c:\users\aliim\appdata\local\pip\cache\wheels\17\02\ab\eb36312f7e3055fed374e5f0c129514fb7aca9a143f1b1b8a3
Successfully built pycorenlp
Installing collected packages: pycorenlp
Successfully installed pycorenlp-0.3.0

# Start and Connect to the StanfordCoreNLP Server

In [None]:
# Before executing this cell, make sure the CoreNLP server is running:
# cd stanford-corenlp-full-2018-10-05
# java -mx6g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -timeout 5000



import os
from pycorenlp import StanfordCoreNLP

# Create the client used by the extraction helpers, pointing at
# http://localhost:9000. The server is expected to be running already
# (see the start-up command in the comments at the top of this cell).
nlp = StanfordCoreNLP('http://localhost:9000')

# Utility Functions to extract the entities and relationships

In [None]:
def extract_entities(text):
    """Extract named-entity mentions from ``text`` with Stanford CoreNLP.

    Sends ``text`` to the module-level ``nlp`` client using the
    ``tokenize,ssplit,pos,lemma,ner`` annotators and collects the entity
    mentions of every sentence in the response.

    Args:
        text (str): Text to annotate.

    Returns:
        list[dict]: One ``{'text': ..., 'label': ...}`` dict per entity
        mention, in response order. Empty when the response cannot be
        parsed (the parsing error is printed, not raised).
    """
    import json  # local import keeps this cell runnable on its own

    props = {'annotators': 'tokenize,ssplit,pos,lemma,ner',
             'pipelineLanguage': 'en',
             'outputFormat': 'json'}
    result = nlp.annotate(text, properties=props)

    entities = []
    try:
        # Accept an already-decoded dict as well as raw JSON text.
        output = result if isinstance(result, dict) else json.loads(result)
        for sentence in output.get('sentences', []):
            # A sentence without mentions may lack the key entirely.
            for entity in sentence.get('entitymentions', []):
                entities.append({'text': entity['text'], 'label': entity['ner']})
    except (json.JSONDecodeError, TypeError, AttributeError) as e:
        print(f"Error processing JSON output from Stanford CoreNLP: {e}")

    return entities


In [None]:
def extract_relationships(text):
    """Extract OpenIE (subject, relation, object) triples from ``text``.

    Sends ``text`` to the module-level ``nlp`` client using the
    ``tokenize,ssplit,pos,depparse,openie`` annotators and collects the
    ``openie`` triples of every sentence in the response.

    Args:
        text (str): Text to annotate.

    Returns:
        list[dict]: One ``{'subject': ..., 'relation': ..., 'object': ...}``
        dict per triple. If the response cannot be processed, the error is
        printed and the triples collected so far are returned.
    """
    import json  # local import keeps this cell runnable on its own

    props = {'annotators': 'tokenize,ssplit,pos,depparse,openie',
             'pipelineLanguage': 'en'}
    result = nlp.annotate(text, properties=props)

    relationships = []
    try:
        # Accept an already-decoded dict as well as raw JSON text.
        output = result if isinstance(result, dict) else json.loads(result)
        for sentence in output.get('sentences', []):
            for relation in sentence.get('openie', []):
                relationships.append({
                    'subject': relation['subject'],
                    'relation': relation['relation'],
                    'object': relation['object']
                })
    except (ValueError, TypeError, KeyError, AttributeError) as e:
        # Stay best-effort, but only for data/parsing problems and with the
        # cause shown; interrupts and unrelated errors propagate.
        print(f"Error processing JSON output from Stanford CoreNLP: {e}")

    return relationships

# Install Neo4J

In [None]:
! pip install neo4j




Collecting neo4j
  Using cached neo4j-5.27.0-py3-none-any.whl.metadata (5.9 kB)
Using cached neo4j-5.27.0-py3-none-any.whl (301 kB)
Installing collected packages: neo4j
Successfully installed neo4j-5.27.0


# Neo4J Utility Methods

In [None]:
# Make sure Neo4j is downloaded and running locally before executing this cell. The connection URI and the password are available once Neo4j has started.

from neo4j import GraphDatabase

# Connect to Neo4j.
# Connection settings are read from the NEO4J_URI / NEO4J_USER /
# NEO4J_PASSWORD environment variables so real credentials need not be stored
# in the notebook; the defaults match the local development setup.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
driver = GraphDatabase.driver(
    uri,
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "hello1234"),
    ),
)

# Transaction function: write extracted triples as nodes and edges
def create_graph(tx, entities, relationships):
    """Store relationship triples in Neo4j through the transaction ``tx``.

    First pass: MERGE one ``Entity`` node for the subject and for the object
    of every triple. Second pass: MERGE a ``RELATIONSHIP`` edge from subject
    to object whose ``type`` property carries the relation text.

    Args:
        tx: Object exposing ``run(query, **params)``.
        entities: Accepted for interface compatibility; not read here.
        relationships: Iterable of dicts with ``subject``, ``relation`` and
            ``object`` keys.
    """
    node_query = "MERGE (e:Entity {name: $name})"
    edge_query = """
        MATCH (e1:Entity {name: $source}), (e2:Entity {name: $target})
        MERGE (e1)-[:RELATIONSHIP {type: $relation}]->(e2)
        """

    # Pass 1: make sure both endpoints of every triple exist as nodes.
    for triple in relationships:
        for endpoint_key in ("subject", "object"):
            tx.run(node_query, name=triple[endpoint_key])

    # Pass 2: connect the endpoints.
    for triple in relationships:
        tx.run(
            edge_query,
            source=triple["subject"],
            target=triple["object"],
            relation=triple["relation"],
        )

# Populate the graph
def build_neo4j_graph(entities, relationships):
    """Write the extracted triples to Neo4j in one managed write transaction.

    Opens a session on the module-level ``driver`` and runs ``create_graph``
    with the given arguments.

    Args:
        entities: Passed through to ``create_graph``.
        relationships: Passed through to ``create_graph``.
    """
    with driver.session() as session:
        # Prefer `execute_write` (5.x driver name); fall back to the older
        # `write_transaction` when the session does not provide it.
        run_write = getattr(session, "execute_write", None)
        if run_write is None:
            run_write = session.write_transaction
        run_write(create_graph, entities, relationships)

In [None]:
# Sample sentence used to try out both extraction helpers.
text = "John Doe, was diagnosed with Type 2 diabetes mellitus (T2DM) and hypertension by Dr. Smith at St. John's Hospital"
entities = extract_entities(text)
relationships = extract_relationships(text)

# Show what each helper returned for the sample sentence.
print("Entities:", entities)
print("Relationships:", relationships)

Entities: [{'text': 'John Doe', 'label': 'PERSON'}, {'text': '2', 'label': 'NUMBER'}, {'text': 'diabetes mellitus', 'label': 'CAUSE_OF_DEATH'}, {'text': 'hypertension', 'label': 'CAUSE_OF_DEATH'}, {'text': 'Smith', 'label': 'PERSON'}, {'text': "St. John's Hospital", 'label': 'ORGANIZATION'}]
Relationships: [{'subject': 'John Doe', 'relation': 'was', 'object': 'diagnosed'}, {'subject': 'John Doe', 'relation': 'was diagnosed with', 'object': 'Type 2 diabetes mellitus'}, {'subject': 'John Doe', 'relation': 'was', 'object': "diagnosed with Type 2 diabetes mellitus at St. John 's Hospital"}, {'subject': 'St. John', 'relation': 'at', 'object': 'Hospital'}, {'subject': 'John Doe', 'relation': 'was diagnosed with', 'object': 'T2DM'}, {'subject': 'John Doe', 'relation': 'was diagnosed at', 'object': "St. John 's Hospital"}]


In [None]:
! pip install datasets



In [None]:
from datasets import load_dataset
# Smoke test: extract and print the relationships of the first chunk only.
dataset = load_dataset("json", data_files=r"C:\\Users\\aliim\projects\\files\\AI_Healthcare\\textbooks\\chunk\\Anatomy_Gray.jsonl", split='train')
for entry in dataset:
    relationships = extract_relationships(entry["content"])
    print(relationships)
    # Only the first chunk is processed. To load the whole book into Neo4j,
    # call build_neo4j_graph(None, relationships) here and remove the break.
    break

[{'subject': 'Anatomy', 'relation': 'includes', 'object': 'structures'}, {'subject': 'term anatomy', 'relation': 'mean', 'object': 'gross anatomy'}, {'subject': 'term anatomy', 'relation': 'used by', 'object': 'itself'}, {'subject': 'term anatomy', 'relation': 'mean', 'object': 'anatomy'}, {'subject': 'Microscopic anatomy', 'relation': 'is study of', 'object': 'cells'}, {'subject': 'anatomy', 'relation': 'also called', 'object': 'histology'}, {'subject': 'anatomy', 'relation': 'called', 'object': 'histology'}, {'subject': 'study', 'relation': 'using', 'object': 'microscope'}, {'subject': 'anatomy', 'relation': 'is study of', 'object': 'cells'}, {'subject': 'Microscopic anatomy', 'relation': 'is', 'object': 'study'}, {'subject': 'Microscopic anatomy', 'relation': 'called', 'object': 'histology'}, {'subject': 'Microscopic anatomy', 'relation': 'also called', 'object': 'histology'}, {'subject': 'anatomy', 'relation': 'is', 'object': 'study'}, {'subject': 'Anatomy', 'relation': 'leads phys

In [None]:
from datasets import load_dataset
# Export every chunk of the book to its own text file: data0.txt, data1.txt, ...
dataset = load_dataset("json", data_files=r"C:\\Users\\aliim\projects\\files\\AI_Healthcare\\textbooks\\chunk\\Anatomy_Gray.jsonl", split='train')

# Every backslash is escaped so that no sequence such as "\p" is read as an
# (invalid) escape sequence.
output_dir = "C:\\Users\\aliim\\projects\\files\\AI_Healthcare\\textbooks\\chunk\\Anatomy_Gray"
os.makedirs(output_dir, exist_ok=True)  # do not fail when the folder is missing

for index, entry in enumerate(dataset):
    file_path = output_dir + "\\data" + str(index) + ".txt"
    # Explicit UTF-8: the platform default encoding may be unable to represent
    # some characters of the chunk text.
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(entry["content"])

  file_path = "C:\\Users\\aliim\projects\\files\\AI_Healthcare\\textbooks\\chunk\\Anatomy_Gray\\data" + str(index) + ".txt"


In [None]:
def print_neo4j_graph():
    """
    Prints the nodes and relationships in a Neo4j graph.

    Takes no arguments: the queries run on a session opened from the
    module-level ``driver``.

    For every node the identifier, labels and property dict are printed; for
    every relationship the identifier, type, start/end node identifiers and
    property dict. Any exception raised while querying or printing is caught
    and printed as ``Error: ...`` instead of being propagated.
    """
    def _ident(item):
        # Prefer `element_id`; fall back to the older integer `id` when the
        # object does not provide it.
        ident = getattr(item, "element_id", None)
        return item.id if ident is None else ident

    try:
        with driver.session() as session:
            # Materialise both result sets while the session is still open.
            nodes = [record["n"] for record in session.run("MATCH (n) RETURN n")]
            relationships = [record["r"] for record in session.run("MATCH ()-[r]->() RETURN r")]

            # Print nodes
            print("Nodes:")
            for node in nodes:
                # dict(node.items()) yields the stored properties; looking up
                # a key named 'properties' would only find a property that
                # happens to have that name.
                print(f"  - {_ident(node)}: {node.labels} - {dict(node.items())}")

            # Print relationships
            print("\nRelationships:")
            for relationship in relationships:
                print(
                    f"  - {_ident(relationship)}: {relationship.type} - "
                    f"({_ident(relationship.start_node)}) -> ({_ident(relationship.end_node)}) - "
                    f"{dict(relationship.items())}"
                )

    except Exception as e:
        print(f"Error: {e}")


In [None]:
def query_graph(entity_name):
    """Return the outgoing relationships of the entity named ``entity_name``.

    Matches ``Entity`` nodes by their ``name`` property and follows every
    outgoing edge to another ``Entity`` node.

    Args:
        entity_name: Value compared against the ``name`` property.

    Returns:
        list[dict]: One dict per matched edge with the keys ``source``,
        ``relation`` (the edge's ``type`` property) and ``target``.
    """
    cypher = """
        MATCH (e1:Entity {name: $name})-[r]->(e2:Entity)
        RETURN e1.name AS source, r.type AS relation, e2.name AS target
        """
    columns = ("source", "relation", "target")
    rows = []
    with driver.session() as db:
        # Read every record before the session is closed.
        for record in db.run(cypher, name=entity_name):
            rows.append({column: record[column] for column in columns})
    return rows

In [None]:
def delete_graph():
    """Delete every node in the graph, detaching its relationships first.

    Runs a single ``MATCH (n) DETACH DELETE n`` statement on a session opened
    from the module-level ``driver``.
    """
    wipe_statement = "\n    MATCH (n) DETACH DELETE n\n    "
    with driver.session() as db:
        db.run(wipe_statement)

In [None]:
# Look up the relationships stored for the "Anatomy" entity and print them.
result = query_graph("Anatomy")
print(result)


# Install OpenAI

In [None]:
! pip install openai



# OpenAI Utility Methods

In [None]:
import openai

# Read the OpenAI API key from the OPENAI_API_KEY environment variable so that
# no secret is stored in the notebook. Export it before starting Jupyter.
# NOTE(review): any key that was ever saved in this cell should be treated as
# leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

def query_openai(prompt, model="gpt-4o", temperature=0.7, max_tokens=200):
    """
    Send a query to OpenAI's LLM and return the response.

    The prompt is sent as the user message of a chat completion, preceded by
    a fixed "You are a helpful assistant." system message.

    Parameters:
    - prompt (str): The query or instruction for the LLM.
    - model (str): The model to use (e.g., "gpt-4", "gpt-3.5-turbo").
    - temperature (float): Controls randomness (0.0 for deterministic, 1.0 for creative).
    - max_tokens (int): Maximum number of tokens to generate in the response.

    Returns:
    - str: The LLM response, or "Error: <details>" when the API call fails.
    """
    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_tokens=max_tokens
        )
        return response.choices[0].message.content
    except openai.OpenAIError as e:
        # The SDK generation that offers `openai.chat.completions` exposes its
        # base exception as `openai.OpenAIError`; it has no `openai.error`
        # module, so referencing that would itself raise AttributeError.
        return f"Error: {e}"

# Example: Ask Question, Extract Entity and Relationships, Ask OpenAI for response

In [None]:
# Example query
question = """
A lesion causing compression of the facial nerve at the stylomastoid foramen will cause ipsilateral

A. paralysis of the facial muscles.
B. paralysis of the facial muscles and loss of taste.
C. paralysis of the facial muscles, loss of taste and lacrimation.
D. paralysis of the facial muscles, loss of taste, lacrimation and decreased salivation.

"""

entities = extract_entities(question)
relationships = extract_relationships(question)

# Names to look up in the graph: entity mentions plus both ends of every
# extracted triple.
entity_list = [entity["text"] for entity in entities]
for relationship in relationships:
    entity_list.append(relationship["subject"])
    entity_list.append(relationship["object"])
# The same name is usually collected several times; query each one only once,
# keeping first-seen order.
entity_list = list(dict.fromkeys(entity_list))

# One "source relation target" line per graph fact.
entityInfo = []
for entity in entity_list:
    result = query_graph(entity)
    for r in result:
        entityInfo.append(f"{r['source']} {r['relation']} {r['target']}\n")
# Drop repeated facts so the prompt does not spend tokens on duplicates.
entityInfo = list(dict.fromkeys(entityInfo))

prompt = question + "\nPlease use only the following information in the prompt to answer the question. Also please specify whether information from any other sources was used.\n" + "".join(entityInfo)
print(prompt)

response = query_openai(prompt)
print("Response:", response)


A lesion causing compression of the facial nerve at the stylomastoid foramen will cause ipsilateral

A. paralysis of the facial muscles.
B. paralysis of the facial muscles and loss of taste.
C. paralysis of the facial muscles, loss of taste and lacrimation.
D. paralysis of the facial muscles, loss of taste, lacrimation and decreased salivation.


Please use only the following information in the prompt to answer the question. Also please specify whether information from any other sources was used.
lesion is proximal
lesion is in brain
lesion was on side
lesion results in eye
lesion be along course
lesion is likely affected
lesion possibly caused by hypersensitivity response to release
lesion caused by hypersensitivity response to release
lesion may develop In minority
lesion possibly caused by hypersensitivity response to release in tissues
lesion caused by hypersensitivity response to release in tissues
lesion possibly caused by hypersensitivity response
lesion caused by hypersensitiv