In [1]:
input_text = """
Medical Report:

Patient Information:

Name: John Doe
Date of Birth: 1990-02-12
Sex: Male
Medical History:

Diabetes Mellitus (Type 2) diagnosed in 2015
Hypertension diagnosed in 2018
Hyperlipidemia diagnosed in 2020
Current Medications:

Metformin 500mg twice daily
Lisinopril 10mg once daily
Atorvastatin 20mg once daily
Recent Medical Tests:

Blood Glucose Test: 120mg/dL (normal range: 70-100mg/dL)
Lipid Profile: Total Cholesterol 180mg/dL, HDL 40mg/dL, LDL 100mg/dL
Blood Pressure: 130/80mmHg (normal range: 120/80mmHg)
Diagnosis:

Diabetes Mellitus (Type 2)
Hypertension
Hyperlipidemia
Treatment Plan:

Lifestyle modifications: diet and exercise
Medications: Metformin, Lisinopril, Atorvastatin
Follow-up appointments: every 3 months
Healthcare Providers:

Primary Care Physician: Dr. Jane Smith
Endocrinologist: Dr. John Lee
Cardiologist: Dr. Michael Brown
"""

In [3]:
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting typer<0.10.0,>=0.3.0 (from spacy)
  Downloading typer-0.9.4-py3-none-any.whl.metadata (14 kB)
Downloading typer-0.9.4-py3-none-any.whl (45 kB)
[0mInstalling collected packages: typer
  Attempting uninstall: typer
    Found existing installation: typer 0.12.5
    Uninstalling typer-0.12.5:
      Successfully uninstalled typer-0.12.5
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tts 0.22.0 requires numpy>=1.24.3; python_version > "3.10", but you have numpy 1.23.5 which is incompatible.
gradio 4.44.0 requires typer<1.0,>=0.12; sys_platform != "emscripten", but you have typer 0.9.4 which is incompatible.[0m[31m
[0mSuccessfully installed typer-0.9.4
  return torch._C._cuda_getDeviceCount() > 0
[0mCollecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.

In [6]:
import spacy
from spacy import displacy
from spacy.tokens import Span

# Load the English language model
nlp = spacy.load("en_core_web_sm")

# Process the medical report text
doc = nlp(input_text)

# Extract entities
entities = [(ent.text, ent.label_) for ent in doc.ents]
print(entities)

[('John Doe\nDate of Birth', 'PERSON'), ('1990-02-12', 'DATE'), ('2015', 'DATE'), ('2018', 'DATE'), ('Hyperlipidemia', 'PERSON'), ('2020', 'DATE'), ('Metformin 500', 'PERSON'), ('daily', 'DATE'), ('Lisinopril 10', 'LAW'), ('Atorvastatin 20', 'PERSON'), ('Blood Glucose Test', 'PERSON'), ('70', 'CARDINAL'), ('HDL', 'ORG'), ('LDL 100mg', 'PERSON'), ('130/80mmHg', 'CARDINAL'), ('120/80mmHg', 'CARDINAL'), ('Hypertension\nHyperlipidemia\nTreatment Plan:\n\nLifestyle', 'LAW'), ('Lisinopril', 'PERSON'), ('Atorvastatin\nFollow', 'PERSON'), ('every 3 months', 'DATE'), ('Jane Smith', 'PERSON'), ('Endocrinologist', 'ORG'), ('John Lee\n', 'PERSON'), ('Cardiologist', 'NORP'), ('Michael Brown', 'PERSON')]


In [7]:
# Extract concepts
concepts = [(token.text, token.pos_) for token in doc if token.pos_ in ["NOUN", "VERB", "ADJ"]]
print(concepts)

[('Information', 'NOUN'), ('Name', 'NOUN'), ('Sex', 'NOUN'), ('Diabetes', 'VERB'), ('diagnosed', 'VERB'), ('diagnosed', 'VERB'), ('diagnosed', 'VERB'), ('Current', 'ADJ'), ('Medications', 'NOUN'), ('daily', 'ADJ'), ('daily', 'ADJ'), ('Blood', 'NOUN'), ('normal', 'ADJ'), ('range', 'NOUN'), ('100mg', 'ADJ'), ('Blood', 'NOUN'), ('Pressure', 'NOUN'), ('normal', 'ADJ'), ('range', 'NOUN'), ('Diagnosis', 'NOUN'), ('Diabetes', 'VERB'), ('modifications', 'NOUN'), ('diet', 'NOUN'), ('exercise', 'VERB'), ('Medications', 'NOUN'), ('Follow', 'VERB'), ('appointments', 'NOUN'), ('months', 'NOUN')]


In [8]:
# Create a dictionary to store the extracted entities and concepts
entity_concept_dict = {}

# Iterate through the entities and concepts
for entity, entity_label in entities:
    for concept, concept_pos in concepts:
        if entity_label == "PERSON" and concept_pos == "NOUN":
            # Create a new entry in the dictionary
            entity_concept_dict[entity] = concept
            break

print(entity_concept_dict)

{'John Doe\nDate of Birth': 'Information', 'Hyperlipidemia': 'Information', 'Metformin 500': 'Information', 'Atorvastatin 20': 'Information', 'Blood Glucose Test': 'Information', 'LDL 100mg': 'Information', 'Lisinopril': 'Information', 'Atorvastatin\nFollow': 'Information', 'Jane Smith': 'Information', 'John Lee\n': 'Information', 'Michael Brown': 'Information'}


### Trial for now, this need to be done by following
* extracting text from document >> LLM ( extract or prepare the dict to have the struct )

Dict was not generated using the above cell, this just a sample to understand the output that is needed. **BEING USED in the next cell as input**

In [66]:
entity_concept_dict = {
    'Darshit Purohit': {'Patient', 'Diabetes Mellitus', 'Hypertension', 'Hyperlipidemia'},
    'Diabetes Mellitus': {'Disease', 'Metformin'},
    'Hypertension': {'Disease', 'Lisinopril'},
    'Hyperlipidemia': {'Disease', 'Atorvastatin'},
    'Metformin': {'Medication', 'Treatment'},
    'Lisinopril': {'Medication', 'Treatment'},
    'Atorvastatin': {'Medication', 'Treatment'},
    'Blood Glucose Test': {'Medical Test'},
    'Lipid Profile': {'Medical Test'},
    'Blood Pressure': {'Medical Test'},
    'LDL 100mg/dL': {'Medical Test Result'},
    'Primary Care Physician': {'Healthcare Provider'},
    'Endocrinologist': {'Healthcare Provider'},
    'Cardiologist': {'Healthcare Provider'},
    'Dr. Akash Chavan': {'Primary Care Physician'},
    'Dr. Parth Maru': {'Endocrinologist'},
    'Dr. Ayush Gupta': {'Cardiologist'}
}

## Basic of neo4j language

*Neo4j is a graph database that uses a query language called Cypher to interact with the database. Here are some basic concepts and syntax of Cypher:*

**Basic Syntax**

* MATCH clause: used to match patterns in the graph
* CREATE clause: used to create new nodes or relationships
* SET clause: used to set properties on nodes or relationships
* RETURN clause: used to return the results of a query

**Patterns**

* node: represents a node in the graph
* relationship: represents a relationship between two nodes
* label: represents a label on a node or relationship
* property: represents a property on a node or relationship

**Example Queries**

* MATCH (n:Person {name: "John"}) RETURN n: matches a node with label Person and property name equal to "John" and returns the node
* CREATE (n:Person {name: "Jane"}): creates a new node with label Person and property name equal to "Jane"
* MATCH (n:Person)-[:FRIEND]->(m:Person) RETURN n, m: matches two nodes with label Person that are connected by a relationship with label FRIEND and returns both nodes
* MATCH (n:Person {name: "John"})-[:FRIEND]->(m:Person) RETURN m: matches a node with label Person and property name equal to "John" and returns the node that is connected to it by a relationship with label FRIEND

**Cypher Functions**

* START: used to start a pattern match
* END: used to end a pattern match
* RETURN: used to return the results of a query
* WITH: used to store intermediate results in a query

**Example Cypher Functions**

* START n=node(*) MATCH (n)-[:FRIEND]->(m) RETURN n, m: starts a pattern match at all nodes and matches two nodes that are connected by a relationship with label FRIEND and returns both nodes
* WITH n, m MATCH (n)-[:FRIEND]->(o) RETURN o: starts a pattern match at the nodes returned by the previous query and matches a node that is connected to one of the nodes by a relationship with label FRIEND and returns the node

In [14]:
!pip install neo4j

[0mCollecting neo4j
  Downloading neo4j-5.25.0-py3-none-any.whl.metadata (5.7 kB)
Downloading neo4j-5.25.0-py3-none-any.whl (296 kB)
[0mInstalling collected packages: neo4j
[0mSuccessfully installed neo4j-5.25.0
[0m

In [61]:
import neo4j
from neo4j import GraphDatabase

# Create a new graph database instance
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("darshit", "password"))

In [67]:
# Create a new session
session = driver.session()

# Create nodes for patients, diseases, medications, and healthcare providers
for node, labels in entity_concept_dict.items():
    if len(labels) > 1:
        label = list(labels)[-1]
    else:
        label = list(labels)[0]
#     params = node
# #     CREATE (ee:Person {name: 'Emil', from: 'Sweden', kloutScore: 99})
    session.run("CREATE (n:{} {{name:'{}'}})".format(label.replace(' ','').replace(',',''), node))

In [68]:
# Create relationships between nodes
for node, labels in entity_concept_dict.items():
    for label in labels:
        if label == 'Patient':
            for other_node, other_labels in entity_concept_dict.items():
                if other_node != node and 'Disease' in other_labels:
                    session.run("MATCH (n {name: $name}) MATCH (o {name: $other_node}) CREATE (n)-[:HAS_DISEASE]->(o)", name=node, other_node=other_node)
        elif label == 'Disease':
            for other_node, other_labels in entity_concept_dict.items():
                if other_node != node and 'Medication' in other_labels:
                    session.run("MATCH (n {name: $name}) MATCH (o {name: $other_node}) CREATE (n)-[:TREATED_BY]->(o)", name=node, other_node=other_node)
        elif label == 'Medication':
            for other_node, other_labels in entity_concept_dict.items():
                if other_node != node and 'Treatment' in other_labels:
                    session.run("MATCH (n {name: $name}) MATCH (o {name: $other_node}) CREATE (n)-[:USES]->(o)", name=node, other_node=other_node)
        elif label == 'Healthcare Provider':
            for other_node, other_labels in entity_concept_dict.items():
                if other_node != node and 'Patient' in other_labels:
                    session.run("MATCH (n {name: $name}) MATCH (o {name: $other_node}) CREATE (n)-[:TREATS]->(o)", name=node, other_node=other_node)

# Close the session
session.close()