In [None]:
##########################################################################################

In [5]:
import re

from langchain.chains import LLMChain
from langchain.llms import Ollama
from langchain.prompts import PromptTemplate

# DeepSeek-R1 (8B) served through Ollama; this handle is what the chain below queries.
MODEL_NAME = "deepseek-r1:8b"
llm = Ollama(model=MODEL_NAME)





Original Text:
 
Diabetes mellitus is a chronic metabolic disorder characterized by high blood sugar levels over a prolonged period.

The primary cause of diabetes is the body's inability to produce enough insulin or effectively use the insulin it produces.

Common symptoms include frequent urination, increased thirst, unexplained weight loss, and fatigue.

Older adults and individuals with a family history of diabetes are more susceptible to this condition.

Medications like Metformin are commonly prescribed to manage blood sugar levels in diabetic patients.


Extracted Triplets:
 ```json
{
  "entities_and_triples": [
    "[1], Disease:Diabetes",
    "[2], Monitoring:Blood glucose levels",
    "[3], Exercise/Physical Activity:Exercise",
    "[4], Diet/Balanced Diet:Diet",
    "2 RELATIONSHIP 1",  // Blood glucose level monitoring relates to managing Diabetes
    "3 RELATIONSHIP 1",  // Exercise relates to managing Diabetes
    "4 RELATIONSHIP 1"   // Diet relates to managing Diabetes

In [11]:
# Disabled first draft of the extraction prompt. Everything between the ''' markers
# is a plain string literal, so this cell does NOT assign `prompt_template`.
# This draft asked for numbered entities ("[1], ENTITY_TYPE:Entity_Name") and
# index-based triples ("[1] RELATIONSHIP [2]") in a single JSON list.
'''
# Define the triplet extraction prompt template
prompt_template = PromptTemplate(
    input_variables=["text"],
    template="""
    You're an expert in NLP for tasks related to entity linking in medical texts.
    For the given text snippet, you're expected to:
    [1] Identify all relevant entities (e.g., diseases, symptoms, medications, demographic information, etc.).
    [2] Determine the relationships between these entities.
    [3] Assign meaningful labels to both entities and relationships.
    
    **Text:**
    {text}
    
    Extract and return the information in JSON format, strictly following this structure:
    ```json
    {{
        "entities_and_triples": [
            "[1], ENTITY_TYPE:Entity_Name",
            "[2], ENTITY_TYPE:Entity_Name",
            "[1] RELATIONSHIP [2]"
        ]
    }}
    ```
    Do not include any explanations, only output the JSON.
    """
)

'''

'\n# Define the triplet extraction prompt template\nprompt_template = PromptTemplate(\n    input_variables=["text"],\n    template="""\n    You\'re an expert in NLP for tasks related to entity linking in medical texts.\n    For the given text snippet, you\'re expected to:\n    [1] Identify all relevant entities (e.g., diseases, symptoms, medications, demographic information, etc.).\n    [2] Determine the relationships between these entities.\n    [3] Assign meaningful labels to both entities and relationships.\n    \n    **Text:**\n    {text}\n    \n    Extract and return the information in JSON format, strictly following this structure:\n    ```json\n    {{\n        "entities_and_triples": [\n            "[1], ENTITY_TYPE:Entity_Name",\n            "[2], ENTITY_TYPE:Entity_Name",\n            "[1] RELATIONSHIP [2]"\n        ]\n    }}\n    ```\n    Do not include any explanations, only output the JSON.\n    """\n)\n\n'

In [12]:
# Disabled second draft of the extraction prompt (string literal only; it does
# NOT assign `prompt_template`). Compared with the first draft it enumerates the
# entity categories and relationship kinds to look for, but it still uses the
# numbered "[1] RELATIONSHIP [2]" output format.
'''
# Define the improved triplet extraction prompt template
prompt_template = PromptTemplate(
    input_variables=["text"],
    template="""
    You're an expert in NLP for medical entity extraction and relationship discovery.
    Your task is to:
    
    1. Identify **all** relevant entities in the given medical text, including:
        - Diseases
        - Symptoms
        - Medications
        - Demographic factors (age groups, gender, risk factors)
        - Treatments (lifestyle, diet, exercise)
        - Medical procedures
        - Biological markers (blood glucose, insulin levels, etc.)
    
    2. Establish **all** meaningful relationships between the entities, including:
        - Causes, risk factors, symptoms of diseases
        - Medications prescribed for diseases
        - Contraindications (e.g., a drug not suitable for a condition)
        - Treatments and diagnostic procedures linked to diseases
        - Interactions between treatments and biological markers

    **Text:**
    {text}
    
    Extract and return the information in JSON format, strictly following this structure:
    ```json
    {{
        "entities_and_triples": [
            "[1], ENTITY_TYPE:Entity_Name",
            "[2], ENTITY_TYPE:Entity_Name",
            "[1] RELATIONSHIP [2]"
        ]
    }}
    ```
    Ensure all relationships are included. Do **not** summarize or omit details.
    """
)
'''

In [17]:
# Disabled third draft of the extraction prompt (string literal only; it does
# NOT assign `prompt_template`). Same task description as the second draft, but
# the output format drops the [1]/[2] placeholders and asks for triples written
# with the actual entity names ("Entity_Name RELATIONSHIP Entity_Name").
'''
prompt_template = PromptTemplate(
    input_variables=["text"],
    template="""
    You're an expert in NLP for medical entity extraction and relationship discovery.
    Your task is to:

    1. Identify **all** relevant entities in the given medical text, including:
        - Diseases
        - Symptoms
        - Medications
        - Demographic factors (age groups, gender, risk factors)
        - Treatments (lifestyle, diet, exercise)
        - Medical procedures
        - Biological markers (blood glucose, insulin levels, etc.)

    2. Establish **all** meaningful relationships between the entities, including:
        - Causes, risk factors, symptoms of diseases
        - Medications prescribed for diseases
        - Contraindications (e.g., a drug not suitable for a condition)
        - Treatments and diagnostic procedures linked to diseases
        - Interactions between treatments and biological markers

    **Text:**
    {text}

    Extract and return the information in JSON format, strictly following this structure:
    ```json
    {{
        "entities_and_triples": [
            "ENTITY_TYPE:Entity_Name",
            "ENTITY_TYPE:Entity_Name",
            "Entity_Name RELATIONSHIP Entity_Name"
        ]
    }}
    ```
    Ensure that:
    - Each relationship explicitly shows the two entities and the relationship name.
    - Use meaningful and natural relationship names like "causes," "is treated with," "is a risk factor for," etc.
    - Do **not** use placeholders like [1], [2]. Instead, use actual entity names.
    - Do **not** summarize or omit details.
    """
)
'''

In [35]:
# Active extraction prompt. The passage is substituted for {text}; the model is
# asked to answer with three JSON lists: "entities", "relationship_types" and
# "relationships". Doubled braces ({{ }}) stand for literal braces in the
# rendered prompt.
EXTRACTION_TEMPLATE = """
    You're an expert in medical NLP for entity and relationship extraction.
    
    **Your task is to:**
    1️⃣ Identify **ALL** relevant entities, including:
       - Diseases, Symptoms, Causes, Treatments, Risk Factors, Medications, Demographics.
    2️⃣ Extract **ALL** relationships between these entities.
    3️⃣ Structure the output into three sections:  
       - **Entities first**
       - **Relationship types**
       - **Relationships between entities**
    
    **Text:**
    {text}

    **Output the information as JSON in the following structured format:**
    ```json
    {{
        "entities": [
            "ENTITY_TYPE:Entity_Name"
        ],
        "relationship_types": [
            "Relationship_Type"
        ],
        "relationships": [
            "Entity_Name Relationship_Type Entity_Name"
        ]
    }}
    ```

    **Instructions:**
    - **Extract every single relevant entity** (no missing symptoms or risk factors).
    - **Ensure that every extracted entity has at least one relationship** (no isolated entities).
    - **First list all identified entities.**
    - **Then, list distinct relationship types found in the text.**
    - **Finally, structure relationships as `Entity_Name Relationship Entity_Name`.**
    - **Do NOT include explanations or summaries—just output JSON.**
    """

prompt_template = PromptTemplate(
    template=EXTRACTION_TEMPLATE,
    input_variables=["text"],
)


In [36]:
# Tie the extraction prompt to the model: the chain fills in the prompt and queries the LLM.
llm_chain = LLMChain(prompt=prompt_template, llm=llm)



In [43]:
# Example medical text for triplet extraction: five one-sentence paragraphs about
# diabetes (definition, cause, symptoms, susceptible groups, medication).
_paragraphs = (
    "Diabetes mellitus is a chronic metabolic disorder characterized by high blood sugar levels over a prolonged period.",
    "The primary cause of diabetes is the body's inability to produce enough insulin or effectively use the insulin it produces.",
    "Common symptoms include frequent urination, increased thirst, unexplained weight loss, and fatigue.",
    "Older adults and individuals with a family history of diabetes are more susceptible to this condition.",
    "Medications like Metformin are commonly prescribed to manage blood sugar levels in diabetic patients.",
)

# Paragraphs are separated by one blank line, and the text starts and ends with a newline.
medical_text = "\n" + "\n\n".join(_paragraphs) + "\n"



In [40]:
# Disabled alternative sample passage about COVID-19 (transmission, symptoms,
# risk groups, treatment, prevention). It is wrapped in a ''' string literal,
# so this cell does NOT assign `medical_text`.
'''
medical_text = """
Coronavirus (COVID-19) is a viral infection caused by the SARS-CoV-2 virus. It spreads mainly through respiratory droplets when an infected person coughs, sneezes, or talks. The virus can also spread by touching contaminated surfaces and then touching the face.

Common symptoms of COVID-19 include fever, cough, sore throat, shortness of breath, fatigue, body aches, and loss of taste or smell. Some people may also experience headaches, diarrhea, or nasal congestion. Severe cases can lead to pneumonia, difficulty breathing, organ failure, or death.

Elderly people, individuals with weakened immune systems, and those with pre-existing conditions like diabetes, heart disease, or lung problems are at higher risk of severe illness. Pregnant women and people with obesity may also face complications.

There is no specific cure for COVID-19, but vaccines help prevent severe illness. Treatment mainly includes rest, hydration, and symptom management. Doctors may prescribe antiviral medications like Paxlovid or Remdesivir for severe cases. Oxygen therapy and ventilators may be needed for critically ill patients.

To reduce the risk of infection, people should wear masks in crowded places, wash hands regularly, and maintain social distancing. Vaccination remains the best protection against severe disease.

Early detection and medical care can improve recovery chances. If symptoms appear, testing and self-isolation are recommended to prevent the spread of the virus.
"""
'''

In [44]:
# Run the LLM to extract triplets
triplet_output = llm_chain.run(medical_text)

# Reasoning models such as deepseek-r1 may put a <think>...</think> trace in front
# of the answer (NOTE(review): confirm against the raw `triplet_output`). Drop it
# explicitly instead of relying on where blank lines happen to fall.
answer_text = re.sub(r"<think>.*?</think>", "", triplet_output, flags=re.DOTALL).strip()

# The prompt asks for a fenced ```json block. Keep that whole block (fences included)
# so blank lines inside the JSON or remarks after it cannot truncate the result.
# The previous split("\n\n")[-1] kept only the text after the LAST blank line.
fenced_block = re.search(r"```(?:json)?\s*.*?```", answer_text, flags=re.DOTALL)

# If the model produced no complete fenced block, fall back to the full answer text.
final_output = fenced_block.group(0) if fenced_block else answer_text



In [45]:
# Show the input passage first, then what the model extracted from it.
for heading, content in (
    ("Original Text", medical_text),
    ("Extracted Triplets", final_output),
):
    print(f"\n{heading}:\n", content)


Original Text:
 
Diabetes mellitus is a chronic metabolic disorder characterized by high blood sugar levels over a prolonged period.

The primary cause of diabetes is the body's inability to produce enough insulin or effectively use the insulin it produces.

Common symptoms include frequent urination, increased thirst, unexplained weight loss, and fatigue.

Older adults and individuals with a family history of diabetes are more susceptible to this condition.

Medications like Metformin are commonly prescribed to manage blood sugar levels in diabetic patients.


Extracted Triplets:
 ```json
{
    "entities": [
        "Disease:Diabetes mellitus",
        "Symptom:frequent urination",
        "Symptom:increased thirst",
        "Symptom:unexplained weight loss",
        "Symptom:fatigue",
        "Cause:insufficient insulin production",
        "Cause:ineffective insulin use",
        "Risk Factor:older adults",
        "Risk Factor:family history of diabetes",
        "Treatment:Metfor