## Imports

In [13]:
import ollama
from IPython.display import display, Markdown
import pandas as pd
from dataclasses import dataclass
from typing import List, Tuple
from collections import defaultdict

## Data Loading

In [None]:
ppi_filtered = "D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Data/d3n_processed_data/ppi_filtered.csv"
ppi_filtered_df = pd.read_csv(ppi_filtered)
ppi_filtered_df.head()

Unnamed: 0,pmid,sent_no,entity_no,sent_iter_no,text,text_with_entity_marker,entity_1,entity_1_idx,entity_1_type,entity_2,entity_2_idx,entity_2_type,relation_id,relation_type,directed,reversed
0,11893341,4,3,0,"The structure suggests that flaviviruses, and ...","The structure suggests that flaviviruses, and ...",distal beta barrels,"[115, 135]",@PROTEIN,domain II,"[138, 148]",@PROTEIN,2,structural,False,False
1,23389466,3,3,0,DENV binds to its receptor molecules mediated ...,DENV binds to its [E1]receptor molecules [/E1]...,receptor molecules,"[18, 37]",@PROTEIN,"viral envelope (E) protein,","[56, 83]",@PROTEIN,2,structural,False,False
2,23389466,3,3,1,DENV binds to its receptor molecules mediated ...,DENV binds to its [E1]receptor molecules [/E1]...,receptor molecules,"[18, 37]",@PROTEIN,virus-receptor complex,"[117, 140]",@PROTEIN,2,structural,False,False
3,23389466,3,3,2,DENV binds to its receptor molecules mediated ...,DENV binds to its receptor molecules mediated ...,"viral envelope (E) protein,","[56, 83]",@PROTEIN,virus-receptor complex,"[117, 140]",@PROTEIN,2,structural,False,False
4,25157370,8,3,0,The viral entry process is mediated by viral p...,The viral entry process is mediated by [E1]vir...,viral proteins,"[39, 54]",@PROTEIN,cellular receptor molecules,"[58, 86]",@PROTEIN,2,structural,False,False


## Dataclass and Util Functions

In [None]:
@dataclass
class LLMData:
    pmid: str
    sentence_no: int
    sentence: str
    entities: List[Tuple[str, str]]  # List of (entity_name, entity_type)
    relations: list


def merge_entities(df):
    grouped_data = defaultdict(lambda: {"sentence": "", "entities": [], "relations": []})
    
    for _, row in df.iterrows():
        key = (row["pmid"], row["sent_no"])
        grouped_data[key]["sentence"] = row["text"]
        grouped_data[key]["entities"].append((row["entity_1"], row["entity_1_type"]))
        grouped_data[key]["entities"].append((row["entity_2"], row["entity_2_type"]))
        grouped_data[key]["relations"].append(row["relation_type"])
    
    llm_data_list = [LLMData(pmid=key[0], sentence_no=key[1], sentence=value["sentence"], 
                             entities=value["entities"], relations=value["relations"]) 
                     for key, value in grouped_data.items()]
    
    return llm_data_list

In [22]:
structured_ppi = merge_entities(ppi_filtered_df)

for item in structured_ppi[:3]:
    print(item)

LLMData(pmid=11893341, sentence_no=4, sentence='The structure suggests that flaviviruses, and by analogy also alphaviruses, employ a fusion mechanism in which the distal beta barrels of domain II of the glycoprotein E are inserted into the cellular membrane.', entities=[('distal beta barrels', '@PROTEIN'), ('domain II', '@PROTEIN')], relations=['structural'])
LLMData(pmid=23389466, sentence_no=3, sentence='DENV binds to its receptor molecules mediated through a viral envelope (E) protein, followed by incorporation of the virus-receptor complex inside cells.', entities=[('receptor molecules', '@PROTEIN'), ('viral envelope (E) protein,', '@PROTEIN'), ('receptor molecules', '@PROTEIN'), ('virus-receptor complex', '@PROTEIN'), ('viral envelope (E) protein,', '@PROTEIN'), ('virus-receptor complex', '@PROTEIN')], relations=['structural', 'structural', 'structural'])
LLMData(pmid=25157370, sentence_no=8, sentence='The viral entry process is mediated by viral proteins and cellular receptor mol

### Previous Prompt Template

In [None]:
prompt = f"""
Context:
I am analyzing biomedical literature to identify biological interactions between predefined entities, including PROTEIN, RNA, DNA, CELL_LINE, and CELL_TYPE.
Given a sentence containing two entities, your task is to determine their interaction based on the sentence’s context.

Task:
Identify the type of interaction between the two given entities.

Entities in the sentence:
- {entity_1} ({entity_1_type})
- {entity_2} ({entity_2_type})

Input Sentence:
"{sentence}"

Label the interaction using one of the following specific categories:
- **Protein-Protein Interactions (PPIs):** Binding, Phosphorylation, Ubiquitination, Cleavage, Inhibition, Activation, Transport, Aggregation, Structural Support.
- **Gene Interactions (GIs):** Transcriptional Activation, Transcriptional Repression, Splicing Regulation, Translation Inhibition, Translation Enhancement, Epigenetic Modification, RNA Stability Regulation.
- **Metabolic & Signaling Interactions:** Signal Transduction, Metabolic Catalysis, Feedback Inhibition, Co-factor Association.
- **Cell Line Interactions:** Proliferation Increase, Proliferation Inhibition, Apoptosis Induction, Cell Cycle Arrest, Drug Sensitivity, Mutation-Induced Resistance, Adhesion Loss, Invasion Enhancement, Immune Evasion.
- **Other Cases:** Competitive Binding, No Known Interaction, Contradictory Evidence.

Output Format:
- **Entities:** {entity_1} ({entity_1_type}), {entity_2} ({entity_2_type})
- **Interaction Type:** [Predicted Interaction Type]
- **Justification:** [Brief explanation based on the sentence]

If an interaction is unclear, respond with "Uncertain" and explain why.
If no known interaction exists, label it as "No Known Interaction".
DO NOT hallucinate interactions—strictly rely on the given sentence.
"""

In [None]:

# def format_prompt(sentence, entity_1, entity_1_type, entity_2, entity_2_type, relation_type):
def format_prompt(LLMData):
    """
    Formats the prompt following the structured format optimized for Gemma:2b.
    """
    interaction_note = "If no known interaction exists or if the interaction is inhibitory, classify it as 'negative'." if LLMData.relation_type == "negative" else ""
    
    prompt = f"""
    [INPUT]
    {sentence}
    
    [CONTEXT]
    Entities: [E1]={entity_1} ({entity_1_type}), [E2]={entity_2} ({entity_2_type}).
    Choose a relation type from the options: "bind", "suppress", "inhibit", "activate", "transcribe", "metabolize", etc.
    If uncertain, respond with "Uncertain".
    If no known interaction exists, label it as "No Known Interaction".
    
    [EXAMPLE]
    Question: What is the relation between Entity 1 and Entity 2?
    Entity 1: antibodies
    Entity 2: 37kD/48-kDa high-affinity laminin receptor
    Answer: (antibodies, bind, 37kD/48-kDa high-affinity laminin receptor)
    
    [QUESTION]
    What is the relation between Entity 1 and Entity 2?
    Entity 1: {entity_1}
    Entity 2: {entity_2}
    
    [OUTPUT INSTRUCTION]
    Provide the response in this format: (Entity 1, Relation, Entity 2)
    {interaction_note}
    """
    return prompt

In [None]:
def format_prompt(template, **kwargs):
    """Formats a template with provided keyword arguments"""
    return template.format(**kwargs)

templates = {
    "qa":"You are an AI Assistant. Answer the following question concisely.\nQuestion: {question} \nAnswer:",
    "summarize": "Summarize the following text: \n{text} \nSummary:",
    "d3n":
}




# Function to interact with local LLM
def chat_with_llm(model, template_name, **kwargs):
    if template_name not in templates:
        raise ValueError(f"Template '{template_name}' not found.")
    
    # prompt = format_prompt(templates[template_name], **kwargs)
    response = ollama.chat(model=model, messages=[{"role": "user", "content": prompt}])

    # Debug: Print the response structure
    print((response.name))

    # Adjust key access based on response format
    if 'message' in response and 'content' in response['message']:
        response_text = response['message']['content']
    elif 'response' in response:  # Alternative key structure
        response_text = response['response']
    else:
        response_text = "Error: Unexpected response format."

    display(Markdown(f"**Prompt:**\n```\n{prompt}\n```\n**Response:** {response_text}"))


# Example usage
model_name = "gemma:2b"  # Change this to your installed local model
question = "What is the capital of France?"
chat_with_llm(model_name, "qa", question=question)

<class 'ollama._types.ChatResponse'>


**Prompt:**
```
You are an AI Assistant. Answer the following question concisely.
Question: What is the capital of France? 
Answer:
```
**Response:** The capital of France is Paris. It is the political, economic, and cultural center of France.