In [3]:
from langchain_community.llms import Ollama

def query_ollama(model: str, disease_description: str, disease_id: str):
    """Ask an Ollama-served model to map one disease onto a fixed class table.

    The prompt embeds ``disease_description`` and ``disease_id`` as a single
    CSV-style row, lists five hard-coded MONDO-coded candidate classes, and
    asks the model to answer with a CSV that carries an extra
    ``Prime_KG_Disease_ID`` column.

    Args:
        model: Model name handed to ``Ollama(model=...)``.
        disease_description: Text interpolated into the prompt as the
            ``OMOP_Disease_Description`` value.
        disease_id: Text interpolated into the prompt as the
            ``OMOP_Disease_ID`` value.

    Returns:
        The value returned by ``Ollama.invoke`` for the prompt, unmodified.

    Note:
        A reachable Ollama server is required; nothing here catches
        connection failures, so they propagate to the caller.
    """
    # The indentation inside the literal is part of the text sent to the model.
    prompt = f"""
    You are to perform zero-shot classification of a disease based on a description. You are provided with the disease classes and their IDs. You are to return the OMOP Disease name + OMOP Disease ID as a CSV with an additional column named Prime_KG_Disease_ID. Do not add any comments.

    I have a disease named: 
    OMOP_Disease_Description, OMOP_Disease_ID
    {disease_description}, {disease_id}

    These are my classifications:
    disease_id,disease_name,disease_description
    MONDO:0004975,Alzheimer's disease,A progressive neurodegenerative disorder characterized by memory loss, cognitive decline, and behavioral changes.
    MONDO:0010150,Parkinson's disease,A neurodegenerative disorder affecting movement, causing tremors, stiffness, and difficulty with balance and coordination.
    MONDO:0007254,Huntington's disease,A hereditary neurodegenerative disorder causing involuntary movements, psychiatric symptoms, and cognitive decline.
    MONDO:0015967,Amyotrophic lateral sclerosis,A progressive motor neuron disease leading to muscle weakness, paralysis, and respiratory failure.
    MONDO:0005015,Cystic fibrosis,A genetic disorder affecting the lungs and digestive system, leading to mucus buildup, breathing difficulties, and infections.
    """

    # A fresh client is created per call, then queried once.
    return Ollama(model=model).invoke(prompt)

if __name__ == "__main__":
    # Smoke-run of the classification prompt with one hand-written disease.
    model_name = "mistral"  # Replace with the desired Ollama model
    disease_desc = "Alzheimer's Disease"
    disease_id = "123213"
    result = query_ollama(
        model=model_name,
        disease_description=disease_desc,
        disease_id=disease_id,
    )
    print(f"Response: {result}")

ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/generate (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f5b89695b20>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [1]:
import networkx as nx
import pandas as pd

In [2]:
# Read the five CSV tables (paths are relative to the notebook's working
# directory), in the same order as before, and start from an empty
# undirected graph.
(
    persondf,
    visit_occurrencedf,
    condition_occurrencedf,
    drug_exposuredf,
    measurementdf,
) = map(
    pd.read_csv,
    (
        "DataFiles/person.csv",
        "DataFiles/visit_occurrence.csv",
        "DataFiles/condition_occurrence.csv",
        "DataFiles/drug_exposure.csv",
        "DataFiles/measurement.csv",
    ),
)
G = nx.Graph()

In [3]:
# Plain Python lists of the id column of each table.
personlist = persondf["person_id"].to_list()
condition_occurrence_id_list = condition_occurrencedf["condition_occurrence_id"].to_list()
drug_exposure_id_list = drug_exposuredf["drug_exposure_id"].to_list()
measurement_id_list = measurementdf["measurement_id"].to_list()

In [4]:
# One row per (person, visit) pair; persons without any visit are dropped by the inner join.
data1df = persondf.merge(visit_occurrencedf, how="inner", on="person_id")

In [5]:
# Connect every patient to each of their visits; the edge's 'relation'
# attribute holds the visit type concept id.
# With the sample data this creates 2 person nodes + 23 visit nodes.
G.add_edges_from(
    (
        visit['person_id'],
        visit['visit_occurrence_id'],
        {'relation': visit['visit_type_concept_id']},
    )
    for _, visit in data1df.iterrows()
)

In [6]:
# Add properties (gender, year of birth, race and ethnicity) to patient nodes.
# A single indexed lookup per node replaces the former scan of every person id
# for every node (plus four boolean-mask filters per hit); the resulting node
# attributes are the same.
person_attrs = (
    persondf
    .drop_duplicates(subset='person_id')  # first row per person, like `.values[0]`
    .set_index('person_id')
)
# node attribute name -> source column in the person table
node_attr_columns = {
    'gender': 'gender_concept_id',
    'birth_year': 'year_of_birth',
    'race': 'race_concept_id',
    'ethnicity': 'ethnicity_concept_id',
}
for node in G.nodes():
    if node not in person_attrs.index:
        continue  # not a person id (e.g. a visit node): leave untouched
    for attr_name, column in node_attr_columns.items():
        G.nodes[node][attr_name] = person_attrs.at[node, column]

In [7]:
# Connect each patient to the condition concepts recorded for them; the edge's
# 'relation' attribute holds the condition_occurrence_id.
# With the sample data this adds 1 condition node and 2 edges (1 per patient).
G.add_edges_from(
    (
        occurrence['person_id'],
        occurrence['condition_concept_id'],
        {'relation': occurrence['condition_occurrence_id']},
    )
    for _, occurrence in condition_occurrencedf.iterrows()
)

In [8]:
# Attach condition-occurrence details to the patient-condition edges.
#
# Fix: the previous loop compared each edge key from G.edges() (a (u, v)
# tuple) with an integer condition_occurrence_id. That comparison is never
# true, so no attribute was ever written. The occurrence id lives in the
# edge's 'relation' attribute, so the match is done on that instead.
condition_edge_columns = (
    'condition_start_date',
    'condition_type_concept_id',
    'visit_occurrence_id',
    'condition_source_value',
    'condition_source_concept_id',
)
condition_by_occurrence = (
    condition_occurrencedf
    .drop_duplicates(subset='condition_occurrence_id')  # first row per id
    .set_index('condition_occurrence_id')
)
for u, v, edge_attrs in G.edges(data=True):
    occurrence_id = edge_attrs.get('relation')
    if occurrence_id not in condition_by_occurrence.index:
        continue
    # Edges built from other tables also store an id in 'relation'. Only accept
    # the edge whose endpoints are this occurrence's person and condition
    # concept, so an id that happens to exist in two tables cannot mis-assign.
    expected_endpoints = {
        condition_by_occurrence.at[occurrence_id, 'person_id'],
        condition_by_occurrence.at[occurrence_id, 'condition_concept_id'],
    }
    if {u, v} != expected_endpoints:
        continue
    for column in condition_edge_columns:
        G.edges[u, v][column] = condition_by_occurrence.at[occurrence_id, column]

In [9]:
# Connect each patient to the drug concepts they were exposed to; the edge's
# 'relation' attribute holds the drug_exposure_id.
# With the sample data this adds 1 drug node and 2 edges (1 per patient).
G.add_edges_from(
    (
        exposure['person_id'],
        exposure['drug_concept_id'],
        {'relation': exposure['drug_exposure_id']},
    )
    for _, exposure in drug_exposuredf.iterrows()
)

In [10]:
# Attach drug-exposure details to the patient-drug edges.
#
# Fix: the previous loop compared each edge key from G.edges() (a (u, v)
# tuple) with an integer drug_exposure_id. That comparison is never true, so
# no attribute was ever written. The exposure id lives in the edge's
# 'relation' attribute, so the match is done on that instead.
drug_edge_columns = (
    'drug_exposure_start_date',
    'drug_exposure_end_date',
    'drug_type_concept_id',
    'quantity',
    'visit_occurrence_id',
    'drug_source_value',
    'drug_source_concept_id',
    'dose_unit_source_value',
)
drug_by_exposure = (
    drug_exposuredf
    .drop_duplicates(subset='drug_exposure_id')  # first row per id
    .set_index('drug_exposure_id')
)
for u, v, edge_attrs in G.edges(data=True):
    exposure_id = edge_attrs.get('relation')
    if exposure_id not in drug_by_exposure.index:
        continue
    # Edges built from other tables also store an id in 'relation'. Only accept
    # the edge whose endpoints are this exposure's person and drug concept, so
    # an id that happens to exist in two tables cannot mis-assign attributes.
    expected_endpoints = {
        drug_by_exposure.at[exposure_id, 'person_id'],
        drug_by_exposure.at[exposure_id, 'drug_concept_id'],
    }
    if {u, v} != expected_endpoints:
        continue
    for column in drug_edge_columns:
        G.edges[u, v][column] = drug_by_exposure.at[exposure_id, column]

In [11]:
# Connect each patient to the measurement concepts recorded for them; the
# edge's 'relation' attribute holds the measurement_id.
# With the sample data this adds 2 measurement nodes.
G.add_edges_from(
    (
        measurement['person_id'],
        measurement['measurement_concept_id'],
        {'relation': measurement['measurement_id']},
    )
    for _, measurement in measurementdf.iterrows()
)

In [12]:
# Attach measurement details to the patient-measurement edges.
#
# Fix: the previous loop compared each edge key from G.edges() (a (u, v)
# tuple) with an integer measurement_id. That comparison is never true, so no
# attribute was ever written. The measurement id lives in the edge's
# 'relation' attribute, so the match is done on that instead.
measurement_edge_columns = (
    'measurement_date',
    'measurement_type_concept_id',
    'value_as_number',
    'visit_occurrence_id',
    'unit_source_value',
    'value_source_value',
)
measurement_by_id = (
    measurementdf
    .drop_duplicates(subset='measurement_id')  # first row per id
    .set_index('measurement_id')
)
for u, v, edge_attrs in G.edges(data=True):
    measurement_id = edge_attrs.get('relation')
    if measurement_id not in measurement_by_id.index:
        continue
    # Edges built from other tables also store an id in 'relation'. Only accept
    # the edge whose endpoints are this measurement's person and measurement
    # concept, so an id that exists in two tables cannot mis-assign attributes.
    expected_endpoints = {
        measurement_by_id.at[measurement_id, 'person_id'],
        measurement_by_id.at[measurement_id, 'measurement_concept_id'],
    }
    if {u, v} != expected_endpoints:
        continue
    for column in measurement_edge_columns:
        G.edges[u, v][column] = measurement_by_id.at[measurement_id, column]

In [13]:
# Sanity checks: show the attributes stored on the two patient nodes.
for patient_node in (42779731, 10866894):
    print(G.nodes[patient_node])
# Expected node count = patients (2) + visits (23) + indications (1) + drugs (1) + measurements (2)
print(f'#nodes: {G.number_of_nodes()}')
print(f'#edges: {G.number_of_edges()}')

{'gender': 8507, 'birth_year': 1990, 'race': 8527, 'ethnicity': 38003564}
{'gender': 8532, 'birth_year': 1991, 'race': 8515, 'ethnicity': 38003564}
#nodes: 29
#edges: 31


In [14]:
# Dump every node id, then every edge together with its attribute dict.
print(f"Nodes: {G.nodes()}")
print(f"Edges: {G.edges(data=True)}")

Nodes: [42779731, 4040291676, 2801186385, 2783217539, 10775402167, 2242067096, 3731844854, 9743922729, 6074101379, 11797111425, 1543100189, 10555805378, 7443984808, 1108391385, 10076561060, 11356581287, 10866894, 4044989767, 6401219436, 11532695969, 861554955, 10919348788, 8952576947, 1051303004, 1304875291, 195585, 41148645, 4159558, 4208414]
Edges: [(42779731, 4040291676, {'relation': 32810}), (42779731, 2801186385, {'relation': 32810}), (42779731, 2783217539, {'relation': 32810}), (42779731, 10775402167, {'relation': 32810}), (42779731, 2242067096, {'relation': 32810}), (42779731, 3731844854, {'relation': 32810}), (42779731, 9743922729, {'relation': 32810}), (42779731, 6074101379, {'relation': 32810}), (42779731, 11797111425, {'relation': 32810}), (42779731, 1543100189, {'relation': 32810}), (42779731, 10555805378, {'relation': 32810}), (42779731, 7443984808, {'relation': 32810}), (42779731, 1108391385, {'relation': 32810}), (42779731, 10076561060, {'relation': 32810}), (42779731, 1