# Problem Statement: Symptom Finder for HealthEase!

### You've been hired by HealthEase an advanced clinic where AI helps doctors make faster and smarter decisions. Youâ€™ve been tasked with building a system that can assist doctors by finding similar past cases based on symptoms.

Import ChromaDB

In [2]:
import chromadb
import csv

Instantiate a chromadb client and create the `medical_notes` collection.

In [3]:
client = chromadb.Client()
collection = client.get_or_create_collection(name='medical_notes')

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


Implement the `ingest_notes` function to load the CSV and ingest each row (symptoms are ingested as docs and each row is ingested as metadata)

In [8]:
def ingest_notes(csv_path: str):
    with open(csv_path, "r") as file:
        reader = csv.DictReader(file)
        for row in reader:
            patient_id = row["patient_id"]
            symptoms = row["symptoms"]
            collection.add(
                documents=[symptoms],
                metadatas=[row],
                ids=[patient_id]
            )

Implement the `query_notes` function to fetch and print relevant patient records from chromadb. The records should be printed in the following format:

```text
Result 1:
Symptoms: Sharp back pain, numbness in legs
Diagnosis: Herniated Disc
Treatment: Physical therapy, pain management
Doctor Notes: MRI suggested to confirm disc herniation.
```

In [35]:
def query_notes(symptoms: str, top_k: int = 5):
    results = collection.query(query_texts=[symptoms], n_results=top_k)

    for i, (doc, metadata) in enumerate(zip(results["documents"][0], results["metadatas"][0])):
        print(f"\nResult {i+1}:")
        print(f"Symptoms: {doc}")
        print(f"Diagnosis: {metadata['diagnosis']}")
        print(f"Treatment: {metadata['treatment']}")
        print(f"Doctor Notes: {metadata['doctor_notes']}")
    return results

In [32]:
ingest_notes("healthcare_patient_records.csv")

Insert of existing embedding ID: PT-1000


Add of existing embedding ID: PT-1000
Insert of existing embedding ID: PT-1001
Add of existing embedding ID: PT-1001
Insert of existing embedding ID: PT-1002
Add of existing embedding ID: PT-1002
Insert of existing embedding ID: PT-1003
Add of existing embedding ID: PT-1003
Insert of existing embedding ID: PT-1004
Add of existing embedding ID: PT-1004
Insert of existing embedding ID: PT-1005
Add of existing embedding ID: PT-1005
Insert of existing embedding ID: PT-1006
Add of existing embedding ID: PT-1006
Insert of existing embedding ID: PT-1007
Add of existing embedding ID: PT-1007
Insert of existing embedding ID: PT-1008
Add of existing embedding ID: PT-1008
Insert of existing embedding ID: PT-1009
Add of existing embedding ID: PT-1009
Insert of existing embedding ID: PT-1010
Add of existing embedding ID: PT-1010
Insert of existing embedding ID: PT-1011
Add of existing embedding ID: PT-1011
Insert of existing embedding ID: PT-1012
Add of existing embedding ID: PT-1012
Insert of exis

{1: 'PT-1000', 2: 'PT-1001', 3: 'PT-1002', 4: 'PT-1003', 5: 'PT-1004', 6: 'PT-1005', 7: 'PT-1006', 8: 'PT-1007', 9: 'PT-1008', 10: 'PT-1009', 11: 'PT-1010', 12: 'PT-1011', 13: 'PT-1012', 14: 'PT-1013', 15: 'PT-1014', 16: 'PT-1015', 17: 'PT-1016', 18: 'PT-1017', 19: 'PT-1018', 20: 'PT-1019', 21: 'PT-1020', 22: 'PT-1021', 23: 'PT-1022', 24: 'PT-1023', 25: 'PT-1024', 26: 'PT-1025', 27: 'PT-1026', 28: 'PT-1027', 29: 'PT-1028', 30: 'PT-1029', 31: 'PT-1030', 32: 'PT-1031', 33: 'PT-1032', 34: 'PT-1033', 35: 'PT-1034', 36: 'PT-1035', 37: 'PT-1036', 38: 'PT-1037', 39: 'PT-1038', 40: 'PT-1039', 41: 'PT-1040', 42: 'PT-1041', 43: 'PT-1042', 44: 'PT-1043', 45: 'PT-1044', 46: 'PT-1045', 47: 'PT-1046', 48: 'PT-1047', 49: 'PT-1048', 50: 'PT-1049'}


In [33]:
patients_dict={}
with open("healthcare_patient_records.csv",'r') as file:
    reader=csv.DictReader(file)
    for id, row in enumerate(reader,start=1):
        patients_dict[id]=row['patient_id']

print(patients_dict)

{1: 'PT-1000', 2: 'PT-1001', 3: 'PT-1002', 4: 'PT-1003', 5: 'PT-1004', 6: 'PT-1005', 7: 'PT-1006', 8: 'PT-1007', 9: 'PT-1008', 10: 'PT-1009', 11: 'PT-1010', 12: 'PT-1011', 13: 'PT-1012', 14: 'PT-1013', 15: 'PT-1014', 16: 'PT-1015', 17: 'PT-1016', 18: 'PT-1017', 19: 'PT-1018', 20: 'PT-1019', 21: 'PT-1020', 22: 'PT-1021', 23: 'PT-1022', 24: 'PT-1023', 25: 'PT-1024', 26: 'PT-1025', 27: 'PT-1026', 28: 'PT-1027', 29: 'PT-1028', 30: 'PT-1029', 31: 'PT-1030', 32: 'PT-1031', 33: 'PT-1032', 34: 'PT-1033', 35: 'PT-1034', 36: 'PT-1035', 37: 'PT-1036', 38: 'PT-1037', 39: 'PT-1038', 40: 'PT-1039', 41: 'PT-1040', 42: 'PT-1041', 43: 'PT-1042', 44: 'PT-1043', 45: 'PT-1044', 46: 'PT-1045', 47: 'PT-1046', 48: 'PT-1047', 49: 'PT-1048', 50: 'PT-1049'}


In [34]:
query_notes('Stomach pain and bloating')

{'ids': [['PT-1005', 'PT-1009', 'PT-1014', 'PT-1042', 'PT-1043']],
 'distances': [[0.26676374673843384,
   0.26676374673843384,
   0.26676374673843384,
   0.26676374673843384,
   0.26676374673843384]],
 'metadatas': [[{'diagnosis': 'Irritable Bowel Syndrome',
    'doctor_notes': 'IBS symptoms; recommend low FODMAP diet.',
    'patient_id': 'PT-1005',
    'symptoms': 'Abdominal pain, bloating, diarrhea',
    'treatment': 'Diet changes, antispasmodic medication'},
   {'diagnosis': 'Irritable Bowel Syndrome',
    'doctor_notes': 'IBS symptoms; recommend low FODMAP diet.',
    'patient_id': 'PT-1009',
    'symptoms': 'Abdominal pain, bloating, diarrhea',
    'treatment': 'Diet changes, antispasmodic medication'},
   {'diagnosis': 'Irritable Bowel Syndrome',
    'doctor_notes': 'IBS symptoms; recommend low FODMAP diet.',
    'patient_id': 'PT-1014',
    'symptoms': 'Abdominal pain, bloating, diarrhea',
    'treatment': 'Diet changes, antispasmodic medication'},
   {'diagnosis': 'Irritable B

In [47]:
a=[1,2,3,4]
b=["apple","ball","cat","dog"]

square= lambda x: x**2
print(list(map(square,a)))
print(list(filter(lambda x: (x**2)%2==0,a)))

[1, 4, 9, 16]
[2, 4]


In [49]:
print(list(zip(a,b)))

[(1, 'apple'), (2, 'ball'), (3, 'cat'), (4, 'dog')]


In [50]:
print(dict(zip(a,b)))

{1: 'apple', 2: 'ball', 3: 'cat', 4: 'dog'}


In [None]:
fruites=["apple","ball","cat","dog"]
d={}
for idx , val in enumerate(fruites,start=1):
    print(idx,val)
    d[idx]=val
print(d)

1 apple
2 ball
3 cat
4 dog
{1: 'apple', 2: 'ball', 3: 'cat', 4: 'dog'}
