In [59]:
from pypdf import PdfReader

In [60]:
reader = PdfReader("data\comprehensive-clinical-nephrology.pdf")

In [61]:
print(len(reader.pages))
print(reader.pages[0].extract_text())

1547
RICHARD J.JOHNSON
JURGEN FLOEGE
MARCELLO TONELLI
Enhanced
DIGITAL
VERSION
Included
COMPREHENSIVE
CLINICAL
NEPHROLOGY
i
m


In [62]:
pages_text = []
for page in reader.pages:
    pages_text.append(page.extract_text())

print(pages_text[3])

ELSEVIER
1600 John F. Kennedy Blvd.
Suite 1600
Philadelphia, Pennsylvania
COMPREHENSIVE CLINICAL NEPHROLOGY, SEVENTH EDITION ISBN: 978-0-323-82592-4
Copyright © 2024 by Elsevier, Inc. All rights reserved.
No part of this publication may be reproduced or transmitted in any form or by any means, electronic or 
mechanical, including photocopying, recording, or any information storage and retrieval system, without 
permission in writing from the publisher. Details on how to seek permission, further information about the 
Publisher’s permissions policies and our arrangements with organizations such as the Copyright Clearance 
Center and the Copyright Licensing Agency, can be found at our website: www.elsevier.com/permissions
This book and the individual contributions contained in it are protected under copyright by the Publisher 
(other than as may be noted herein).
Notices
Knowledge and best practice in this eld are constantly changing. As new research and experience broad-
en our underst

In [63]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [64]:
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1200,
	chunk_overlap=200,
    separators=["\n\n", "\n", " ", ""]
)

In [65]:
documents = []

In [66]:
from langchain_core.documents import Document

In [67]:
for page_num,page in enumerate(pages_text):
    if not pages_text:
        continue
    
    splits = splitter.split_text(page)
    for i,split in enumerate(splits):
        metadata = {
            "source": "comprehensive-clinical-nephrology",
			"page": page_num,
			"chunk": i
		}
        doc = Document(page_content=split, metadata=metadata)
        documents.append(doc)	

In [57]:
from langchain_huggingface import HuggingFaceEmbeddings

In [58]:
from langchain_chroma import Chroma

In [68]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [71]:
vector_db = Chroma.from_documents(
documents, 
embedding=embeddings,
persist_directory="chroma_db_v2/clinical-nephrology_db"
)


KeyboardInterrupt: 

In [74]:
retriver = vector_db.as_retriever(search_kwargs={"k":3})
query = "What are the symptoms of chronic kidney?"
results = retriver.get_relevant_documents(query)
for r in results:
    print(r.page_content)
    print(r.metadata)

in dialysis discontinuation and survival after hospitalization for serious 
conditions among patients on maintenance dialysis. J Am Soc Nephrol. 
2020;31:149–160.
41. Murtagh FE, Addington-Hall JM, Edmonds PM, et al. Symptoms in 
advanced renal disease: a cross-sectional survey of symptom prevalence 
in stage 5 chronic kidney disease managed without dialysis. J Palliat Med. 
2007;10(6):1266–1276.
42. Davison SN, Tupala B, Wasylynuk BA, Siu V, Sinnarajah A, Triscott J. 
Recommendations for the care of patients receiving conservative kidney 
management: focus on management of CKD and symptoms. Clin J Am 
Soc Nephrol. 2019;14:626–634.
{'chunk': 7, 'page': 1241, 'source': 'comprehensive-clinical-nephrology'}
CKD is dened as abnormalities of kidney structure or function, present for 
more than 3 months, with implications for health. These may include the 
following:
Markers of kidney 
damage
Albuminuria (AER ≥ 30 mg/24 h; uACR ≥ 30 mg/g [≥
3 mg/mmol])
Urine sediment abnormalities
Electroly

In [77]:
emb_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vect_store = Chroma(
    embedding_function=emb_model,
    persist_directory="chroma_db_v2/clinical-nephrology_db"
)

In [78]:
docs = vect_store.similarity_search(query, k=6)
print(docs)

[Document(id='2addf77a-a912-4a0b-8fed-80ce335b41b5', metadata={'source': 'comprehensive-clinical-nephrology', 'page': 1241, 'chunk': 7}, page_content='in dialysis discontinuation and survival after hospitalization for serious \nconditions among patients on maintenance dialysis. J Am Soc Nephrol. \n2020;31:149–160.\n41. Murtagh FE, Addington-Hall JM, Edmonds PM, et al. Symptoms in \nadvanced renal disease: a cross-sectional survey of symptom prevalence \nin stage 5 chronic kidney disease managed without dialysis. J Palliat Med. \n2007;10(6):1266–1276.\n42. Davison SN, Tupala B, Wasylynuk BA, Siu V, Sinnarajah A, Triscott J. \nRecommendations for the care of patients receiving conservative kidney \nmanagement: focus on management of CKD and symptoms. Clin J Am \nSoc Nephrol. 2019;14:626–634.'), Document(id='1e22d64a-ffa6-4b9b-85c7-1b65b02236fb', metadata={'page': 1126, 'source': 'comprehensive-clinical-nephrology', 'chunk': 1}, page_content='CKD is de\ue01fned as abnormalities of kidney 

In [50]:
from langchain_groq import ChatGroq
from langchain_core.messages import SystemMessage, HumanMessage

In [46]:
import os

In [47]:
from dotenv import load_dotenv
load_dotenv()

True

In [53]:
from groq import Groq

In [54]:
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

In [56]:
response = client.chat.completions.create(
    model="openai/gpt-oss-20b",
    messages=[
        {"role": "system", "content": "You are a friendly assistant."},
        {"role": "user", "content": "Hello, how are you?"},
    ]
)

print(response.choices[0].message.content)

I’m doing great, thanks for asking! How are you doing today?
