In [2]:
import os
import getpass
from dotenv import load_dotenv

# Load variables from a local .env file, if one exists.
load_dotenv()

# SECURITY: the API key must never be written into the notebook (neither in
# code nor in comments). Any key that was previously embedded in this cell
# should be treated as leaked and revoked/rotated.
# A missing *or empty* GOOGLE_API_KEY triggers an interactive prompt.
if not os.environ.get("GOOGLE_API_KEY"):
    # getpass hides the typed value, so the key never shows up in cell output.
    entered_key = getpass.getpass("Enter your Google API key: ").strip()
    if not entered_key:
        raise ValueError("GOOGLE_API_KEY is required but no value was provided.")
    os.environ["GOOGLE_API_KEY"] = entered_key

# Optional: silence gRPC ALTS warnings
os.environ["GRPC_VERBOSITY"] = "ERROR"
os.environ["GRPC_TRACE"] = ""

# Only reached when a non-empty key is present in the environment.
print("Google API key loaded successfully!")


Google API key loaded successfully!


In [3]:

from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model reused by the later cells; temperature is pinned to 0.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
)

In [4]:
from langchain_community.document_loaders import PyPDFLoader

# Load the policy PDF and flatten the text of every loaded document into one
# space-separated string.
pdf_path = "Health_policy.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()

policy_text = " ".join(page.page_content for page in docs)

print("Policy document loaded. Total characters:", len(policy_text))

# Despite the heading, only the leading 2000 characters are echoed below.
print("\n--- Full document text ---\n")
print(policy_text[:2000])


Policy document loaded. Total characters: 300406

--- Full document text ---

  An Independent Licensee of the
Blue Cross and Blue Shield Association.
Three Penn Plaza East
Newark, NJ 07105-2200
HorizonBlue.com
Dear Valued Customer:
Thank you for choosing Horizon Blue Cross Blue Shield of New Jersey for your health insurance
coverage. We are here to help you understand your benefits and take charge of your health.
The enclosed information will help you better understand your benefits and the value-added programs
available to you as a Horizon BCBSNJ member.
Here are some important tips you should follow:
Keep your Horizon BCBSNJ member ID card with you at all times . It is the key to accessing your
health care benefits. Please present your member ID card whenever you need medical care or services.
Visit directory.HorizonBlue.com/DoctorFinder to use our online Doctor & Hospital Finder.
Here, you can find a participating doctor, hospital or other health care professional. If you would lik

In [5]:
from langchain_core.prompts import PromptTemplate

# Prompt that asks the model to list every medical condition mentioned in the
# policy text as a comma-separated line.
#
# The template is built from adjacent string literals (no triple-quoted
# wrapper), so no stray quote characters reach the model, and the policy text
# is interpolated exactly once. The prompt ends with the "Medical Conditions:"
# cue so the model's reply is the list itself.
condition_extraction_template = PromptTemplate(
    template=(
        "You are a medical examiner and medical coding expert. You are analyzing a health insurance "
        "policy document to extract all medical conditions that need ICD-10 or HCPCS codes.\n\n"
        "Policy:\n"
        "{policy_text}\n\n"
        "Your Tasks:\n"
        "1. Extract ALL medical conditions, diseases, diagnoses, disorders, injuries, and health issues mentioned in the policy.\n"
        "2. Include current conditions, past medical history, primary and secondary diagnoses.\n"
        "3. Be comprehensive — extract all relevant medical terms.\n"
        "4. Keep phrases short and specific (like ICD-10 terminology).\n"
        "5. Return results as comma-separated values.\n\n"
        "Output format:\n"
        "- Comma-separated list of medical conditions\n"
        "- No numbering, no bullets, no explanations\n"
        "- Example: Type 2 Diabetes, Hypertension, Hyperlipidemia\n\n"
        "Medical Conditions:"
    ),
    input_variables=["policy_text"],
)


In [None]:


print("Extracting ICD descriptions from full policy document ...")

# Fill the extraction prompt with the whole policy text and query the model.
icd_prompt = condition_extraction_template.format(policy_text=policy_text)
response = llm.invoke(icd_prompt).content

# The prompt requests comma-separated values, so split on commas as well as on
# newlines; splitting on lines alone would keep the entire list as one entry.
candidate_conditions = (
    part.strip().strip('"').strip()
    for part in response.replace("\n", ",").split(",")
)
# dict.fromkeys drops duplicates while keeping the order the model produced
# (a set would scramble it between runs).
all_icd_descriptions = list(
    dict.fromkeys(condition for condition in candidate_conditions if condition)
)

print("Final Extracted ICD Descriptions from Policy:")
for desc in all_icd_descriptions:
    print(f"- {desc}")


Extracting ICD descriptions from full policy document ...


In [None]:
# Prompt that converts free-form condition names into standardized
# ICD-10/HCPCS code descriptions.
#
# NOTE: this rebinds `condition_extraction_template`, replacing the policy
# extraction prompt defined earlier under the same name; re-running the
# extraction cell after this one would pick up this template instead.
#
# The reply is requested one description per line because a standardized
# description may itself contain commas, which makes a comma-separated reply
# ambiguous to parse.
condition_extraction_template = PromptTemplate(
    template=(
        "You are a medical coding expert specializing in ICD-10 and HCPCS coding.\n\n"
        "Medical Conditions:\n"
        "{all_icd_descriptions}\n\n"
        "Your Task:\n"
        "Convert each medical condition into its standardized ICD-10 or HCPCS code description. "
        "Use the exact medical terminology that appears in ICD-10/HCPCS codebooks.\n\n"
        "Rules:\n"
        "- Be specific (e.g., 'Type 2 diabetes mellitus' not just 'diabetes')\n"
        "- Use standard medical terminology\n"
        "- Keep the same order as input\n"
        "- Return exactly one description per line\n"
        "- No numbering, no bullets, no explanations or extra text\n\n"
        "ICD/HCPCS Descriptions:"
    ),
    input_variables=["all_icd_descriptions"],
)

In [None]:
print("🔎 Standardizing extracted conditions into ICD-10/HCPCS descriptions ...")

# Hand the conditions to the model as readable text rather than as the repr of
# a Python list (brackets and quotes would otherwise end up in the prompt).
conditions_text = ", ".join(all_icd_descriptions)
icd_prompt = condition_extraction_template.format(all_icd_descriptions=conditions_text)

response = llm.invoke(icd_prompt).content

# A multi-line reply is read as one description per line, so commas inside a
# description are preserved. A single-line reply is read as a comma-separated
# list; a lone description that contains a comma would be split in that case.
response_lines = [line for line in response.splitlines() if line.strip()]
raw_descriptions = response_lines if len(response_lines) > 1 else response.split(",")

cleaned_descriptions = (item.strip().strip('"').strip() for item in raw_descriptions)
# dict.fromkeys removes duplicates but keeps the model's ordering.
final_icd_descriptions = list(
    dict.fromkeys(description for description in cleaned_descriptions if description)
)

print("Final Extracted ICD Descriptions from Policy:")
for desc in final_icd_descriptions:
    print(f"- {desc}")

In [1]:
from qdrant_client import QdrantClient


# Connect to the Qdrant server at localhost:6333 and print its collections as
# a quick connectivity check.
client = QdrantClient(
    host="localhost",
    port=6333,
)

existing_collections = client.get_collections()
print(existing_collections)

In [None]:
def format_search_output(query, results):
    """Print the ICD search hits for one query and return them as plain dicts.

    Args:
        query: The search text; echoed in the printed header.
        results: Iterable of mappings. Each must provide 'code', 'short' and
            'similarity_score'; 'long' is optional and defaults to ''.

    Returns:
        A list with one dict per hit, holding the keys 'code', 'short',
        'long' and 'score' (the hit's 'similarity_score').

    Raises:
        KeyError: If a hit lacks 'code', 'short' or 'similarity_score'.
    """
    print(f"\nQuery: {query}")
    print("Results:")

    code_desc_pairs = []
    for result in results:
        code = result['code']
        short_desc = result['short']
        long_desc = result.get('long', '')
        score = result['similarity_score']

        print(f"  {code} - {short_desc}")
        if long_desc:
            # The long description is optional; show it on its own line.
            print(f"     → {long_desc}")
        print(f"     (Score: {score:.3f})")

        # Keep both the code and its descriptions for downstream cells.
        code_desc_pairs.append({
            'code': code,
            'short': short_desc,
            'long': long_desc,
            'score': score
        })

    if not code_desc_pairs:
        # Make an empty result set visible instead of printing a bare header.
        print("  (no matches)")

    return code_desc_pairs





In [None]:
def search_icd(queries_list, top_k=1):
    """Search ICD codes for every query and collect the formatted hits.

    Each query is passed to `search_icd_codes` (not defined in the visible
    cells; presumably provided by an earlier cell or import, TODO confirm) and
    the hits are printed and normalised by `format_search_output`.

    Args:
        queries_list: Iterable of query strings.
        top_k: Forwarded to `search_icd_codes` as `top_k`.

    Returns:
        Dict mapping each query to the list returned by
        `format_search_output`; a repeated query keeps its last result.
    """
    all_results = {}
    for query in queries_list:
        hits = search_icd_codes(query, top_k=top_k)
        all_results[query] = format_search_output(query, hits)
    return all_results

# Search with the standardized descriptions, requesting top_k=1 per query.
test_queries = final_icd_descriptions
results = search_icd(test_queries, top_k=1)

# Compact recap: one "code - short description" line per stored hit.
print("\n=== Final ICD Summary ===")
for query_text, matches in results.items():
    print(f"\nQuery: {query_text}")
    for match in matches:
        print(f"  {match['code']} - {match['short']}")

In [None]:
# temp_icd=["S75.122D", "S75.122A", "S75.129D", "S75.129A", "S75.029D","73.9", "Z73.8", "Z73.89", "W36.9", "Z73"]

In [None]:
from langchain_core.prompts import PromptTemplate

import json

# Free-text prompt: explain the matched ICD codes, prevention and remedies,
# SDOH influences and resources, and which codes the supplied data covers.
template = PromptTemplate(
    template=(
        "You have to explain what the following ICD codes mean in detail: {icd_codes}. "
        "These ICD codes are related to diseases. "
        "Explain what the patient can do to avoid these conditions and give remedies point-wise. "
        "Also, explain how Social Determinants of Health (like income, education, housing, access to healthy food, and social support) can affect these diseases. "
        "Suggest practical resources or community programs that might help. "
        "Finally, suggest which ICD codes are covered in this data: {temp_icd}."
    ),
    input_variables=["icd_codes", "temp_icd"],
    validate_template=True,
)

# `results` is the query -> hits dictionary returned by search_icd.
# Flatten it into the bare list of codes for the {icd_codes} slot.
all_icd_codes = [
    match['code']
    for matches in results.values()
    for match in matches
]

# The complete dictionary, pretty-printed, fills the {temp_icd} slot.
temp_icd_str = json.dumps(results, indent=2)

prompt_text = template.format(
    icd_codes=", ".join(all_icd_codes),
    temp_icd=temp_icd_str,
)

print(prompt_text)


In [None]:
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field

# 1. Pydantic schema for the structured LLM reply.
# Every field is a plain string; the Field descriptions are the per-field
# instructions given to the model, so they are kept free of garbled characters.
class review_pydantic(BaseModel):
    summary: str = Field(
        description="Explain all about those ICD codes in simple language (2–3 lines)."
    )
    solution: str = Field(
        description="List practical steps to avoid or manage the condition, point-wise (one per line)."
    )
    current_icd_codes: str = Field(
        description="These are the ICD codes provided by the user input, e.g., E11."
    )
    covered_icd_codes: str = Field(
        description="These ICD codes are covered in the insurance policy provided by the user."
    )
    sdoh_factors: str = Field(
        description="Explain how social determinants like income, housing, food access, or stress influence this condition, point-wise."
    )
    resources: str = Field(
        description="Provide community or general support resources for managing the condition, point-wise."
    )
    risk_population: str = Field(
        description="Highlight groups more vulnerable due to social or economic conditions, point-wise."
    )

# 2. Prompt for the structured explanation of the matched ICD codes.
# NOTE: this rebinds `template`, replacing the free-text prompt defined in the
# previous cell under the same name.
# {icd_codes} receives the comma-joined codes; {temp_icd} receives the JSON
# dump of the search results.
template = PromptTemplate(
    template=(
        "You are a health assistant. Explain in detail what these ICD codes mean: {icd_codes}. "
        "These ICD codes are disease-related. "
        "Provide the following structured information:\n"
        "1. Summary — explain in 2–3 easy lines.\n"
        "2. Solution — what the patient can do to avoid it (point-wise).\n"
        "3. SDOH Factors — how income, housing, education, or food access affect it (point-wise).\n"
        "4. Resources — community or public programs that help (point-wise).\n"
        "5. Risk Population — who are most vulnerable (point-wise).\n"
        "Also, analyze which ICD codes from {temp_icd} are covered by insurance."
    ),
    input_variables=["icd_codes", "temp_icd"],
    validate_template=True
)

# 3. Build the prompt text from the ICD search results.
import json

# `results` comes from search_icd(): query -> list of hit dicts.
all_icd_codes = [
    match['code']
    for matches in results.values()
    for match in matches
]
temp_icd_str = json.dumps(results, indent=2)

prompt_text = template.format(
    icd_codes=", ".join(all_icd_codes),
    temp_icd=temp_icd_str,
)

# 4. Ask the model for a reply shaped like review_pydantic.
pydantic_model = llm.with_structured_output(review_pydantic)
result = pydantic_model.invoke(prompt_text)

# 5. Print every section of the structured reply, with a 120-dash rule
# between consecutive sections (none after the last one).
report_sections = [
    ("SUMMARY:\n", "summary"),
    ("SOLUTION:\n", "solution"),
    ("CURRENT ICD CODES:\n", "current_icd_codes"),
    ("COVERED ICD CODES:\n", "covered_icd_codes"),
    ("SDOH FACTORS:\n", "sdoh_factors"),
    ("RESOURCES:\n", "resources"),
    ("RISK POPULATION:\n", "risk_population"),
]

for position, (heading, field_name) in enumerate(report_sections):
    if position:
        print("-" * 120)
    print(heading, getattr(result, field_name))


In [51]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import LLMChain
from langchain.schema import HumanMessage, AIMessage
from langchain_google_genai import ChatGoogleGenerativeAI


# Assemble the medical context that is replayed to the chat model on every turn.
#
# NOTE(review): `first_llm_output` is not defined in any visible cell, so this
# cell relies on leftover kernel state. It is indexed like a dict with the keys
# 'summary', 'solution', 'current_icd_codes', 'covered_icd_codes',
# 'SDOH_Factors', 'Resources' and 'Risk_Population'; these do not match the
# lowercase field names of `review_pydantic`. TODO confirm where it comes from.
# The joins below assume each joined value is a list of strings; a plain string
# would be split into single characters. TODO confirm.
context_message = "".join([
    f"🩺 **Medical Summary**:\n{first_llm_output['summary']}\n\n",
    "💡 **Solutions / Preventive Steps**:\n- ",
    "\n- ".join(first_llm_output["solution"]),
    "\n\n",
    f"📘 **Current ICD Codes**: {', '.join(first_llm_output['current_icd_codes'])}\n",
    f"📋 **Covered ICD Codes**: {', '.join(first_llm_output['covered_icd_codes'])}\n\n",
    "🌍 **Social Determinants of Health (SDOH) Factors**:\n- ",
    "\n- ".join(first_llm_output["SDOH_Factors"]),
    "\n\n",
    "🏥 **Helpful Resources**:\n- ",
    "\n- ".join(first_llm_output["Resources"]),
    "\n\n",
    "⚠️ **High-Risk Populations**:\n- ",
    "\n- ".join(first_llm_output["Risk_Population"]),
])

# NOTE: this rebinds `result` (the structured reply from the previous cell) to
# a one-element message list; the chat prompt's "result" placeholder reads it.
result = [HumanMessage(content=context_message)]

# System instruction for the assistant persona.
SYSTEM_PROMPT = (
    "You are a knowledgeable and empathetic medical assistant. "
    "Use the provided medical context to answer questions clearly and safely."
)

# Message order: system instruction, the medical-summary messages ("result"),
# the running conversation ("chat_history"), then the new user question.
chat_template = ChatPromptTemplate.from_messages([
    ("system", SYSTEM_PROMPT),
    MessagesPlaceholder(variable_name="result"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{query}"),
])

# Chain binding the chat prompt to the shared model.
llm_chain = LLMChain(
    llm=llm,
    prompt=chat_template,
    verbose=False,
)

# Conversation so far, as alternating HumanMessage / AIMessage entries.
chat_history = []

print("🩺 Medical Assistant is ready! Type your question or 'exit' to stop.\n")

# Interactive loop: read a question, call the chain, record the turn.
while True:
    # Strip surrounding whitespace so inputs such as "quit " still end the chat.
    query = input("You: ").strip()
    if query.lower() in ("exit", "quit"):
        print("Assistant: Goodbye! Take care.")
        break
    if not query:
        # Nothing typed: ask again instead of sending an empty message.
        continue

    response = llm_chain.invoke({
        "result": result,
        "chat_history": chat_history,
        "query": query
    })

    assistant_reply = response["text"]

    # Record the turn only AFTER the call. The prompt already ends with the
    # {query} human message, so adding the question to chat_history beforehand
    # would send it to the model twice. It also keeps the history free of
    # unanswered questions when the call raises.
    chat_history.append(HumanMessage(content=query))
    chat_history.append(AIMessage(content=assistant_reply))

    print("\nAssistant:", assistant_reply)
    print("------------------------------------------------------\n")


🩺 Medical Assistant is ready! Type your question or 'exit' to stop.



You:  "What disease am I currently suffering from?"


Assistant: Based on the provided information, you are currently suffering from **disorders of the pituitary gland, specifically hyperfunction (meaning the gland produces too much of one or more hormones)**. This is indicated by the ICD code E22.


You:  this disease covered in my policy


Assistant: Based on the "Covered ICD Codes" provided, your current ICD code **E22** (disorders of the pituitary gland, specifically hyperfunction) is **not listed** among the covered codes.

You would need to contact your insurance provider directly to confirm coverage for E22, as the provided list may not be exhaustive or up-to-date with your specific policy.


You:  what i can do then 


Assistant: Given that your current ICD code E22 (disorders of the pituitary gland, specifically hyperfunction) is not listed in the "Covered ICD Codes" provided, here's what you can do:

1.  **Contact Your Insurance Provider Directly:**
    *   This is the most crucial step. The list provided might not be exhaustive or specific to your individual policy. Call the number on your insurance card.
    *   Ask them specifically if ICD code E22 is covered under your plan.
    *   Inquire about the process for pre-authorization or referrals if needed for specialists like an endocrinologist.

2.  **Discuss with Your Doctor/Endocrinologist:**
    *   Inform your doctor's office about your concerns regarding insurance coverage. They may have experience dealing with similar situations and can help advocate for you.
    *   They might be able to provide additional documentation or use a different, covered ICD code if appropriate and medically accurate for your condition.

3.  **Understand the Appe

You:  quit


Assistant: Goodbye! Take care.
