In [None]:
#Create a langchain project on langsmith and create an api key

import os

from google.colab import userdata

# Copy the Colab secrets into the process environment so that later cells
# (and the libraries that read these variables) can actually see them.
# Calling `userdata.get(...)` without using its return value has no lasting effect.
# NOTE(review): presumably `userdata.get` raises when a secret is missing or
# notebook access to it has not been granted — confirm in the Colab docs.
for _secret_name in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY", "LANGCHAIN_PROJECT"):
    os.environ[_secret_name] = userdata.get(_secret_name)

In [None]:
# Tiktoken is an open-source Python library that breaks text into tokens
#Langchian_community sued to make returiever 
#Used chromaDB for database

!pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain

In [None]:
# Show the installed LangChain version as the cell output.
import langchain
langchain.__version__

In [None]:
# Configure LangSmith tracing and check that the required API keys are present.
# OPENAI_API_KEY and LANGCHAIN_API_KEY must already be in the process environment
# (for example exported from a .env file or copied from Colab secrets).

import os
import warnings

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Assigning `os.getenv(key)` back into `os.environ[key]` changes nothing when the
# key is set and raises `TypeError` (None is not a str) when it is not. Report the
# missing keys by name instead, so the cause is obvious.
for _required_key in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY"):
    if not os.getenv(_required_key):
        warnings.warn(f"{_required_key} is not set; cells that need it are expected to fail.")

In [None]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Chat model for a quick smoke test (gpt-3.5-turbo, temperature=0).
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
# Despite its name, `med_llm` holds the model's reply to this question, not a model.
# The following cell passes it to an output parser.
med_llm = llm.invoke("Tell me how are reference values calculated in lab test reports")
# Bare expression: the reply object is displayed as the cell output.
med_llm

In [None]:
#OutputParser for getting only the output
from langchain_core.output_parsers import StrOutputParser

# Run the reply stored in `med_llm` (set by the previous cell) through the parser;
# the parsed value is displayed as the cell output.
output_parser = StrOutputParser()
output_parser.invoke(med_llm)

In [None]:
# PyMuPDF: presumably required by the PyMuPDFLoader used in a later cell to read the lab-report PDF.
%pip install pymupdf

In [None]:
# Use the maintained packages (langchain_openai / langchain_core) rather than the
# legacy `langchain.chat_models`, `langchain.prompts` and `LLMChain` entry points,
# which are deprecated. This matches the imports at the top of the notebook.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# GPT-4 chat model for the extraction; the API key is read from OPENAI_API_KEY.
llm = ChatOpenAI(model_name="gpt-4", temperature=0, openai_api_key=os.getenv("OPENAI_API_KEY"))

# Prompt with a single `{text}` slot for the medical report.
prompt_template = PromptTemplate(
    input_variables=["text"],
    template="Extract only the lab test names from the following medical report:\n{text}\nReturn them as a comma-separated list."
)

# Extraction pipeline: prompt -> chat model -> plain string.
chain = prompt_template | llm | StrOutputParser()

# Example report
medical_report = """
Patient's CBC report shows Hemoglobin: 13.5 g/dL, WBC Count: 6,500/μL, Platelet Count: 250,000.
Cholesterol levels are high. Liver Function Test: ALT: 45 U/L, AST: 42 U/L.
"""

# Run the extraction; `lab_tests` is the model's answer as a string.
lab_tests = chain.invoke({"text": medical_report})

print("Extracted Lab Tests:", lab_tests)

In [None]:
# Import from langchain_community / langchain_openai (as the import cell at the top
# of the notebook already does) instead of the deprecated `langchain.vectorstores`,
# `langchain.embeddings` and `langchain.document_loaders` shims.
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# PyMuPDF is a high-performance Python library for data extraction from PDFs.
# Load the lab report PDF.
loader = PyMuPDFLoader("/content/137253305_bgaawavakhroaw0xoj05dgcq.pdf")
documents = loader.load()

# Embed the loaded documents and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(documents, embedding=OpenAIEmbeddings())

# Retriever that returns the 5 most relevant results, wired into a RetrievalQA chain.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(), retriever=retriever)

query = "List all lab tests mentioned in the report."
# `.invoke` replaces the deprecated `.run`; RetrievalQA takes its question under
# "query" and returns the answer under "result".
response = qa.invoke({"query": query})["result"]

print("Extracted Lab Tests:", response)

In [None]:
# requests is used for the WHO ICD-11 HTTP calls and pandas for the ICD-11 CSV lookup in later cells.
%pip install requests pandas


In [None]:
#Create an account on WHO's ICD https://icd.who.int/en and get access to client id and client secret
import requests

# ICD-11 API Authentication Details
# ICD_TOKEN_URL is the endpoint get_icd11_token() posts the credentials to;
# ICD_API_SEARCH_URL is the base get_icd11_code() builds its search request from.
# NOTE(review): later cells in this notebook use https://id.who.int/icd/entity/search
# as the search endpoint — confirm which URL is the correct one.
ICD_TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token"
ICD_API_SEARCH_URL = "https://icd.who.int/icdapi/api/v1/icd11/mms/search"

# Replace with your actual API credentials from WHO present in .env
# (read from the environment; os.getenv returns None when a variable is unset).
CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
# Sent as the "scope" and "grant_type" fields of the token request.
SCOPE = "icdapi_access"
GRANT_TYPE = "client_credentials"

# Function to get an OAuth token
def get_icd11_token():
    """Request an OAuth access token for the WHO ICD API.

    Posts CLIENT_ID, CLIENT_SECRET, SCOPE and GRANT_TYPE (module-level values)
    to ICD_TOKEN_URL.

    Returns:
        The access token from the response, or None when the request fails,
        the response is not JSON, or it carries no "access_token". The reason
        is printed in each failure case.
    """
    payload = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": SCOPE,
        "grant_type": GRANT_TYPE
    }

    try:
        # Certificate verification is left at requests' default (enabled): this
        # request carries the client secret, so it must not go out with
        # verify=False. The timeout keeps a stalled connection from hanging the cell.
        response = requests.post(ICD_TOKEN_URL, data=payload, timeout=30)
        body = response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        print("Error getting token:", exc)
        return None

    if "access_token" in body:
        return body["access_token"]

    # Print the server's error payload so it is visible.
    print("Error getting token:", body)
    return None

# Function to get ICD-11 codes for lab tests
def get_icd11_code(test_name, token):
    """Search the ICD-11 API for `test_name` and return the first hit.

    Args:
        test_name: Free-text search term (e.g. a lab test name).
        token: Bearer token obtained from get_icd11_token().

    Returns:
        A `(code, title)` tuple for the first search result, or `(None, None)`
        when the request fails, the status is not 200, or there are no results.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
        "Accept-Language": "en",
        "API-Version": "v2"
    }

    # Lower-case "true", as the other ICD cells in this notebook send it
    # (a Python bool would be encoded as "True").
    params = {"q": test_name, "flatResults": "true"}

    try:
        # ICD_API_SEARCH_URL already ends in "/search"; appending "/search" again
        # would request ".../search/search". TLS verification stays enabled
        # because the request carries the bearer token.
        response = requests.get(ICD_API_SEARCH_URL, headers=headers, params=params, timeout=30)
    except requests.exceptions.RequestException as exc:
        print(f"Error searching ICD-11 for {test_name}:", exc)
        return None, None

    if response.status_code != 200:
        return None, None

    try:
        data = response.json()
    except ValueError:
        return None, None
    if not isinstance(data, dict):
        return None, None

    # NOTE(review): the other cells in this notebook read results from
    # "destinationEntities"; accept that shape as well as "matches" until the
    # response schema is confirmed against the WHO API documentation.
    results = data.get("matches") or data.get("destinationEntities")
    if not results:
        return None, None

    first = results[0]
    code = first.get("code") or first.get("theCode")
    title = first.get("title")
    if isinstance(title, dict):
        title = title.get("@value")
    return code, title

# Authenticate once; the same token is reused for every lookup below.
token = get_icd11_token()

# Sample inputs standing in for the extracted lab tests.
lab_tests = ["Cholera", "RBC Count", "Cholesterol"]

if not token:
    print("Failed to get ICD-11 API token.")
else:
    # Pair each test name with whatever get_icd11_code returns for it.
    icd_mappings = dict(zip(lab_tests, (get_icd11_code(name, token) for name in lab_tests)))
    print("ICD-11 Mappings:", icd_mappings)

In [None]:
import requests
from langchain.tools import RequestsGetTool
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import Tool
from langchain.requests import RequestsWrapper  # ✅ Import this

# ICD-11 API Authentication Details
# These assignments replace the values of the same names set by an earlier cell.
# ICD_TOKEN_URL is the endpoint get_icd11_token() posts the credentials to;
# ICD_API_SEARCH_URL is the search endpoint queried by fetch_icd11_code().
ICD_TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token"
ICD_API_SEARCH_URL = "https://id.who.int/icd/entity/search"

# Replace with actual WHO API credentials
# (read from the environment; os.getenv returns None when a variable is unset).
CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
# Sent as the "scope" and "grant_type" fields of the token request.
SCOPE = "icdapi_access"
GRANT_TYPE = "client_credentials"

# Function to get an OAuth token
def get_icd11_token():
    """Request an OAuth access token for the WHO ICD API.

    Posts CLIENT_ID, CLIENT_SECRET, SCOPE and GRANT_TYPE (module-level values)
    to ICD_TOKEN_URL.

    Returns:
        The "access_token" value from the JSON response (None if the key is
        absent), or None when the request fails; the error is printed.
    """
    payload = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": SCOPE,
        "grant_type": GRANT_TYPE
    }
    try:
        # The timeout keeps a stalled connection from blocking the cell
        # (requests waits indefinitely by default).
        response = requests.post(ICD_TOKEN_URL, data=payload, verify=True, timeout=30)
        response.raise_for_status()
        return response.json().get("access_token")
    except requests.exceptions.RequestException as e:
        print("Error getting token:", e)
        return None

# Initialize Requests Wrapper. Retained so the notebook namespace is unchanged;
# fetch_icd11_code below does not use it.
requests_wrapper = RequestsWrapper()

# LangChain tool for querying ICD-11 API
def fetch_icd11_code(test_name):
    """Look up `test_name` in the ICD-11 search API and describe the top hit.

    A fresh token is requested via get_icd11_token() on every call.

    Args:
        test_name: Free-text search term (e.g. a lab test name).

    Returns:
        A human-readable string in every case: the first result's identifier
        and title, a "no mapping" message, or an error message.
    """
    token = get_icd11_token()
    if not token:
        return "Failed to authenticate with ICD-11 API."

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
        "Accept-Language": "en",
        "API-Version": "v2"
    }

    params = {"q": test_name, "flatResults": "true"}

    try:
        # Call the endpoint with `requests` directly. `RequestsGetTool.run(url,
        # params=..., headers=...)` does not hand those keyword arguments to the
        # HTTP request, so neither the search term nor the bearer token was sent;
        # it also returns text, on which `.json()` cannot be called.
        response = requests.get(ICD_API_SEARCH_URL, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        entities = data.get("destinationEntities") if isinstance(data, dict) else None
        if not entities:
            return f"No ICD-11 mapping found for {test_name}."

        entity = entities[0]
        # NOTE(review): field names assumed from this notebook ("@id",
        # title["@value"]); plain "theCode" / "id" / string titles are accepted
        # too — confirm against the WHO API documentation.
        code = entity.get("@id") or entity.get("theCode") or entity.get("id")
        title = entity.get("title")
        if isinstance(title, dict):
            title = title.get("@value")
        return f"{test_name} → ICD-11 Code: {code}, Title: {title}"

    except Exception as e:
        return f"Error fetching ICD-11 code for {test_name}: {str(e)}"

# Initialize LangChain Agent
# GPT-4 chat model (temperature=0) that drives the agent.
llm = ChatOpenAI(model_name="gpt-4", temperature=0)

# The only tool given to the agent: fetch_icd11_code, exposed as "ICD-11 Lookup".
tools = [
    Tool(
        name="ICD-11 Lookup",
        func=fetch_icd11_code,
        description="Use this tool to find ICD-11 codes for medical tests"
    )
]

# Zero-shot ReAct-description agent built from the tool list and the model.
# NOTE(review): `initialize_agent` and `agent.run` look like legacy LangChain
# entry points — confirm they are available in the installed version.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Example lab tests
lab_tests = ["Cholera", "RBC Count", "Cholesterol"]

# Get ICD-11 mappings using LangChain
# One agent run per lab test; the value returned by each run is printed.
for test in lab_tests:
    print(agent.run(f"Find ICD-11 code for {test}"))

In [None]:
import requests
import json
from langchain.tools import RequestsGetTool
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import Tool
from langchain.requests import RequestsWrapper  # ✅ Fix: Import RequestsWrapper

# ICD-11 API Authentication Details
# These assignments repeat the values set by the previous agent cell.
# ICD_TOKEN_URL is the endpoint get_icd11_token() posts the credentials to;
# ICD_API_SEARCH_URL is the search endpoint queried by fetch_icd11_code().
ICD_TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token"
ICD_API_SEARCH_URL = "https://id.who.int/icd/entity/search"


# Replace with your actual API credentials from WHO
# (read from the environment; os.getenv returns None when a variable is unset).
CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
# Sent as the "scope" and "grant_type" fields of the token request.
SCOPE = "icdapi_access"
GRANT_TYPE = "client_credentials"

# Function to get an OAuth token
def get_icd11_token():
    """Request an OAuth access token for the WHO ICD API.

    Posts CLIENT_ID, CLIENT_SECRET, SCOPE and GRANT_TYPE (module-level values)
    to ICD_TOKEN_URL.

    Returns:
        The "access_token" value from the JSON response (None if the key is
        absent), or None when the request fails; the error is printed.
    """
    payload = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": SCOPE,
        "grant_type": GRANT_TYPE
    }
    try:
        # The timeout keeps a stalled connection from blocking the cell
        # (requests waits indefinitely by default).
        response = requests.post(ICD_TOKEN_URL, data=payload, verify=True, timeout=30)
        response.raise_for_status()
        return response.json().get("access_token")
    except requests.exceptions.RequestException as e:
        print("Error getting token:", e)
        return None

# Initialize Requests Wrapper. Retained so the notebook namespace is unchanged;
# fetch_icd11_code below does not use it.
requests_wrapper = RequestsWrapper()

# LangChain tool for querying ICD-11 API
def fetch_icd11_code(test_name):
    """Look up `test_name` in the ICD-11 search API and describe the top hit.

    A fresh token is requested via get_icd11_token() on every call.

    Args:
        test_name: Free-text search term (e.g. a lab test name).

    Returns:
        A human-readable string in every case: the first result's identifier
        and title, a "no mapping" message, or an error message.
    """
    token = get_icd11_token()
    if not token:
        return "Failed to authenticate with ICD-11 API."

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
        "Accept-Language": "en",
        "API-Version": "v2"
    }

    params = {"q": test_name, "flatResults": "true"}

    try:
        # Call the endpoint with `requests` directly. `RequestsGetTool.run(url,
        # params=..., headers=...)` does not hand those keyword arguments to the
        # HTTP request, so neither the search term nor the bearer token was sent.
        response = requests.get(ICD_API_SEARCH_URL, headers=headers, params=params, timeout=30)
        response.raise_for_status()

        try:
            response_json = response.json()
        except ValueError:
            return f"Error decoding JSON response for {test_name}."

        entities = response_json.get("destinationEntities") if isinstance(response_json, dict) else None
        if not entities:
            return f"No ICD-11 mapping found for {test_name}."

        entity = entities[0]
        # NOTE(review): field names assumed from this notebook ("@id",
        # title["@value"]); plain "theCode" / "id" / string titles are accepted
        # too — confirm against the WHO API documentation.
        code = entity.get("@id") or entity.get("theCode") or entity.get("id")
        title = entity.get("title")
        if isinstance(title, dict):
            title = title.get("@value")
        return f"{test_name} → ICD-11 Code: {code}, Title: {title}"

    except Exception as e:
        return f"Error fetching ICD-11 code for {test_name}: {str(e)}"

# Initialize LangChain Agent
# GPT-4 chat model (temperature=0) that drives the agent.
llm = ChatOpenAI(model_name="gpt-4", temperature=0)

# The only tool given to the agent: fetch_icd11_code, exposed as "ICD-11 Lookup".
tools = [
    Tool(
        name="ICD-11 Lookup",
        func=fetch_icd11_code,
        description="Use this tool to find ICD-11 codes for medical tests"
    )
]

# Zero-shot ReAct-description agent built from the tool list and the model.
# NOTE(review): `initialize_agent` and `agent.run` look like legacy LangChain
# entry points — confirm they are available in the installed version.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Example lab tests
lab_tests = ["Cholera", "RBC Count", "Cholesterol"]

# Get ICD-11 mappings using LangChain
# One agent run per lab test; the value returned by each run is printed.
for test in lab_tests:
    print(agent.run(f"Find ICD-11 code for {test}"))

In [None]:
# fuzzywuzzy provides the approximate string matching used by the ICD-11 CSV lookup in the next cell.
%pip install fuzzywuzzy

In [None]:
# Map the extracted lab tests to actual ICD-11 codes. The codes were extracted from the WHO ICD-11 website and saved as a CSV file, ICD_11_Codes.csv.
# TODO: refactor this cell into modular code.
import pandas as pd
from fuzzywuzzy import process  # For approximate text matching

# Read the ICD-11 reference table (expects 'Code' and 'Title' columns) and
# normalise every title to a lower-cased, trimmed string in one step.
icd_data = pd.read_csv("/content/ICD_11_Codes.csv").assign(
    Title=lambda frame: frame['Title'].astype(str).str.lower().str.strip()
)

# Candidate pool for fuzzy matching: the normalised titles as a plain list.
icd_titles = list(icd_data['Title'])

# Sample lab tests to look up.
lab_tests = ["Cholera", "RBC Count", "Cholesterol"]

# Function to find the best ICD-11 match
def find_icd_code(test_name, min_score=80):
    """Return the ICD-11 code whose title best fuzzy-matches `test_name`.

    Matching runs against the module-level `icd_titles` list, and the code is
    read from the 'Code' column of `icd_data`.

    Args:
        test_name: Lab test name to look up. Anything that is not a non-blank
            string yields the "no match" result.
        min_score: Score reported by `process.extractOne` that the best match
            must exceed to be accepted. Defaults to 80.

    Returns:
        The matching value from the 'Code' column, or the string
        "No ICD-11 match found".
    """
    no_match = "No ICD-11 match found"

    # Guard against None, NaN and other non-text input (e.g. from a parsed list).
    if not isinstance(test_name, str):
        return no_match

    test_name = test_name.lower().strip()

    # A blank query or an empty title list cannot produce a match.
    if not test_name or not icd_titles:
        return no_match

    # With a list of choices, extractOne returns (best_title, score) or None.
    match = process.extractOne(test_name, icd_titles)
    if not match:
        return no_match

    best_match, score = match
    if score <= min_score:
        return no_match

    # Several rows may share a title; the first one's code is used.
    icd_code = icd_data.loc[icd_data['Title'] == best_match, 'Code'].values
    return icd_code[0] if len(icd_code) > 0 else no_match

# Look up every sample lab test and collect the results keyed by test name.
icd_mapping = dict(zip(lab_tests, map(find_icd_code, lab_tests)))

# Show the resulting name -> code mapping.
print("ICD-11 Mappings:", icd_mapping)