In [55]:
import re
import os
import PyPDF2
from transformers import pipeline
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_experimental.data_anonymizer import PresidioAnonymizer
from langchain_core.documents import Document

In [65]:
from presidio_anonymizer.entities import OperatorConfig

# Anonymizer that analyzes three entity types and applies the "redact"
# operator to each of them.
anonymizer = PresidioAnonymizer(
    analyzed_fields=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS"],
    operators={
        entity: OperatorConfig("redact", {})
        for entity in ("PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS")
    },
)

In [66]:
# Function to anonymize PII from the extracted text
def anonymize_pii(text):
    """Run ``text`` through the notebook-level ``anonymizer`` and return the result.

    Args:
        text: Raw text to pass to ``anonymizer.anonymize``.

    Returns:
        Whatever ``anonymizer.anonymize`` returns for ``text``.
    """
    anonymized = anonymizer.anonymize(text)
    return anonymized

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Read a PDF and return its text after PII anonymization.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The result of ``anonymize_pii`` applied to the text of all pages,
        with consecutive pages separated by a newline.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Join pages with a newline so the last word of one page is not fused
        # with the first word of the next; fused tokens corrupt the text and
        # can hide a name / e-mail / phone number from the PII analyzer.
        # `or ""` tolerates pages for which no text could be extracted.
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
    return anonymize_pii(text)

# Extracting text from all PDFs in a directory
# Directory scanned for input PDFs (relative to the notebook's working directory).
pdf_dir = './data/'
# Accumulator for the Document objects built from the PDFs; it is filled by
# the loop in the following cell.
documents = []

In [67]:
# Rebuild the list from scratch: without this, re-running the cell appends to
# the list created in the previous cell and every PDF ends up indexed twice.
documents = []
# sorted() gives a deterministic document order (os.listdir order is arbitrary);
# lower() also picks up files named "*.PDF".
for filename in sorted(os.listdir(pdf_dir)):
    if filename.lower().endswith('.pdf'):
        text = extract_text_from_pdf(os.path.join(pdf_dir, filename))
        documents.append(Document(page_content=text))

# Fail fast with a clear message instead of handing the vector store an empty
# list of documents.
if not documents:
    raise ValueError(f"No PDF files found in {pdf_dir!r}; nothing to index.")

# Step 1: Create embeddings for the documents
embedding_model = OllamaEmbeddings(model="llama3.1")
docsearch = FAISS.from_documents(documents, embedding_model)

In [68]:
from langchain_community.llms import Ollama
# Ollama-backed LLM; this object is passed to both the RetrievalQA chain and
# the guardrails app further down. Uses the same model name as the embeddings.
llm = Ollama(model="llama3.1")

In [73]:
from langchain_core.prompts import PromptTemplate

# Prompt for the QA chain: {context} and {question} are the two placeholders
# filled in when the template is formatted. The wording instructs the model
# to keep PII out of its answer.
prompt_template = PromptTemplate(
    template="""
You are an assistant that helps users with their medical analysis. The following context may contain sensitive information. Ensure that your response does not include any Personally Identifiable Information (PII) such as names, phone numbers, email addresses.

Context: {context}
Query: {question}

Provide a safe and anonymized response that answers the query while respecting privacy guidelines.
""",
    input_variables=["context", "question"],
)

In [74]:
# Step 3: Create a RetrievalQA chain using the local LLM
# The FAISS index acts as the retriever, the "stuff" chain type is selected,
# and the privacy-aware prompt defined above is passed through to the chain.
qa_chain = RetrievalQA.from_chain_type(
    retriever=docsearch.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt_template},
    llm=llm,
)

# # Step 4: Define a query
# query = "Show me any information about the medical data that was collected."

# # Step 5: Run the query
# result = qa_chain({ "query": query })
# print(result)

In [75]:
# Step 4: Define a query
# NOTE(review): PERSON entities are redacted from the PDF text before it is
# indexed, so the name in this query cannot be matched against the stored text.
query = "Show me any information related to back pain of John Doe."

# Step 5: Run the query
# Use .invoke(): calling the chain object directly (qa_chain({...})) goes
# through the deprecated Chain.__call__ path. The returned dict is read the
# same way, via its 'result' key.
result = qa_chain.invoke({"query": query})
print(result['result'])

I cannot provide medical information about a specific person. If you would like, I can help answer general questions or discuss common topics related to back pain. Would that be helpful?


In [24]:
import nest_asyncio

# Applied before the guardrails cells below, which use `await` at cell level.
# NOTE(review): presumably needed so nemoguardrails can run its async code
# inside the notebook's already-running event loop — confirm it is still required.
nest_asyncio.apply()

In [35]:
from nemoguardrails import RailsConfig, LLMRails
from nemoguardrails.integrations.langchain.runnable_rails import RunnableRails

# Load the rails configuration from the local ./guardrails directory and
# build the guardrails app around the same LLM object the QA chain uses.
config = RailsConfig.from_path("./guardrails")
app = LLMRails(config, llm=llm)


In [46]:
# Expose the RetrievalQA chain to the guardrails app under the action name "qa_chain".
# NOTE(review): this cell's execution count (46) is lower than that of the cell
# that builds qa_chain (74), so the object registered in the saved run may be a
# stale chain — re-run this cell after (re)creating qa_chain.
app.register_action(qa_chain, name="qa_chain")

In [76]:
# Step 4: Define a query
query = "Show me any information related to  GENERAL HEALTH SCREENING in Pre-Employment Health Assessment."

# Step 5: Run the query
# Use .invoke(): calling the chain object directly (qa_chain({...})) goes
# through the deprecated Chain.__call__ path. The returned dict is read the
# same way, via its 'result' key.
result = qa_chain.invoke({"query": query})
print(result['result'])

Based on the provided context, I can show you the relevant information from PART C: GENERAL HEALTH SCREENING without revealing any Personally Identifiable Information (PII).

Here is the extracted information:

1. Have you been involved in any motor vehicle accidents resulting in personal injury? No
2. Have you ever lodged a claim for workers compensation? No
3. Have you suffered back pain or strain injury (including back surgery)? No
4. Have you suffered from shoulder, neck or arm pain or strain? No
5. Have you suffered from hip, knee or ankle pain? No
6. Have you had a full medical clearance for any injury identified in questions 1 to 5? N/A
7. Are you receiving any ongoing treatment for injuries identified in questions 1 to 5? No
8. Do you suffer from any medical condition (including physical, psychiatric, psychological) for which you are receiving treatment? No

Health Conditions:
- Lower back, neck or thoracic spinal pain? No
- Sciatica? No
- Wrist or elbow pain or weakness? No
- 

In [50]:
# Guardrails path ("chain_with_guardrails" mode): send the query through the
# guardrails app instead of calling qa_chain directly.
# `query` is whatever the most recently executed query cell assigned.
history = [{"role": "user", "content": query}]
# Top-level `await` relies on the notebook's running event loop; the returned
# message is read through its 'content' key.
result = await app.generate_async(messages=history)
print(result['content'])

I'd be happy to help you with that! General health screening is a crucial component of pre-employment health assessments, and it's typically conducted by healthcare professionals. The process usually involves a thorough medical examination, review of past medical history, and laboratory tests to identify any underlying conditions that may affect an individual's ability to perform their job duties. However, I need more specific information about the type of screening you're looking for, such as the industry or occupation, in order to provide more tailored results.
