In [5]:
!pip install langchain langchain-community langchain-ollama pandas langchain-openai python-dotenv pypdf openpyxl

Collecting openpyxl
  Using cached openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Using cached et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Using cached openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Using cached et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5



[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
!ollama run llama-3.1:8b

^C


In [2]:
import re
import pandas as pd
from dotenv import dotenv_values
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
#from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

In [3]:
config = dotenv_values(".env")

In [4]:
# Instantiate the OpenAI client
secret_key = config['OPENAI_API_KEY']
client = ChatOpenAI(api_key=secret_key, model='gpt-4o-mini')
embeddings=OpenAIEmbeddings(api_key=secret_key, model='gpt-4o-mini')
vstore = InMemoryVectorStore(embeddings)

In [4]:

#pdf_loader = PyPDFLoader("./aiswre/data/ISO+13485-2016.pdf")
pdf_loader = PyPDFLoader("C:/Users/dsobc/Downloads/ISO+13485-2016 (2).pdf")
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
docs_from_pdf = pdf_loader.load_and_split(text_splitter=splitter)

print(f"Documents from PDF: {len(docs_from_pdf)}.")
#print(docs_from_pdf[0])
inserted_ids_from_pdf = vstore.add_documents(docs_from_pdf)
print(f"Inserted {len(inserted_ids_from_pdf)} documents.")
#print(inserted_ids_from_pdf[0])
df = pd.DataFrame({
    'inserted_id': [i for i in inserted_ids_from_pdf],
    'chunk':[d.page_content for d in docs_from_pdf]
})
df.to_excel("./aiswre/data/IEC_62304-2006_sample_chunks.xlsx")

NotImplementedError: only Standard PDF encryption handler is available

In [7]:
retriever = vstore.as_retriever(search_kwargs={"k": 3})

excerpt="""
IEC 62304:2006 defines in section 4.3 the software safety classes, based only on the consequence of a hazardous situation on the patient:

Class A: No injury or damage to health is possible
Class B: Non-SERIOUS INJURY is possible
Class C: Death or SERIOUS INJURY is possible
Another way of viewing this definition is to disregard the probability of risks linked to a software failure, and to focus only on the severity.
"""

retrieved_docs = vstore.similarity_search(excerpt)

print(retrieved_docs)

quality_template = """
You are a software quality engineer currently reviewing medical device documentation and need to provide feedback on how well the documentation aligns with compliance standards. Use the provided context as to support your answers and do not make anything up.

CONTEXT:
{context}

QUESTION: {question}

YOUR ANSWER:"""

quality_prompt = ChatPromptTemplate.from_template(quality_template)

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | quality_prompt
    | client
    | StrOutputParser()
)

q="""
What is the recommended approach to assigning a Software Safety Classification?  
"""
result = chain.invoke(q)
print(result)

PermissionDeniedError: Error code: 403 - {'error': {'message': 'You are not allowed to generate embeddings from this model', 'type': 'invalid_request_error', 'param': None, 'code': None}}

In [5]:
cfr_template = """
Generate a statement in the format as given by the provided example

## Examples
---
{examples}
Reference: {reference}
Question: {question}
Statement:
"""

cfr_prompt = ChatPromptTemplate.from_template(cfr_template)

chain = (
    cfr_prompt
    | client
    | StrOutputParser()
)

examples = """
Reference: 21CFR803.3(b)
Question: Does the Reportable Events process list the definition of the term "become aware" as that "an employee of the entity required to report has acquired information that reasonably suggests a reportable adverse event has occurred"?  Answer must be Yes or No only.
Statement: Per 21CFR803.3(b), the term "become aware" means "an employee of the entity required to report has acquired information that reasonably suggests a reportable adverse event has occurred". It is recommended you add this into your Medical Device Reporting Vigilance process documentation.

Reference: 21CFR803.3(b)
Question: Does the Reportable Events process list the definition of the term "caused or contributed to" that "a death or serious injury was or may have been attributed to a medical device, or that a medical device was or may have been a factor in a death or serious injury, including events occurring as a result of:  Failure, Malfunction, Improper or inadequate design, Manufacture, Labeling, or User error"?  Answer must be Yes or No only.
Statement: Per 21CFR803.3(b), the term "become aware" means "an employee of the entity required to report has acquired information that reasonably suggests a reportable adverse event has occurred". It is recommended you add this into your Medical Device Reporting Vigilance process documentation.

Reference: 21CFR803.3(k)
Question: Does the Reportable Events process list the definition of the term "malfunction" as "the failure of a device to meet its performance specifications or otherwise perform as intended"? Answer must be Yes or No only.
Statement: Per 21CFR803.3(k), the term "malfunction" means "the failure of a device to meet its performance specifications or otherwise perform as intended". It is recommended you add this into your Medical Device Reporting Vigilance process documentation.

Reference: 21CFR803.17 & 21CFR803.50(3)
Question: Does the Reportable Events procedure list the notification of events that may be subject to Medical Device Report (MDR) requirements? Answer must be Yes or No only.
Statement: Per 21CFR803.17 & 21CFR803.50(3), notification of events that may be subject to Medical Device Report (MDR) requirements are required. It is recommended you add this into your Medical Device Reporting Vigilance process documentation. 
"""

reference= "21CFR803.3(k)"
question= "Does the Reportable Events process list the term \"intended performance of a device\" as the intended use for which the device is labeled or marketed? Answer must be Yes or No only."

result = chain.invoke({"examples": examples, "reference": reference, "question": question})
print(result)

Per 21CFR803.3(k), the term "intended performance of a device" means the intended use for which the device is labeled or marketed. It is recommended you add this into your Medical Device Reporting Vigilance process documentation.


In [6]:
# load filtered question list dataframe
df = pd.read_excel("./aiswre/data/djs_question_list_30mar25.xlsx")
df_filt = df[['Question ID', 'Question (for LLM Model Input)','Recommended Action (App \'Learn more\' textbox popup)']]

In [7]:
df_filt.dropna(subset=['Recommended Action (App \'Learn more\' textbox popup)'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filt.dropna(subset=['Recommended Action (App \'Learn more\' textbox popup)'], inplace=True)


In [8]:
df_filt.info()

<class 'pandas.core.frame.DataFrame'>
Index: 127 entries, 1 to 142
Data columns (total 3 columns):
 #   Column                                               Non-Null Count  Dtype 
---  ------                                               --------------  ----- 
 0   Question ID                                          127 non-null    object
 1   Question (for LLM Model Input)                       127 non-null    object
 2   Recommended Action (App 'Learn more' textbox popup)  127 non-null    object
dtypes: object(3)
memory usage: 4.0+ KB


In [None]:
#mask = (((df_filt['Recommended Action (App \'Learn more\' textbox popup)'].str.startswith('21C'))) | ((df_filt['Recommended Action (App \'Learn more\' textbox popup)'].str.startswith('CMDR')))
#        | ((df_filt['Recommended Action (App \'Learn more\' textbox popup)'].str.startswith('Medical Device Reporting'))))
#df_filt = df_filt[mask]

In [9]:
df_filt['reference'] = df_filt['Recommended Action (App \'Learn more\' textbox popup)'].apply(lambda s: re.search(r"[^ ]+",s).group(0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filt['reference'] = df_filt['Recommended Action (App \'Learn more\' textbox popup)'].apply(lambda s: re.search(r"[^ ]+",s).group(0))


In [10]:
df_filt['reference_concat'] = df_filt['reference'].apply(lambda s: ' & '.join(s.split("\n")))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filt['reference_concat'] = df_filt['reference'].apply(lambda s: ' & '.join(s.split("\n")))


In [11]:
df_filt.head(5)

Unnamed: 0,Question ID,Question (for LLM Model Input),Recommended Action (App 'Learn more' textbox popup),reference,reference_concat
1,Vigilance3,Does the Quality Manual reference an Medical D...,"Per ISO 13485 & FDA, the Quality Manual needs ...",Per,Per
2,Vigilance4,Is there a procedure that establishes the requ...,"Per ISO 13485 & FDA, there must be a Reportabl...",Per,Per
3,Vigilance5,Does the Reportable Events procedure list the ...,ISO 13485 requires FDA notification of complai...,ISO,ISO
4,Vigilance6,Does the Reportable Events procedure list the ...,21CFR803.17 requires identification of events ...,21CFR803.17,21CFR803.17
5,Vigilance7,Does the Reportable Events procedure list the ...,21CFR803.17 requires communication of events t...,21CFR803.17,21CFR803.17


In [12]:
df_filt['reference_concat'].sample(3)

14                                            According
92                                                 CMDR
28    21CFR803.10('c)(2)(i) & 21CFR803.20.(b)(3)(iii...
Name: reference_concat, dtype: object

In [13]:
cfr_template = """
Generate a statement in the format as given by the provided Examples

## Examples
---
Reference: 21CFR803.3(b)
Question: Does the Reportable Events process list the definition of the term "become aware" as that "an employee of the entity required to report has acquired information that reasonably suggests a reportable adverse event has occurred"?  Answer must be Yes or No only.
Statement: Per 21CFR803.3(b), the term "become aware" means "an employee of the entity required to report has acquired information that reasonably suggests a reportable adverse event has occurred". It is recommended you add this into your Medical Device Reporting Vigilance process documentation.

Reference: 21CFR803.3(b)
Question: Does the Reportable Events process list the definition of the term "caused or contributed to" that "a death or serious injury was or may have been attributed to a medical device, or that a medical device was or may have been a factor in a death or serious injury, including events occurring as a result of:  Failure, Malfunction, Improper or inadequate design, Manufacture, Labeling, or User error"?  Answer must be Yes or No only.
Statement: Per 21CFR803.3(b), the term "become aware" means "an employee of the entity required to report has acquired information that reasonably suggests a reportable adverse event has occurred". It is recommended you add this into your Medical Device Reporting Vigilance process documentation.

Reference: 21CFR803.3(k)
Question: Does the Reportable Events process list the definition of the term "malfunction" as "the failure of a device to meet its performance specifications or otherwise perform as intended"? Answer must be Yes or No only.
Statement: Per 21CFR803.3(k), the term "malfunction" means "the failure of a device to meet its performance specifications or otherwise perform as intended". It is recommended you add this into your Medical Device Reporting Vigilance process documentation.

Reference: 21CFR803.17 & 21CFR803.50(3)
Question: Does the Reportable Events procedure list the notification of events that may be subject to Medical Device Report (MDR) requirements? Answer must be Yes or No only.
Statement: Per 21CFR803.17 & 21CFR803.50(3), notification of events that may be subject to Medical Device Report (MDR) requirements are required. It is recommended you add this into your Medical Device Reporting Vigilance process documentation. 

Reference: {reference}
Question: {question}
Statement:
"""

cfr_prompt = ChatPromptTemplate.from_template(cfr_template)

templates={
    'cfr': {
        'template': cfr_prompt
    }
}

In [14]:
chain_contexts = []

for index, row in df_filt.iterrows():

    chain_contexts.append(
        {
            'reference': row['reference_concat'],
            'question': row['Question (for LLM Model Input)']
        }
    )

In [15]:
from aiswre.promptengg.promptrunner import ParallelPromptRunner

In [16]:
pr = ParallelPromptRunner(
    use_structured_llm=False,
    llm=ChatOpenAI(api_key=secret_key, model='gpt-4o-mini'),
    pydantic_model=False,
    chain_contexts=chain_contexts,
    num_trials=1
)

In [17]:
import nest_asyncio

In [18]:
nest_asyncio.apply()

In [19]:
results = pr.run(
    prompt_type='cfr',
    templates=templates
)

Awaiting results...
Results fetched...


In [20]:
len(results)

127

In [21]:
df_filt['generated_recommended_action'] = [r.content for r in results] 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filt['generated_recommended_action'] = [r.content for r in results]


In [22]:
df_filt.to_excel('./aiswre/data/djs-results-columnP-initial-task-6-apr.xlsx')