## Using GPT model


In [8]:
import os
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEl = "gpt-3.5-turbo"

In [None]:
from langchain_openai.chat_models import ChatOpenAI

model = ChatOpenAI(api_key = OPENAI_API_KEY, model = MODEl)
model.invoke("Tell me a joke")


## Using LLama model


In [1]:
import os
from dotenv import load_dotenv

load_dotenv()
MODEL = "llama2"

In [2]:
from langchain_community.llms import Ollama

model = Ollama(model = MODEL)
model.invoke("Tell me a joke")

"Sure! Here's one:\n\nWhy don't scientists trust atoms?\nBecause they make up everything!\n\nI hope you found that amusing! Do you want to hear another one?"

In [3]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

chain = model | parser

chain.invoke("Tell me a joke")

"Sure! Here's a quick one:\n\nWhy don't scientists trust atoms?\nBecause they make up everything!\n\nI hope that brought a smile to your face! Do you want to hear another one?"

### Simple RAG system


In [4]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("protocol.pdf")
pages = loader.load_and_split()

pages

[Document(page_content='CLINICAL STUDY PROTOCOL \n \n \nProtocol Title:  A Phase 1/2 Multicenter Study Evaluating the Safety and Efficacy of \nKTE -C19 in Subjects with Refractory Aggressiv e Non -Hodgkin \nLymphoma (ZUMA -1) \nProtocol Number:  KTE -C19-101 \nIND Number:  016278  \nEudraCT Number:  2015 -005007 -86  \nClinical Study Sponsor:  Kite Pharma, Inc.  \n2400 Broadway  \nSanta Monica, CA 90404  \nUnited States of America  \nKey Sponsor Contacts:  , Clinical Development  \nKite Pharma, Inc.  \n2 Roundwood Avenue  \nStockley Park  \nUxbridge, Middlesex  \nPhone:  \nEmail:  \n  Clinical Development  \nKite Pharma Inc.  \n2400 Broadway  \nSanta Monica, CA  90404  \nPhone:  \nEmail:   \n  Clinical Operations  \nKite Pharma, Inc.  \n2400 Broadway  \nSanta Monica, CA 90404  \nPhone:  \nEmail:  \nVersion:  1.0 (Amendment #  8) \nDate:  11 February  2019 \n \nConfidentiality Notice \nThis document contains proprietary and confidential information of Kite Pharma Inc., a wholly \nowned 

In [5]:
from langchain.prompts import PromptTemplate

template = """
Answer the question based on the given context.

Context: {context}

Question: {question}

"""

prompt = PromptTemplate.from_template(template)
print (prompt.format(context="here is the context", question="here is the question"))



Answer the question based on the given context.

Context: here is the context

Question: here is the question




In [6]:
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_community.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings()
vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
retriever = vectorstore.as_retriever()

retriever.invoke("Zuma 1")

[Document(page_content='Axicabtagene ciloleucel; KTE -C19 \nClinical Protocol: KTE-C l9-10 1 \nFigure 3. Phase 1 Dosing Cohort s and Regimen s \nYes* \n*May be explored : see Section 9.6 \nCONFIDENTIAL Page 27 of 118 Kite Phanna , Inc. \nFinal \nNo \n11 Februaiy 2019', metadata={'source': 'protocol.pdf', 'page': 26}),
 Document(page_content='Axicabtagene ciloleucel; KTE-C19 Kite Pharma, Inc. \nClinical Protocol: KTE-C19- 101 Final  \nCONFIDENTIAL Page 115 of 118  11 February 2019 \n 18. A\nPPENDICES  \nAppendix A  Revised IWG Response Criteria for Malignant Lymphoma  (Cheson et al, \n2007)  \nAppendix B  Monitoring of subjects after IP administration per country regulatory agencies:', metadata={'source': 'protocol.pdf', 'page': 114}),
 Document(page_content='Axicabtag ene ciloleuc el; KTE-C19 \nClinica l Protocol: KTE -Cl 9- 101 Kite Phanna , Inc. \nFinal \nhas been studied and tolerated in subjects with B cell malignancies (O\'Brien et al, 2001). Similar \ntotal doses of cyclophospham

In [8]:
from operator import itemgetter

chain = (
    {
        "context": itemgetter("question") | retriever, 
        "question": itemgetter("question")
    }
    | prompt
    | model
    | parser
)

chain.invoke({"question": "How many phases is the zuma 1 study separated into?"})

'Based on the given context, the zuma 1 study is separated into 3 phases:\n\nPhase 1: This phase involves screening and enrollment of subjects into the study.\nPhase 2: This phase consists of the pivotal study, which evaluates the efficacy and safety of axicabtagene ciloleucel in subjects with diffuse large B-cell lymphoma, primary mediastinal B-cell lymphoma, or transformed follicular lymphoma.\nPhase 2 Safety Management Study: This phase assesses the impact of a prophylactic regimen, earlier interventions, debilitating disease, or death on the safety of axicabtagene ciloleucel.\n\nTherefore, the zuma 1 study is separated into 3 phases.'

In [9]:
Questions = [
    "How many phases is the zuma 1 study separated into ?",
    "How many cohorts are there in the zuma 1 study ?",
    "What periods in the zuma 1 study does each subject proceed through ?",
    "What is the primary objective of the Phase 1 zuma 1 study ?",
    "What are the secondary objectives of the Phase 1 study ?",
    "What is the primary objective of the Phase 2 pivotal study ?",
    "What are the secondary objectives of the Phase 2 pivotal study",
    "What is the primary objective of the phase 2 safety management study?",
    "What are the secondary objectives of the Phase 2 safety management study",
    "What is the primary endpoint of the phase 1 study",
    "What are the secondary endpoints of the phase 1 study",
    "What is the primary endpoint of the phase 2 pivotal study?",
    "What are the secondary endpoints of the phase 2 pivotal study",
    "What is the primary endpoint of the phase 2 safety management study",
    "What are the secondary endpoints of the phase 2 safety management studies",
    "How many subjects will be enrolled in the zuma 1study?",
    "How many subjects will be enrolled in the phase 1 zuma 1study",
    "How many subjects will be enrolled in the phase 2 pivotal study",
    "How many subjects will be enrolled in the cohort 1 of the phase 2 pivotal study",
    "How many subjects will be enrolled in the cohort 2 of the phase 2 pivotal study",
    "How many subjects will be enrolled in the phase 2 safety management study",
    "How many subjects will be enrolled in cohort 3 phase 2 safety management study",
    "How many subjects will be enrolled in cohort 4 phase 2 safety management study",
    "How many subjects will be enrolled in cohort 5 phase 2 safety management study",
    "How many subjects will be enrolled in cohort 6 phase 2 safety management study",
    "What is the investigational product in this study",
    "What does the investigational product treatment consist of",
    "What does axicabtagene ciloleucel treatment consist of",
    "When may bridging therapy be considered?",
    "When may debulking therapy be considered?",
    "Which subjects may be considered for debulking therapy?",
    "What conditioning chemotherapy is used before axicabtagene ciloleucel is administered?",
    "What is the dose of the conditioning chemotherapy?",
    "What is the conditioning chemotherapy regimen?",
    "What is the fludarabine dose used in conditioning chemotherapy?",
    "What is the cyclophosphamide dose used in conditioning chemotherapy?",
    "For how many days is the conditioning chemotherapy given?",
    "What treatment is given to subjects in cohort 3 of the phase 2 safety management study?",
    "What treatment is given to subjects in cohort 4 of the phase 2 safety management study?",
    "What conditioning chemotherapy is given to subjects in cohort 5 of the phase 2 safety management study?",
    "What treatment is given to subjects in cohort 5 of phase 2 safety management study?",
    "What conditioning chemotherapy is given to subjects in cohort 6 of the phase 2 safety management study?",
    "According to the schedule of assessments, what assessments or procedures will the subjects undergo at regular time periods?",
    "What procedures will the subjects undergo at baseline",
    "Which subjects will take the EQ-5D questionnaire",
    "When will the subjects in cohort 3, cohort 4, cohort 5 and cohort 6 will take the EQ-5D questionnaire?",
    "What is the power of the study for the test of efficacy",
    "When will the subjects in cohort 3, cohort 4, cohort 5 and cohort 6 have lumbar punctures for collection of cerebro spinal fluid?"
]

for question in Questions:
    print (f"Question: {question}")
    print (f"Answer: {chain.invoke({'question': question})}")
    print ()

Question: How many phases is the zuma 1 study separated into ?
Answer: Based on the document provided, the Phase 1 Study of axicabtagene ciloleucel is separated into three phases:

1. Screening Phase
2. Dose-limiting Toxicity (DLT) Evaluable Phase
3. Maintenance Phase

So, there are 3 phases in total for the Phase 1 Study of axicabtagene ciloleucel.

Question: How many cohorts are there in the zuma 1 study ?
Answer: Based on the document provided, there are 6 cohorts in the Phase 1 Study of axicabtagene ciloleucel. These cohorts are identified as Cohort 1 through Cohort 6.

Question: What periods in the zuma 1 study does each subject proceed through ?
Answer: According to the protocol document provided, each subject in the ZUMA 1 study will proceed through the following periods:

1. Screening period: This starts when the subject signs the informed consent form (ICF) and lasts until the subject is determined to be eligible or ineligible for the study.
2. Enrollment period: This starts w