# Meta Llama 3.1 8B Instruct Experiments 

We need to start by importing all necessary modules.

In [17]:
from langchain.prompts import PromptTemplate
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain_core.documents.base import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_ollama import OllamaLLM
import pinecone

from typing import List
import os

## Simple Langchain Experiment

First, we need to initialize a simple test prompt.

In [18]:
# Single-slot test prompt: {model_name} is substituted when the prompt is formatted.
template = 'Please give me a trivia fact about the {model_name} deep learning model.'
prompt = PromptTemplate.from_template(template=template)

Next, we should initialize an instance of Meta's Llama 3.1 model.

In [19]:
# Handle to the 'llama3.1:8b' model served through Ollama.
# NOTE(review): presumably requires a running Ollama server with this model
# already pulled -- confirm before running the notebook.
model = OllamaLLM(model='llama3.1:8b')

Now, we should create a chain.

In [20]:
# Pipe the prompt into the model to form a chain that can be invoked with the
# prompt's input variables.
chain = prompt | model

Lastly, we should invoke the chain using some input argument.

In [21]:
# Deep learning model to ask about -- swap in whichever one you prefer.
model_name = 'Multilayer Perceptron'

# Run the chain with the chosen model name as its input and show the reply.
answer = chain.invoke({'model_name': model_name})
print(answer)

Here's a trivia fact:

**Did you know that the Multilayer Perceptron (MLP) was actually invented by Frank Rosenblatt in 1958, decades before the modern Deep Learning era?**

Rosenblatt, an American computer scientist and engineer at Cornell Aeronautical Laboratory, published a paper titled "The Perceptron: A Perceiving Machine" in which he described the concept of a feedforward neural network with multiple layers (the MLP). This was a groundbreaking work that laid the foundation for many modern deep learning techniques.

It's interesting to note that while Rosenblatt's work predated the resurgence of interest in neural networks, his own perceiver was not as effective or scalable as modern MLPs. Nevertheless, his contribution to the field is undeniable!


## RAG Experiment

Next, we define a function that extracts data from every PDF file in a directory, and then run it on the directory containing the Medical Encyclopedia, which will serve as our knowledge base.

In [6]:
def extract_data(path: str) -> List[Document]:
    '''
    Extracts data from the PDF files in the directory at the passed path.

    Inputs:
        path: a path to the directory containing the PDFs to be extracted from.

    Returns:
        docs: a list of Document objects containing the extracted data.

    Raises:
        NotADirectoryError: if path does not point to an existing directory.
    '''

    # Fail fast on a bad path: a mistyped directory could otherwise yield an
    # empty knowledge base instead of an error.
    if not os.path.isdir(path):
        raise NotADirectoryError(f'Expected a directory of PDF files, got: {path!r}')

    loader = PyPDFDirectoryLoader(path=path)
    docs = loader.load()

    return docs

In [7]:
# Load the contents of the '../data/' directory (path is relative to the
# notebook's working directory).
data = extract_data('../data/')

Now, we define a function that splits the extracted data into text chunks, and apply it to the data we just loaded.

In [8]:
def split_text(data: List[Document], *, chunk_size: int = 500, chunk_overlap: int = 20) -> List[Document]:
    '''
    Splits the input data into text chunks.

    Inputs:
        data: a list of Document objects containing data extracted from a PDF.
        chunk_size: the chunk size passed to the text splitter (defaults to 500).
        chunk_overlap: the chunk overlap passed to the text splitter (defaults to 20).

    Returns:
        chunks: a list of Document objects containing text chunks.
    '''

    # The defaults reproduce the previously hard-coded splitter settings, so
    # existing calls such as split_text(data=data) behave exactly as before.
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_documents(data)

    return chunks

In [9]:
# Split the loaded documents into text chunks for embedding.
chunks = split_text(data=data)

At this point, we need to download an embedding model from Hugging Face.

In [10]:
# Embedding model identified by its Hugging Face model name.
# NOTE(review): presumably downloaded on first use and cached locally -- confirm.
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')



In [11]:
# Bare expression: the notebook displays the model's repr as the cell output.
embedding_model

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

Now, we need to populate our Pinecone Index with embedded text chunks.

In [14]:
# Name of the Pinecone index that receives the embedded chunks.
# NOTE(review): presumably the index must already exist, match the embedding
# model's output dimension, and the Pinecone API key must be set in the
# environment -- confirm before running.
index_name = 'medical-chatbot'
pc_vector_store = PineconeVectorStore.from_documents(
    chunks,
    embedding=embedding_model,
    index_name=index_name,
)

Next, we should create our prompt template.

In [15]:
# Prompt text for the RAG chain. It has two placeholders, {context} and
# {question}, and instructs the model to admit when it does not know the answer.
template = '''
Please use the following information to answer the user's question.
If you don't know the answer, do NOT try to make one up; just say you don't know.

Context: {context}
Question: {question}

Only return a helpful answer and nothing else below:
'''


In [16]:
# Wrap the template in a PromptTemplate that declares its two input variables,
# then package it as keyword arguments for the chain constructor.
prompt = PromptTemplate(input_variables=['context', 'question'], template=template)
chain_kwargs = dict(prompt=prompt)

At this point, we will set up our LLM and our chain.

In [27]:
# LLM instance for the retrieval QA chain.
# NOTE(review): temperature=0.8 is fairly high for answers that should stick
# to retrieved context -- confirm this is intended.
# NOTE(review): num_gpu=1 presumably controls GPU offloading in Ollama --
# verify against the langchain_ollama documentation.
llm = OllamaLLM(model='llama3.1:8b', temperature=0.8, num_gpu=1)

In [29]:
# Assemble the retrieval QA chain: the Pinecone vector store acts as the
# retriever, the custom prompt is passed through chain_type_kwargs, and the
# source documents are returned alongside each answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=pc_vector_store.as_retriever(),
    chain_type_kwargs=chain_kwargs,
    return_source_documents=True,
)

Finally, it's time to test!

In [37]:
# Interactive question/answer loop; it ends once the user enters "thank you".
question = ''
# Compare case-insensitively and ignore surrounding whitespace so that inputs
# such as "Thank you " still end the session.
while question.strip().lower() != 'thank you':
    question = input('Enter your medical question or "Thank you" to exit: ')
    print(f'Input: {question}')

    # The exit phrase itself is still sent through the chain before the loop
    # condition is re-checked, so the session ends with one final response.
    # Use .invoke(): calling a chain object directly is deprecated in LangChain.
    result = qa.invoke({'query': question})
    print(f'Response: {result["result"]}\n')

Input: What are the symptoms of the flu?
Response: Fever, headache, body ache, sore throat, chills, weakness, and low-grade fever followed by watery diarrhea, nausea, loss of appetite, and muscle aches.

Input: What are the symptoms of the bubonic plague?
Response: I don't see any information on the bubonic plague in the provided text.

Input: What about strep throat? What are the symptoms for that?
Response: A sore throat, chills, fever, and swollen lymph nodes in the neck.

Input: thank you
Response: You're welcome.

