## Requirements

In [None]:
!pip install langchain==0.2.5 langchain-openai==0.1.9 langchain-community==0.2.5 chromadb==0.5.3

# RAG using data.txt

## Import modules

In [17]:
import os
#import sys
import getpass

from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

import chromadb

## OpenAI API

In [None]:
# Prompt for the key without echoing it, so the secret never lands in the
# notebook source or its saved output.
# Pasted keys often carry stray whitespace or a trailing newline; strip it.
api_key = getpass.getpass("Enter your OPENAI_API_KEY:\n\n").strip()

# Fail here with a clear message instead of exporting an empty key and only
# finding out at the first API call.
if not api_key:
    raise ValueError("OPENAI_API_KEY is empty; re-run this cell and enter a valid key.")

# Export the key through the environment for the OpenAI client classes used below.
os.environ["OPENAI_API_KEY"] = api_key

## Prompt and Template

In [13]:
# Prompt variant 1: answer from the retrieved context, but allow the model to
# fall back on (or combine with) its own knowledge, as long as that part of the
# answer is prefixed with the literal marker "BASED ON MY OWN KNOWLEDGE: ".
# {context} and {question} are the placeholders filled in at query time.
template1 = """Use the following pieces of context to answer the question at the end.
      If the context is not relevant, please answer the question by using your own knowledge about the topic. just before answering write "BASED ON MY OWN KNOWLEDGE: "
      you can also give a combined answer but remember to tell me which is which.

      {context}

      Question: {question}
"""

# Prompt variant 2: strictly context-only; the model is told to say it does
# not know when the retrieved context is not relevant.
template2 = """Use the following pieces of context to answer the question at the end.
      If the context is not relevant, dont try to use your own knowledge and simply say i don't know.

      {context}

      Question: {question}
"""

# Choose the prompt variant to use (template1 or template2).
prompt_template = template1

# Wrap the chosen text in a PromptTemplate that declares its two placeholders.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra keyword arguments carrying the custom prompt; passed to the
# RetrievalQA chain when it is built.
chain_type_kwargs = dict(prompt=PROMPT)

## Show Document

In [10]:
# Read the whole source document into memory; the context manager closes the
# handle as soon as the read is done.
with open('data.txt', mode='r') as data_file:
    file_content = data_file.read()

# Show the raw text that the retriever will be built from.
print(file_content)

im going to doctor on October 15 2023.
im going to specialist doctor on the next week.
today is 10/4/2023.



## Build the Retriever (Retrieval Augmentation)

In [18]:
# Load and split document
loader = TextLoader("data.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Embedding
embeddings = OpenAIEmbeddings()

# Create a vectorstore using Chroma.
# Without explicit ids every call stores the chunks under fresh random ids, so
# re-running this cell in the same kernel piles up duplicate chunks in the
# default collection and the retriever returns the same text several times.
# Deterministic, position-based ids make a re-run overwrite the existing
# entries instead.
# NOTE(review): chunks left over from an earlier, longer version of the file
# are not removed by this; restart the kernel after shrinking data.txt.
chunk_ids = [
    f"{chunk.metadata.get('source', 'document')}-{position}"
    for position, chunk in enumerate(texts)
]
db = Chroma.from_documents(texts, embeddings, ids=chunk_ids)

# Define a retriever
retriever = db.as_retriever(search_type="similarity")   # search_type = similarity / mmr

## Run OpenAI Model

In [16]:
# Chat model used to generate the answers; temperature is set to 0.
llm_openai = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# Retrieval Q/A chain: ties the model, the retriever and the custom prompt
# together. The source documents are returned alongside the answer so they
# can be printed later.
qa_openai = RetrievalQA.from_chain_type(
    llm=llm_openai,
    retriever=retriever,
    chain_type="stuff",  # chain_type = ['stuff', 'map_reduce', 'refine', 'map_rerank']
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
    verbose=True,
)

## Enter and run query

In [25]:
# The question to ask against the indexed document.
question = "when is my doctor appointment?"

# Run the chain; the question is supplied under the chain's "query" input key.
# `result` is a dict that is read in the next cell.
result = qa_openai.invoke({"query": question})



> Entering new RetrievalQA chain...





> Finished chain.


In [None]:
# Echo the question and the generated answer, each under its own label.
print("Question:", result['query'], sep="\n")
print("Answer:", result['result'], sep="\n")

# List the retrieved chunks the answer was built from: the text first, then
# its metadata, followed by blank lines as a visual separator.
sources = result['source_documents']
print("\nSources:\n")
for doc in sources:
    print(doc.page_content, doc.metadata, "\n\n\n", sep="\n")