In [110]:
import os
from dotenv import load_dotenv
# Load environment variables through python-dotenv instead of setting them by hand.
# NOTE(review): presumably this is where OPENAI_API_KEY comes from for the OpenAI
# clients created further down — confirm the .env file defines it.
load_dotenv()

True

In [111]:
from langchain import OpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate

In [112]:
import os
import json
import sys


def file_reader(path: str) -> str:
    """Read a text file and return its entire contents.

    Args:
        path: Location of the file to read, absolute or relative to the
            current working directory.

    Returns:
        The decoded contents of the file as a single string.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default, which can garble or reject non-ASCII
    # characters (e.g. typographic quotes) in prompt files.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
            
def get_prompt():
    """Return the prompt-generation template text read from disk.

    The text is read from ``../prompts/prompt_generation_template.txt``
    (resolved against the current working directory) through ``file_reader``
    and handed back as a ``str``.
    """
    template_path = "../prompts/prompt_generation_template.txt"
    return str(file_reader(template_path))

## Prompt Generator Template

In [113]:
# Raw prompt text, read from disk by get_prompt().
RAG_PROMPT_TEMPLATE = get_prompt()

# Wrap the raw text in a chat prompt template; the placeholders found in the
# text become the template's input variables.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
# Echo the template so the detected input variables can be inspected in the cell output.
rag_prompt

ChatPromptTemplate(input_variables=['context', 'output'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'output'], template='Your task is to formulate exactly 5 questions from given context and provide the answer to each one.\n\nEnd each question with a \'?\' character and then in a newline write the answer to that question using only \nthe context provided.\nThe output MUST BE in a json format. \n\nexample:\n[\n{{\n    "user": "What is the name of the company?",\n    "assistant": "Google"\n}},\n{{\n    "user": "What is the name of the CEO?",\n    "assistant": "Sundar Pichai"\n}}\n]\n\nEach question must start with "user:".\nEach answer must start with "assistant:".\n\n\nThe question must satisfy the rules given below:\n1.The question should make sense to humans even when read without the given context.\n2.The question should be fully answered from the given context.\n3.The question should be framed from a part of context that contains important

## Embeddings and Vector Store

In [114]:
# Load the challenge description file as LangChain documents.
loader = TextLoader('./week_6_challenge_doc.txt')
documents = loader.load()

# Split the documents into overlapping chunks; chunk lengths are measured
# with the tiktoken encoding selected through the model name.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    model_name="gpt-4-1106-preview",
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

# Language model used as the generation step of the chain.
llm = OpenAI(temperature=0)

# Embed each chunk and index it in the "challenge_document" Chroma collection.
# NOTE(review): re-running this cell in the same kernel may add the chunks to
# the collection again — confirm before relying on retrieval results.
embeddings = OpenAIEmbeddings()
store = Chroma.from_documents(
    texts,
    embeddings,
    collection_name="challenge_document",
)

## String Output Parser

In [115]:
from langchain.schema import StrOutputParser

# Output parser instance; it is the final stage of rag_chain, whose result is a plain string.
str_output_parser = StrOutputParser()

## Setting up RAG chain

In [118]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Expose the vector store through the retriever interface.
retriever = store.as_retriever()

# Fan the incoming request out into the two variables the prompt expects:
# "context" is filled by the retriever, "output" receives the request unchanged.
entry_point_and_retriever = RunnableParallel(
    {"context": retriever, "output": RunnablePassthrough()}
)

# Full pipeline: build prompt inputs -> render prompt -> call the LLM -> parse to str.
rag_chain = (
    entry_point_and_retriever
    | rag_prompt
    | llm
    | str_output_parser
)

In [121]:
# Run the chain on a sample request. The string is passed both to the retriever
# (to fetch context) and, unchanged, into the prompt's "output" variable.
rag_chain.invoke('i want to know the goals of the challenge')

'?\n\n[\n{\n    "user": "What are the goals of the challenge?",\n    "assistant": "The goal of this approach is to support and reward expertise in different parts of the Machine learning engineering toolbox."\n},\n{\n    "user": "What are the fundamental tasks for this week\'s challenge?",\n    "assistant": "The core tasks for this week’s challenge in Automatic Prompt Engineering are outlined below."\n},\n{\n    "user": "What is the task 1 about?",\n    "assistant": "Task 1: Review the Evolution of Automatic Prompt Engineering."\n},\n{\n    "user": "What are the key developments in the field of automatic prompt engineering?",\n    "assistant": "Focus on understanding the key developments in the field of automatic prompt engineering for Language Models (LLMs)."\n},\n{\n    "user": "What is the purpose of the badges?",\n    "assistant": "In addition to being the badge holder for that badge, each badge winner will get +20 points to the overall score."\n}\n]'