### Example

In [11]:
import getpass
import os

from langchain import HuggingFaceHub, LLMChain, PromptTemplate
from langchain.llms import OpenAI

# Credentials must never be written into the notebook. Take the key from the
# environment and fall back to an interactive prompt, so the cell still works
# on a fresh kernel without a secret ever being saved with the file.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
# Export it so every OpenAI-backed client created below can pick it up.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Minimal question/answer prompt with a single input variable.
template = """Question: {question}

Answer: """
prompt = PromptTemplate(
    template=template,
    input_variables=['question']
)

# user question
question = "who is Asmaa Tbaeen?"

# Completion model and the chain that feeds the formatted prompt into it.
davinci = OpenAI(model_name='text-davinci-003')
llm_chain = LLMChain(
    prompt=prompt,
    llm=davinci
)

# NOTE(review): with no source documents behind it, the model answers from its
# own weights only; treat the printed answer as unverified.
print(llm_chain.run(question))

 Asmaa Tbaeen is a Palestinian journalist and human rights activist. She is a prominent figure in the Palestinian Authority and is the founder and director of the Palestinian Freedom Center, which is dedicated to advocating for the rights of Palestinians and promoting peace and dialogue.


In [2]:
# Asking Multiple Questions
# Each input is a dict keyed by 'question'; generate() takes the whole list in
# one call, and the bare `res` at the end displays the returned result object.
question_texts = (
    "Which NFL team won the Super Bowl in the 2010 season?",
    "If I am 6 ft 4 inches, how tall am I in centimeters?",
    "Who was the 12th person on the moon?",
    "How many eyes does a blade of grass have?",
)
qs = [{'question': text} for text in question_texts]
res = llm_chain.generate(qs)
res

LLMResult(generations=[[Generation(text=' The Green Bay Packers won the Super Bowl in the 2010 season.', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text=' 193.04 centimeters', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text=' Eugene Cernan was the 12th person on the moon. He was the last person to have ever walked on the moon as part of the Apollo 17 mission in 1972.', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text=' A blade of grass does not have any eyes.', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {'prompt_tokens': 75, 'completion_tokens': 62, 'total_tokens': 137}, 'model_name': 'text-davinci-003'})

### After adding our own source

In [14]:
import os

from langchain import OpenAI

from llama_index import (
    GPTVectorStoreIndex,
    LLMPredictor,
    PromptHelper,
    ResponseSynthesizer,
    ServiceContext,
    SimpleDirectoryReader,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor

# LLM used for answer synthesis (deterministic: temperature=0).
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003"))

# Configure prompt parameters and initialise helper
max_input_size = 4096
num_output = 256
max_chunk_overlap = 20

prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

# Bundle the predictor and prompt helper so the index and the synthesizer
# actually use them; without this both objects are built and then ignored.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
)

# Load documents from the 'data_doc' directory and build the vector index
documents = SimpleDirectoryReader('data_doc').load_data()
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)

# configure retriever: fetch the 2 most similar nodes per query
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=2,
)

# configure response synthesizer: nodes scoring under the 0.7 similarity
# cutoff are handed to the postprocessor for filtering before synthesis
response_synthesizer = ResponseSynthesizer.from_args(
    service_context=service_context,
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.7)
    ],
)

# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Questions to run against the indexed documents, one query per entry.
query = ["How many eyes does a blade of grass have?","in short: what is the conclusion of CDMX document?","what is the best review ?", "summarize all reviews"]
for qu in query:
    response = query_engine.query(qu)
    print(response)



A blade of grass does not have any eyes.

The conclusion of the CDMX document is that the main target for Rumbo should be level D, as it is the predominant socioeconomic status in the Metropolitan Area and in which people have a high probability of using public transport on a daily basis. The majority of the population in the Metropolitan Area earns less than MXN $15,000 monthly (which means less than USD $25 a day), and the predominant income bracket is MXN $5,000 monthly  to $10,000 monthly (which means USD $8 to USD $16 a day). The areas where fewer people live in poverty are located in the centre and the west of CDMX, while the areas where between 20% to 40% of the population live in poverty are in the south of the city and in some neighbourhoods of the outskirts. About 51% of the population in the Metropolitan Area uses public transport (essentially the informal at 76%, followed by the Metro with 30%), although a high percentage of users use two forms of transport to reach their 

In [12]:
import os
import openai
import asyncio
from datetime import datetime

import uvicorn
from fastapi import FastAPI, Body
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from fastapi import Request

from threading import Thread
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from pydantic import BaseModel

from slack_bolt.adapter.socket_mode import SocketModeHandler
from slack_sdk import WebClient
from slack_bolt import App

from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from llama_index import SimpleDirectoryReader,GPTVectorStoreIndex
from langchain.memory import ConversationBufferMemory
  

# Set the OPENAI_API_KEY environment variable, for future calls
# (OPENAI_API_KEY itself is defined in the first cell of the notebook).
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

# The Slack bot token is a secret: read it from the environment rather than
# relying on a value left over in the kernel. A KeyError here means the
# SLACK_BOT_TOKEN variable was not exported before starting Jupyter.
SLACK_BOT_TOKEN = os.environ["SLACK_BOT_TOKEN"]

# Set up FastAPI
fast_api_app = FastAPI(
    title="ChatGPTWimt",
    description="Application for querying information about our documents, or redirecting to ChatGPT",
)
# NOTE(review): CORS is wide open (any origin/method/header); tighten this
# before exposing the app outside a local experiment.
fast_api_app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

@fast_api_app.get("/", include_in_schema=False)
async def docs_redirect():
    """Redirect the bare root URL to the interactive API docs."""
    return RedirectResponse(url="/docs")


@fast_api_app.get("/health")
async def root():
    """Health probe: always reports a static 'healthy' status."""
    return {"status": "healthy"}

# Event API & Web API
app = App(token=SLACK_BOT_TOKEN)
client = WebClient(SLACK_BOT_TOKEN)

# load documents from materials directory
loader = PyPDFDirectoryLoader("materials")
documents = loader.load()

# split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# select which embeddings we want to use
embeddings = OpenAIEmbeddings()

# create the vectorstore to use as the index
# NOTE(review): persist_directory="." writes the store into the current
# working directory; consider a dedicated sub-directory.
db = Chroma.from_documents(texts, embeddings, persist_directory=".")
db.persist()
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":2})
# Conversation history is stored under 'chat_history' as message objects.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# create a chain to answer questions
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0.9, model_name="text-davinci-003"), retriever, memory=memory, chain_type='map_reduce')



Using embedded DuckDB with persistence: data will be stored in: .


In [13]:

# Ask the conversational retrieval chain a single question; the chain expects
# its input as a dict under the 'question' key.
question = "in short: what is the conclusion of CDMX document?"
chain_inputs = {"question": question}
response = qa(chain_inputs)
 

In [14]:
# Show the chain's result: a dict holding the question, the accumulated
# chat_history messages, and the generated answer.
response

{'question': 'in short: what is the conclusion of CDMX document?',
 'chat_history': [HumanMessage(content='in short: what is the conclusion of CDMX document?', additional_kwargs={}, example=False),
  AIMessage(content=' The majority of public transport users in the Metropolitan Area of CDMX belong to the socioeconomic levels D or D+, with the most vulnerable groups being those with the least mobility and most reliance on public transport, as transportation costs take up a higher percentage of their income.', additional_kwargs={}, example=False)],
 'answer': ' The majority of public transport users in the Metropolitan Area of CDMX belong to the socioeconomic levels D or D+, with the most vulnerable groups being those with the least mobility and most reliance on public transport, as transportation costs take up a higher percentage of their income.'}