## 1. Set your API key

In [1]:
apikey = "sk-..............................."
organization = "org-........................"

## 2. load text files

In [2]:
from langchain.document_loaders import DirectoryLoader

In [3]:
document_directory = "text_files"
loader = DirectoryLoader(document_directory)
documents = loader.load()

In [4]:
documents

[Document(page_content="The history of computers dates back to the early 19th century when Charles Babbage conceived the idea of a mechanical computer. However, it wasn't until the mid-20th century that electronic computers became a reality. The ENIAC, built in 1945, is considered one of the earliest general-purpose electronic computers. Over the years, computers have evolved significantly in terms of size, power, and capabilities. The invention of the microprocessor in the 1970s revolutionized the industry and paved the way for personal computers, leading to the computer revolution in the 1980s.", metadata={'source': 'text_files\\History of Computers.txt'}),
 Document(page_content='The solar system comprises the Sun and all the celestial objects that orbit it, including planets, moons, asteroids, and comets. The Sun, a massive ball of hot plasma, accounts for over 99% of the total mass of the solar system. The four inner planets, Mercury, Venus, Earth, and Mars, are rocky and relative

## 3. Load OpenAI Embeddings

In [5]:
from langchain.embeddings import OpenAIEmbeddings

In [6]:
embeddings = OpenAIEmbeddings(openai_api_key=apikey)

## 4. Load OpenAI LLM

In [7]:
from langchain.llms import OpenAI

In [8]:
llm = OpenAI(openai_api_key=apikey, openai_organization=organization)

## 5. Create index

In [9]:
from langchain.vectorstores import Chroma

In [10]:
db = Chroma.from_documents(documents, embeddings)

## 6. Create retriever from index and chain it with LLM

In [11]:
retriever = db.as_retriever()

In [12]:
from langchain.chains import RetrievalQAWithSourcesChain

In [13]:
qa = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, retriever=retriever)

## 7. Query the chain

In [14]:
question = "What is Jupiter and Saturn ?"
generated_text = qa(question)
generated_text

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


{'question': 'What is Jupiter and Saturn ?',
 'answer': ' Jupiter and Saturn are gas giants with thick atmospheres, located in the outer solar system. \n',
 'sources': 'text_files\\Solar System.txt'}

In [16]:
question = "What happened in 2006 ?"
generated_text = qa(question)
generated_text

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


{'question': 'What happened in 2006 ?',
 'answer': ' In 2006, Pluto was reclassified as a dwarf planet.\n',
 'sources': 'text_files\\Solar System.txt'}