In [1]:
!pip install langchain langchain-openai

import os
os.environ["OPENAI_API_KEY"] = "YOUR API KEY"

Collecting langchain
  Downloading langchain-0.1.9-py3-none-any.whl (816 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m817.0/817.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-openai
  Downloading langchain_openai-0.0.7-py3-none-any.whl (33 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.21 (from langchain)
  Downloading langchain_community-0.0.24-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2,>=0.1.26 (from langchain)
  Downloading langchain_core-0.1.26-py3-none-any.whl (246 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m246.4/246.4 kB[0m [31m19.1 MB/s[0m eta [

In [2]:
!pip install wikipedia
!pip install chromadb
!pip install tiktoken

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11678 sha256=2ac91d8e87625556ee025dfe1de3e5a3929e14dc7a548622575f1323b1bf1df5
  Stored in directory: /root/.cache/pip/wheels/5e/b6/c5/93f3dec388ae76edc830cb42901bb0232504dfc0df02fc50de
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0
Collecting chromadb
  Downloading chromadb-0.4.23-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.7/521.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# WikipediaLoader is implemented in langchain_community; importing it through
# `langchain.document_loaders` relies on a deprecated re-export.
from langchain_community.document_loaders import WikipediaLoader

# Smoke test: fetch at most one Wikipedia page for the query and check how many
# documents came back.
docs = WikipediaLoader(query="Langchain", load_max_docs=1).load()
len(docs)

1

In [4]:
# Inspect the first loaded Document to sanity-check the fetched page text.
docs[0]

Document(page_content='LangChain is a framework designed to simplify the creation of applications using large language models (LLMs). As a language model integration framework, LangChain\'s use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.\n\n\n== History ==\nLangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence. The project quickly garnered popularity, with improvements from hundreds of contributors on GitHub, trending discussions on Twitter, lively activity on the project\'s Discord server, many YouTube tutorials, and meetups in San Francisco and London. In April 2023, LangChain had incorporated and the new startup raised over $20 million in funding at a valuation of at least $200 million from venture firm Sequoia Capital, a week after announcing a $10 million seed investment from Benchmark.In October

# Load and Split Docs for Each Topic

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma  # `langchain.vectorstores` is a deprecated re-export
from langchain_openai import OpenAIEmbeddings


# Build Splitter
# Chunks are at most `chunk_size` characters, with `chunk_overlap` characters
# shared between neighbouring chunks.
chunk_size = 2000
chunk_overlap = 200

# Separators are tried in order: paragraph break, line break, space, and
# finally the empty string.
recursive_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    separators=["\n\n", "\n", " ", ""]
)

topics = ['Langchain', 'OpenAI', 'Generative AI', 'Large Language Models', 'Natural Language Processing']


# Load one Wikipedia page per topic, split it into chunks, and collect every
# chunk in a single flat list.
all_docs = []
for topic in topics:
    loader = WikipediaLoader(query=topic, load_max_docs=1)
    # Dedicated name so the `docs` list created by the earlier cell is not
    # silently overwritten on each iteration.
    topic_chunks = loader.load_and_split(recursive_splitter)
    all_docs.extend(topic_chunks)

len(all_docs)

16

In [6]:
# Inspect the first chunk collected from the splitter.
all_docs[0]

Document(page_content="LangChain is a framework designed to simplify the creation of applications using large language models (LLMs). As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.\n\n\n== History ==\nLangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence. The project quickly garnered popularity, with improvements from hundreds of contributors on GitHub, trending discussions on Twitter, lively activity on the project's Discord server, many YouTube tutorials, and meetups in San Francisco and London. In April 2023, LangChain had incorporated and the new startup raised over $20 million in funding at a valuation of at least $200 million from venture firm Sequoia Capital, a week after announcing a $10 million seed investment from Benchmark.In October 2

In [None]:
# NOTE(review): intentionally left commented out. `db` is only created in the
# following cell, so running this on a fresh kernel would raise NameError.
# Presumably meant for clearing a previously built store before rebuilding it
# -- confirm the intended call and arguments before enabling.
# db.delete()

In [7]:
# Embed every chunk with OpenAI embeddings and index the result in Chroma.
embedding_model = OpenAIEmbeddings()
db = Chroma.from_documents(documents=all_docs, embedding=embedding_model)

# Report how many entries the underlying collection holds after indexing.
stored_count = db._collection.count()
print(stored_count)

16


In [8]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI

# Conversation history is kept under the "chat_history" key and returned as
# message objects rather than one concatenated string.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Chat model; temperature 0 to minimise sampling randomness.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Expose the vector store as a retriever for the chain.
retriever = db.as_retriever()

# Wire model, retriever and memory into a conversational retrieval chain.
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [None]:
# Interactive chat loop: read a question, answer it with the retrieval chain,
# and stop when the user types "quit" (any casing) or input is interrupted.
while True:
    try:
        question = input("User:").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or kernel interrupted: leave the loop cleanly
        # instead of ending the cell with a traceback.
        break

    # Ignore blank submissions rather than spending an API call on them.
    if not question:
        continue

    if question.lower() == 'quit':
        break

    # `invoke` is the supported entry point; calling the chain object directly
    # is deprecated.
    result = qa.invoke({"question": question})

    print(f"Assistant: {result['answer']}")

User:What is Langchain?
Assistant: The purpose of LangChain is to simplify the creation of applications using large language models (LLMs). It serves as a language model integration framework and can be used for various tasks such as document analysis and summarization, chatbots, and code analysis.
User:When was it founded?
Assistant: LangChain was founded in October 2022.
User:Who is the founder?
Assistant: LangChain was founded by Harrison Chase.
User:How does Langchain benefit Natural Language Processing?
Assistant: LangChain offers several benefits for Natural Language Processing (NLP):

1. Simplified Application Creation: LangChain is designed to simplify the creation of applications using large language models (LLMs). It provides a framework that streamlines the integration of language models into NLP applications, making it easier for developers to build and deploy NLP solutions.

2. Wide Range of Use-Cases: LangChain's use-cases overlap with those of language models in general,