# List of Packages to Download

```python
!pip install langchain-chroma
!pip install langchain
!pip install langchain_community
!pip install langchainhub
!pip install --upgrade --quiet  gpt4all > /dev/null

In [31]:
!pip install langchain-chroma
!pip install langchain
!pip install langchain_community
!pip install langchainhub
!pip install gpt4all



In [32]:
!pip install gpt4all



# 1. Get a Data Loader


In [33]:
from langchain_community.document_loaders import WebBaseLoader


In [34]:
loader = WebBaseLoader("https://www.foxsports.com/nba/lebron-james-player-stats")
data = loader.load()
data

[Document(page_content="\n\n\nLeBron James Stats - NBA | FOX Sports\n\n\n    my favs \n          Access and manage your favorites here\n          \n            DISMISS\n                  Scores Watch UEFA Euro 2024 COPA AMÉRICA Odds Super 6 Stories  \n                  Search\n                     \n                  Sign In\n                    \n                  Account\n                    \n              SPORTS & TEAMS\n             \n              PLAYERS\n             \n              SHOWS\n             \n              PERSONALITIES\n                    SPORTS   \n              SPORTS & TEAMS\n            \n              PLAYERS\n            \n              SHOWS\n            \n              PERSONALITIES\n             \n          SPORTS\n          \n                NFL\n                \n                NCAA FB\n                \n                MLB\n                \n                NBA\n                \n                NCAA BK\n                \n                NASCAR\n     

# 2. Convert data to Vector Database


In [35]:
from langchain.vectorstores import Chroma
from langchain.embeddings import GPT4AllEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [36]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(data)

In [43]:
embedding = GPT4AllEmbeddings(model_name="Phi-3-mini-4k-instruct-q4.gguf")
Chroma.from_documents(documents=splits, embedding=embedding, persist_directory="./chroma_db")

ValidationError: 1 validation error for GPT4AllEmbeddings
__root__
  Request failed: HTTP 404 Not Found (type=value_error)

# 3. Make a RAG pipeline


In [38]:

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain import hub

In [39]:
from langchain_community.llms import Ollama


In [40]:
model_name = "phi3"
llm = Ollama(model=model_name)
prompt = hub.pull("rlm/rag-prompt")
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embedding)

In [41]:
qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
        chain_type_kwargs={"prompt": prompt}
    )

In [42]:
question = "Tell me what the stats are saying"
result = qa_chain({"query": question })

ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/generate (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000002C8377E26A0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [None]:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(result["result"])

('The LeBron James statistics show that he averages 26.3 points per game (13th '
 'in the league), 6.7 rebounds (32nd in the league), and 9.7 assists per game '
 '(5th in the league). His shooting efficiency is at 62.7%, with a field goal '
 'percentage of 1.7 steals per game (17th in the league) and 0 blocks on '
 'record according to the provided context.')
