
# List of Packages to Download

```python
!pip install langchain-chroma
!pip install langchain
!pip install langchain_community
!pip install langchainhub
!pip install -qU langchain-openai

```
## Also fix api key


```python

import os

os.environ["OPENAI_API_KEY"] = "your-openai-key"
```


______________________________________________________________________________
 

# 1. Get a Data Loader


In [3]:
from langchain_community.document_loaders import WebBaseLoader


In [4]:
loader = WebBaseLoader("https://www.foxsports.com/nba/lebron-james-player-game-log?season=2023&seasonType=reg")
data = loader.load()
data

[Document(page_content="\n\n\nLeBron James Game Log - NBA  | FOX Sports\n\n\n    my favs \n          Access and manage your favorites here\n          \n            DISMISS\n                  Home Scores Watch Podcasts Odds Super 6 Stories  \n                  Search\n                     \n                  Sign In\n                    \n                  Account\n                    \n              SPORTS & TEAMS\n             \n              PLAYERS\n             \n              SHOWS\n             \n              PERSONALITIES\n                    SPORTS   SPORTS & TEAMSPLAYERSSHOWSPERSONALITIES \n          SPORTS\n          \n                NFL\n                \n                NCAA FB\n                \n                MLB\n                \n                NBA\n                \n                NCAA BK\n                \n                NASCAR\n                \n                Soccer\n                \n                UFL\n                \n                NCAAW BK\n          

# 2. Convert data to Vector Database


In [6]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings


In [7]:
Chroma.from_documents(documents=data, embedding=OpenAIEmbeddings(model="text-embedding-3-large"), persist_directory="./chroma_db")

<langchain_community.vectorstores.chroma.Chroma at 0x1193cea90>

# 3. Make a RAG pipeline


In [8]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain import hub

In [9]:
llm = ChatOpenAI(model="gpt-4o")
prompt = hub.pull("rlm/rag-prompt")
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=OpenAIEmbeddings(model="text-embedding-3-large"))


In [10]:
qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
        chain_type_kwargs={"prompt": prompt}
    )

In [11]:
question = "Explain what the table is Showing"
result = qa_chain({"query": question })

  warn_deprecated(
Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


In [12]:
result["result"]

"The table shows LeBron James' game log for the 2023-24 NBA season, detailing his performance statistics for each game. It includes metrics such as minutes played, points scored, field goals made, three-point field goals made, free throws made, rebounds (offensive and defensive), assists, steals, blocks, personal fouls, turnovers, and plus-minus ratings."

In [13]:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(result["result"])

("The table shows LeBron James' game log for the 2023-24 NBA season, detailing "
 'his performance statistics for each game. It includes metrics such as '
 'minutes played, points scored, field goals made, three-point field goals '
 'made, free throws made, rebounds (offensive and defensive), assists, steals, '
 'blocks, personal fouls, turnovers, and plus-minus ratings.')
