In [4]:
# pip install --upgrade langchain openai qdrant-client pandas nltk tomotopy pyvis
# pip install langchain-community
# pip install langchain-qdrant
# pip install tiktoken
# pip install langchain-openai


## Step 1: Scrape the article data

In [5]:
from langchain_community.document_loaders import WebBaseLoader


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [42]:
# ESPN "Stock Watch" article used as the knowledge source for this notebook.
ARTICLE_URL = "https://www.espn.com/mlb/insider/story/_/id/40734383/stock-watch-post-trade-deadline-guide-all-30-mlb-teams"

# Fetch the page and turn it into a list of Document objects.
loader = WebBaseLoader(ARTICLE_URL)
documents = loader.load()

In [43]:
# Inspect what the loader returned (a list of Document objects; the repr is very long).
documents

[Document(metadata={'source': 'https://www.espn.com/mlb/insider/story/_/id/40734383/stock-watch-post-trade-deadline-guide-all-30-mlb-teams', 'title': 'Stock Watch: Post-trade deadline guide for all 30 MLB teams - ESPN', 'description': "How each club's playoff hopes have risen -- or fallen -- and what comes next.", 'language': 'en'}, page_content="\n\n\n\n\n\n\n\n\nStock Watch: Post-trade deadline guide for all 30 MLB teams - ESPN\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n        Skip to main content\n    \n\n        Skip to navigation\n    \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n<\n\n>\n\n\n\n\n\n\n\n\n\nMenuESPN\n\n\n\n\n\nscores\n\n\n\nNFLNBAMLBOlympicsSoccerWNBA…BoxingCFLNCAACricketF1GolfHorseLLWSMMANASCARNBA G LeagueNBA Summer LeagueNCAAFNCAAMNCAAWNHLNWSLPLLProfessional WrestlingRacingRN BBRN FBRugbySports BettingTennisX GamesU

In [44]:
# IPython `??` introspection: prints the type and string form of `documents`.
?? documents

[0;31mType:[0m        list
[0;31mString form:[0m [Document(metadata={'source': 'https://www.espn.com/mlb/insider/story/_/id/40734383/stock-watch-post-trade-deadline-guide-all-30-mlb-teams', 'title': 'Stock Watch: Post-trade deadline guide for all 30 MLB teams - ESPN', 'description': "How each club's playoff hopes have risen -- or fallen -- and what comes next.", 'language': 'en'}, page_content="\n\n\n\n\n\n\n\n\nStock Watch: Post-trade deadline guide for all 30 MLB teams - ESPN\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n        Skip to main content\n    \n\n        Skip to navigation\n    \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n<\n\n>\n\n\n\n\n\n\n\n\n\nMenuESPN\n\n\n\n\n\nscores\n\n\n\nNFLNBAMLBOlympicsSoccerWNBA…BoxingCFLNCAACricketF1GolfHorseLLWSMMANASCARNBA G LeagueNBA Summer LeagueNCAAFNCAAMNCAAWNHLNWSLPLLProfessional Wrest

## Step 2: Setting up the vector database

In [45]:
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_qdrant import Qdrant
from dotenv import load_dotenv
import tiktoken
import os


In [46]:
# NOTE(review): presumably applied so asyncio code can run inside the event
# loop the Jupyter kernel already has running — confirm a later cell needs it.
import  nest_asyncio
nest_asyncio.apply()

### Set up the OpenAI key

In [47]:
# Location of the .env file that holds OPENAI_API_KEY. Set the DOTENV_PATH
# environment variable to point somewhere else; the original location is kept
# as the default so existing setups keep working unchanged.
env_path = os.getenv(
    'DOTENV_PATH',
    '/Users/andishehtavakoli/Documents/github-project/llm-apps/RAG-search-books/.env',
)

# Load environment variables from the specified .env file
load_dotenv(dotenv_path=env_path)

# Get the API key from the environment variable and fail fast when it is
# missing, so the problem is reported here instead of surfacing later from an
# OpenAI client call.
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    raise RuntimeError(
        f"OPENAI_API_KEY is not set; checked the process environment and {env_path!r}."
    )

In [48]:
# Embedding model name, kept in one place so it is easy to change.
EMBEDDING_MODEL = "text-embedding-3-large"

# Embedding client that create_db() passes to the vector store.
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)
def create_db(documents, embedding=None, collection_name="my_documents"):
    """Build an in-memory Qdrant vector store from ``documents``.

    Args:
        documents: Iterable of document objects to embed and index.
        embedding: Embedding model to use. When omitted, the notebook-level
            ``embeddings`` object is used (the original behaviour).
        collection_name: Name of the Qdrant collection to create.

    Returns:
        Whatever ``Qdrant.from_documents`` returns for the new collection.

    Raises:
        ValueError: If ``documents`` contains no items.
    """
    # Materialise once so one-shot iterables are supported and emptiness can
    # be checked before any embedding request is made.
    documents = list(documents)
    if not documents:
        raise ValueError("create_db() requires at least one document; got none.")

    return Qdrant.from_documents(
        documents=documents,
        # Resolved at call time so the notebook-level default can be redefined.
        embedding=embeddings if embedding is None else embedding,
        collection_name=collection_name,
        location=":memory:",
        force_recreate=False,
    )
# Index the scraped page in the in-memory Qdrant collection.
db = create_db(documents)

## Step 3: Information retrieval using relevant context

In [54]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from pprint import pprint

In [50]:
# Chat model handed to the QA chain; constructed with the library defaults.
llm = ChatOpenAI()

In [52]:
# Retrieve with MMR search, returning k=2 results.
# NOTE(review): lambda_mult=0.25 presumably favours diversity over pure
# similarity — confirm against the LangChain MMR documentation.
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 2, "lambda_mult": 0.25},
)

# Create a chain to answer questions. return_source_documents=True makes the
# result dict carry 'source_documents' next to 'query' and 'result'.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

query = "what happens in baseball in July?"
# Use .invoke(): calling the chain object directly (qa({...})) is deprecated
# in LangChain; .invoke() returns the same result dict.
result = qa.invoke({"query": query})

In [55]:
# Pretty-print the chain output: query, answer text, and source documents.
pprint(result)

{'query': 'what happens in baseball in July?',
 'result': 'In July, significant events in baseball include the All-Star '
           'Weekend, the Hall of Fame induction ceremony, and the trade '
           'deadline with teams making moves leading up to it. Passing the '
           'midpoint of the schedule is also significant as it clarifies the '
           'season view based on what teams have shown so far rather than '
           'preseason forecasts.',
 'source_documents': [Document(metadata={'source': 'https://www.espn.com/mlb/insider/story/_/id/40734383/stock-watch-post-trade-deadline-guide-all-30-mlb-teams', 'title': 'Stock Watch: Post-trade deadline guide for all 30 MLB teams - ESPN', 'description': "How each club's playoff hopes have risen -- or fallen -- and what comes next.", 'language': 'en', '_id': 'e129deef810d4995a4b32dad6e5a99c1', '_collection_name': 'my_documents'}, page_content="\n\n\n\n\n\n\n\n\nStock Watch: Post-trade deadline guide for all 30 MLB teams - ESPN\n\n