In [16]:
import os
import streamlit as st
import time
import pickle
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from dotenv import load_dotenv
load_dotenv()

True

In [17]:
openai_api_key = os.environ["OPENAI_API_KEY"]

In [18]:
llm = OpenAI(temperature=0.9, max_tokens=500)

In [19]:
loaders = UnstructuredURLLoader(
    urls=[
        "https://techcrunch.com/2024/08/08/soundhound-acquires-amelia-ai-for-80m-after-it-raised-189m/",
        "https://techcrunch.com/2024/08/07/youtube-is-testing-a-feature-that-lets-creators-use-google-gemini-to-brainstorm-video-ideas/"
    ]
) 

data = loaders.load()
len(data)

2

In [20]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 200,
    chunk_overlap = 200
)

docs = text_splitter.split_documents(data)

In [21]:
len(docs)

536

In [22]:
docs[0]

Document(metadata={'source': 'https://techcrunch.com/2024/08/08/soundhound-acquires-amelia-ai-for-80m-after-it-raised-189m/'}, page_content='AI\n\nSoundHound acquires Amelia AI for $80M after it raised $189M+\n\nIngrid Lunden\n\n5:59 AM PDT • August 8, 2024\n\nComment')

In [23]:
embdeddings = OpenAIEmbeddings()

vector_index = FAISS.from_documents(docs, embdeddings)

In [42]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm,retriever = vector_index.as_retriever())

In [43]:
query = "Which feature youtube is testing?"

In [44]:
langchain.debug=True
chain({'question': query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "Which feature youtube is testing?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "that the feature is available to select creators as a part of a small, limited experiment. YouTube will consider feedback from creators before deciding whether to roll out the feature more broadly.",
      "question": "Which feature youtube is testing?"
    },
    {
      "context": "told TechCrunch that the feature is available to select creators as a part of a small, limited experiment. YouTube will consider feedback from creators before deciding whether to roll out the feature",
      "

{'answer': ' The feature that YouTube is testing is a brainstorming tool for video ideas called Google Gemini.\n',
 'sources': 'https://techcrunch.com/2024/08/07/youtube-is-testing-a-feature-that-lets-creators-use-google-gemini-to-brainstorm-video-ideas/'}