In [110]:
# import os
# import streamlit as st
# import pickle
# import dill
# import time
# import langchain
# from langchain import OpenAI
# from langchain.chains import RetrievalQAWithSourcesChain
# from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.document_loaders import UnstructuredURLLoader
# from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import FAISS

In [177]:
import os
import pickle
import faiss
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.schema import Document

In [1]:
# Load the OpenAI API key (set it in the environment; do not hard-code the secret in the notebook)
# os.environ[''] = ''

In [179]:
# LLM handed to the QA chain later on; temperature and max_tokens are passed
# straight through to the OpenAI wrapper.
llm = OpenAI(
    max_tokens=500,
    temperature=0.9,
)

### (1) Load data

In [176]:
# loaders = UnstructuredURLLoader(urls=[
#     "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
#     "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
# ])
# data = loaders.load() 
# len(data)

In [180]:
# Fetch the news articles to index; the cell output is the number of
# documents that were loaded.
article_urls = [
    "https://www.theguardian.com/technology/2024/apr/21/tesla-sales-price-cuts-elon-musk",
    "https://www.bbc.co.uk/news/articles/c1d4g8jz57yo",
]
loaders = UnstructuredURLLoader(urls=article_urls)
data = loaders.load()
len(data)

2

### (2) Split data to create chunks

In [181]:
# Splitter configured for chunks of size 1000 with an overlap of 200 between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

# `data` already holds Document objects, so split_documents (rather than
# split_text) produces the chunks directly.
docs = text_splitter.split_documents(data)

In [182]:
# Number of chunks produced by the splitter.
len(docs)

10

In [183]:
# Inspect the first chunk (page content plus its source metadata).
docs[0]

Document(page_content='The starting price for the Model X was reduced to $77,990 in the US. Photograph: David Zalubowski/AP\n\nView image in fullscreen\n\nThe starting price for the Model X was reduced to $77,990 in the US. Photograph: David Zalubowski/AP\n\nTesla\n\nTesla cuts prices around the world as sales decline in a chaotic week\n\nThis article is more than 2 months old\n\nGlobal vehicle deliveries fall for first time in four years amid growing competition, while Cybertruck faces recall\n\nGuardian staff and agencies\n\nSun 21 Apr 2024 18.46 BST\n\nShare\n\nTesla slashed prices of three of its five models in the US late on Friday, then went on to cut prices around the globe – including in China and Germany – as the company faces falling sales, a Cybertruck recall and an intensifying war for electric vehicles (EVs).\n\nWhat the Cybertruck’s many failures mean for TeslaRead more', metadata={'source': 'https://www.theguardian.com/technology/2024/apr/21/tesla-sales-price-cuts-elon-m

In [184]:
# Inspect the second chunk; its opening text repeats the tail of the first
# chunk because consecutive chunks overlap.
docs[1]

Document(page_content='What the Cybertruck’s many failures mean for TeslaRead more\n\nOn Friday, the company, led by the billionaire Elon Musk, cut the prices of the Model Y, a small SUV that is Tesla’s most popular model and the top-selling electric vehicle in the US, and also of the Models X and S, its older and more expensive models. Prices for the Model 3 sedan and the Cybertruck stayed the same.\n\nThe cuts reduced the starting price for a Model Y to $42,990, and to $72,990 for a Model S and $77,990 for a Model X.\n\nThen on Saturday, Tesla slashed the US price of its “Full Self-Driving” driver assistant software from $12,000 to $8,000.\n\nThe cuts continued on Sunday, when Tesla cut the starting price of the revamped Model 3 in China by 14,000 yuan ($1,930) to 231,900 yuan ($32,000), its official website showed.\n\nIn Germany, the carmaker trimmed the price of its Model 3 rear-wheel drive to €40,990 ($43,670.75) from €42,990, where the price has been since February.', metadata={'

### (3) Create embeddings for these chunks and save them to FAISS index

In [185]:
# Create the embedding client used to vectorise the chunks (OpenAIEmbeddings
# with its default settings); it is handed to the FAISS vector store below.
embeddings = OpenAIEmbeddings()

In [186]:
# Dimensionality of the embedding vectors; used to size the FAISS index.
# NOTE(review): 1536 is an assumption (the vector size of
# text-embedding-ada-002). It must match what `embeddings` actually returns —
# TODO confirm whenever the embedding model changes.
embedding_dimension = 1536

In [187]:
# Build the in-memory docstore: chunk number i is stored under the key str(i).
docstore_dict = dict(zip(map(str, range(len(docs))), docs))
docstore = InMemoryDocstore(docstore_dict)
print(f"Document store keys: {list(docstore_dict.keys())[:10]}")  # Debug output

Document store keys: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


In [188]:
# FAISS flat L2 index sized for the embedding vectors.
index = faiss.IndexFlatL2(embedding_dimension)

# Position i in the index corresponds to the docstore entry stored under str(i).
index_to_docstore_id = {row: str(row) for row in range(len(docs))}
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id,
)

In [189]:
# Add documents to the FAISS index.
#
# `add_documents` embeds the chunks and appends to three things at once: the
# FAISS index, the docstore and `index_to_docstore_id`. The vector store was
# constructed with the docstore and the mapping already filled for every chunk,
# so adding on top of that stores each chunk a second time under a generated id
# and registers those ids at positions that hold no vector. Start from an empty
# store and pass the ids explicitly so that vector position i maps to docstore
# key str(i). The reset also makes it safe to re-run this cell.
doc_ids = [str(i) for i in range(len(docs))]

docstore = InMemoryDocstore({})
index_to_docstore_id = {}
vector_store.index.reset()  # drop vectors left over from a previous run of this cell
vector_store.docstore = docstore
vector_store.index_to_docstore_id = index_to_docstore_id

vector_store.add_documents(docs, ids=doc_ids)

# Index, mapping and chunk list must describe the same set of documents.
assert vector_store.index.ntotal == len(vector_store.index_to_docstore_id) == len(docs)
print("Documents added to FAISS index.")

Documents added to FAISS index.


In [129]:
# # Pass the documents and embeddings inorder to create FAISS vector index
# vectorindex_openai = FAISS.from_documents(docs, embeddings)

In [190]:
# Save the FAISS index and documents separately: the raw FAISS index is
# written to its own binary file here, while the chunks and the
# position-to-id mapping are pickled in the following cells.
index_path = "faiss_index.bin"
faiss.write_index(vector_store.index, index_path)

In [191]:
# Persist the chunked documents so the docstore can be rebuilt on reload.
docs_path = "docs.pkl"
with open(docs_path, mode="wb") as docs_file:
    pickle.dump(docs, docs_file)

In [192]:
# Persist the vector store's position -> docstore-id mapping next to the index.
index_to_docstore_id_path = "index_to_docstore_id.pkl"
with open(index_to_docstore_id_path, mode="wb") as mapping_file:
    pickle.dump(vector_store.index_to_docstore_id, mapping_file)

In [193]:
# Load the FAISS index and documents, and rebuild the vector store from them.
#
# SECURITY: pickle.load runs arbitrary code contained in the file. Only load
# pickles that this notebook wrote itself; never point these paths at files
# from an untrusted source.
if os.path.exists(index_path) and os.path.exists(docs_path) and os.path.exists(index_to_docstore_id_path):
    index = faiss.read_index(index_path)
    with open(docs_path, "rb") as f:
        docs = pickle.load(f)
    with open(index_to_docstore_id_path, "rb") as f:
        saved_mapping = pickle.load(f)

    # Keep only the mapping entries that point at a vector actually stored in
    # the index; anything beyond index.ntotal can never be returned by a search.
    index_to_docstore_id = {
        position: doc_id
        for position, doc_id in saved_mapping.items()
        if 0 <= position < index.ntotal
    }

    # Fail early with a clear message if the three files do not describe the
    # same set of chunks, rather than failing later in the middle of a query.
    if index.ntotal != len(docs) or len(index_to_docstore_id) != len(docs):
        raise ValueError(
            f"Saved artefacts are inconsistent: {index.ntotal} vectors, "
            f"{len(docs)} documents, {len(index_to_docstore_id)} usable mapping entries"
        )

    # Key the docstore by the ids the mapping really uses (chunk i sits at
    # vector position i) instead of assuming the ids are str(i).
    restored_docs_by_id = {index_to_docstore_id[i]: doc for i, doc in enumerate(docs)}
    docstore = InMemoryDocstore(restored_docs_by_id)
    print(f"Loaded document store keys: {list(restored_docs_by_id.keys())[:10]}")  # Debug output
    embeddings = OpenAIEmbeddings()  # Recreate embeddings object
    vector_store = FAISS(embedding_function=embeddings, index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id)
else:
    # Make the skipped reload visible instead of silently doing nothing.
    print("Saved FAISS artefacts not found; keeping the in-memory vector store.")

Loaded document store keys: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


In [101]:
# # import dill

# # Storing vector index create in local
# file_path="vector_index2.pkl"
# with open(file_path, "wb") as f:
#     pickle.dump(vectorindex_openai, f)

In [102]:
# if os.path.exists(file_path):
#     with open(file_path, "rb") as f:
#         vectorIndex = pickle.load(f)

### (4) Retrieve similar embeddings for a given question and call LLM to retrieve final answer

In [194]:
# Wire the LLM to a retriever over the FAISS vector store; the chain object is
# left as the last expression so the cell displays it.
retriever = vector_store.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
chain



In [199]:
# Define the query
query = "what is the price of Tesla in 2024?"

# Enable debugging for this call only. `langchain.debug` is a process-wide
# switch, so remember its previous value and restore it afterwards instead of
# leaving tracing switched on for every later cell.
previous_debug = getattr(langchain, "debug", False)
langchain.debug = True

# Clear any answer left over from an earlier run so that a failed call cannot
# be mistaken for a successful one.
result = None

# Invoke the chain
try:
    result = chain.invoke({"question": query}, return_only_outputs=True)
    print(result)
except Exception as e:
    # Keep the notebook running, but report the exception type as well as the
    # message so the failure can be diagnosed.
    print(f"Error: {type(e).__name__}: {e}")
finally:
    langchain.debug = previous_debug

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the price of Tesla in 2024?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "In Germany, the carmaker trimmed the price of its Model 3 rear-wheel drive to €40,990 ($43,670.75) from €42,990, where the price has been since February.\n\nThere were also price cuts in many other countries in Europe, the Middle East and Africa, a Tesla spokesperson said.\n\nThe swathe of price cuts comes after the company reported this month that its global vehicle deliveries in the first quarter had fallen for the first time in nearly four years.\n\nTesla shares fell below $150 this w

In [198]:
# Define the query
query = "what are the main reasons for the price cut?"

# Enable debugging for this call only. `langchain.debug` is a process-wide
# switch, so remember its previous value and restore it afterwards instead of
# leaving tracing switched on for every later cell.
previous_debug = getattr(langchain, "debug", False)
langchain.debug = True

# Clear any answer left over from an earlier run so that a failed call cannot
# be mistaken for a successful one.
result = None

# Invoke the chain
try:
    result = chain.invoke({"question": query}, return_only_outputs=True)
    print(result)
except Exception as e:
    # Keep the notebook running, but report the exception type as well as the
    # message so the failure can be diagnosed.
    print(f"Error: {type(e).__name__}: {e}")
finally:
    langchain.debug = previous_debug

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what are the main reasons for the price cut?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Tesla cuts prices in major markets as sales fall\n\nImage source, Getty Images\n\nImage caption,\n\nA price war has been intensifying between electric vehicle makers\n\nMariko Oi\n\nBusiness reporter\n\nPublished22 April 2024\n\nTesla has cut its prices again in a number of major markets - including the US, China and Germany - as the electric car giant run by multi-billionaire Elon Musk faces falling sales.\n\nThe move comes after it reported a sharp fall in its global vehicle d