In [14]:
# pip install langchain

In [15]:
# %pip install unstructured libmagic python-magic python-magic-bin

In [16]:
# pip install -U langchain-community

In [17]:
# pip install langchain langchain-ollama

In [18]:
# pip install streamlit

In [19]:
# pip install sentence-transformers

In [20]:
# pip install faiss-cpu

In [21]:
# pip install faiss-gpu  # GPU alternative to faiss-cpu; install only one of the two

In [22]:
# pip install python-dotenv

In [23]:
import os
from dotenv import load_dotenv
import pickle
import langchain
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [24]:
# Pull variables from a local .env file (when present) into the process
# environment, then read the Hugging Face Hub token from it.
# api_token is None when the variable is not defined anywhere.
load_dotenv()
api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

In [25]:
# Export the Hugging Face Hub API token so the LangChain Hugging Face
# integrations can pick it up from the environment.
# os.environ only accepts strings: assigning None (token missing from .env and
# from the shell) would raise an opaque "TypeError: str expected, not NoneType",
# so fail early with an actionable message instead.
if not api_token:
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set. Add it to your .env file or "
        "export it in the environment before running this notebook."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token

In [26]:
# Initialize the LLM from a model hosted on the Hugging Face Hub
# (here Zephyr 7B beta). Swap repo_id to try a different hosted model.
generation_kwargs = {"temperature": 0.9, "max_new_tokens": 500}
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs=generation_kwargs,
)

  llm = HuggingFaceHub(
  from .autonotebook import tqdm as notebook_tqdm


In [27]:
# (1) Load data
# Each URL is fetched and parsed into a LangChain document.
source_urls = [
    "https://www.datacamp.com/blog/manus-ai",
    "https://medium.com/@vsinha027/chinas-manus-ai-d4ccafc2affd",
]
loaders = UnstructuredURLLoader(urls=source_urls)
data = loaders.load()
print(f"Number of documents loaded: {len(data)}")

Number of documents loaded: 2


In [28]:
# (2) Split data into chunks
# Overlapping chunks keep some shared context across chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
docs = text_splitter.split_documents(data)
print(f"Number of document chunks: {len(docs)}")

Number of document chunks: 6


In [29]:
# (3) Create embeddings using HuggingFaceEmbeddings and build (or reload) a FAISS index
# Here we use the free, open-source "all-MiniLM-L6-v2" model for embeddings.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Location of the cached FAISS vector index.
file_path = "vector_index_new.pkl"

# Set to True (or delete the pickle file) after changing the URLs, the splitter
# settings, or the embedding model; otherwise the previously saved index is
# reused unchanged and will not reflect the new inputs.
rebuild_index = False

if os.path.exists(file_path) and not rebuild_index:
    # Reuse the index written by an earlier run instead of re-embedding every chunk.
    # NOTE: pickle.load can execute arbitrary code contained in the file -- only
    # load a pickle that this notebook produced itself.
    with open(file_path, "rb") as f:
        vectorindex = pickle.load(f)
else:
    # No usable cache: embed the chunks, build the index, then save it for reuse.
    vectorindex = FAISS.from_documents(docs, embeddings)
    with open(file_path, "wb") as f:
        pickle.dump(vectorindex, f)


  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [None]:
# (4) Retrieve similar documents for a given query and generate an answer
# with sources, using the `llm` configured earlier in this notebook.
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorindex.as_retriever())

query = "What is manus AI?"

# Print every chain / LLM step (inputs and outputs) while the chain runs.
langchain.debug = True

# Calling the chain object directly (Chain.__call__) is deprecated in LangChain;
# invoke() is the supported entry point and accepts the same arguments.
# return_only_outputs=True keeps the input question out of the returned dict.
chain.invoke({"question": query}, return_only_outputs=True)


  chain({"question": query}, return_only_outputs=True)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What is manus AI?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Introduction to Manus.AI\n\nManus, derived from the Latin word for “hand”, is a general AI agent that turns your thoughts into actions\n\nManus AI is China’s latest large language model (LLM), and it’s already shaking up the AI landscape. Developed by Monica, a rising AI company, Manus AI is being positioned as a serious competitor to OpenAI’s ChatGPT-4.5 and Google’s Gemini models. But what makes it special?",
      "question": "What is manus AI?"
    },
    {
      "context": "Vivek Sinha, PhD\n\nFollow