In [None]:
# %pip install llama-index-vector-stores-mongodb

In [None]:
import logging
import os
import sys

from dotenv import load_dotenv
from huggingface_hub import login

# Send INFO-level logs to stdout through exactly one handler.
# force=True removes any handlers already attached to the root logger, so
# re-running this cell does not stack handlers and duplicate every log line.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

# Pull HUGGINGFACEHUB_API_TOKEN and MONGO_DB_URI from a local .env file, if present.
load_dotenv()

hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Never echo the token itself: notebook outputs are saved with the file and
# are easily committed or shared. Only report whether a token was found.
print("HUGGINGFACEHUB_API_TOKEN set:", hf_token is not None)

# Authenticate this session against the Hugging Face Hub.
# NOTE(review): `write_permission` appears to be deprecated in recent
# huggingface_hub releases — confirm against the installed version.
login(
    token=hf_token,
    write_permission=True,
    add_to_git_credential=True,
)

# Connection string for the MongoDB deployment used as the vector store.
MONGO_DB_URI = os.getenv("MONGO_DB_URI")

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Instantiate the Hugging Face embedding model with the library defaults and
# register it on the global LlamaIndex settings object.
default_embedder = HuggingFaceEmbedding()
Settings.embed_model = default_embedder

# Show the resulting settings as the cell output.
Settings

In [None]:
# Provide URI to constructor, or use environment variable
import pymongo
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.core import SimpleDirectoryReader

In [None]:
# Load the source PDF into LlamaIndex documents.
docs = SimpleDirectoryReader(input_files=["./data/nimrethanda.pdf"]).load_data()

# Wire a MongoDB Atlas vector store (connection string from MONGO_DB_URI)
# into a storage context.
mongodb_client = pymongo.MongoClient(MONGO_DB_URI)
store = MongoDBAtlasVectorSearch(mongodb_client)
storage_context = StorageContext.from_defaults(vector_store=store)

# Build the vector index from the documents, storing it through the
# MongoDB-backed storage context.
index = VectorStoreIndex.from_documents(docs, storage_context=storage_context)

In [None]:
from llama_index.core import PromptTemplate

# Instruction-style wrapper applied around each query; `{query_str}` is the
# placeholder that receives the user's question.
query_wrapper_prompt = PromptTemplate(
    "Below is an instruction that describes a task. Write a response that "
    "appropriately completes the request.\n\n"
    "### Instruction:\n"
    "{query_str}\n\n"
    "### Response:"
)

In [None]:
from llama_index.llms.huggingface import HuggingFaceLLM,HuggingFaceInferenceAPI
import torch

# Model served through the Hugging Face Inference API.
# Alternatives tried previously:
#   "thenlper/gte-small", "HuggingFaceH4/zephyr-7b-alpha", "Writer/camel-5b-hf"
MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# HuggingFaceInferenceAPI is a remote client: the model runs on Hugging Face's
# servers, so the local-loading options accepted by HuggingFaceLLM
# (tokenizer_name, device_map, tokenizer_kwargs, model_kwargs / torch dtype,
# generate_kwargs) are not fields of this class and were dropped here.
# The completion length is set with `num_output` rather than `max_new_tokens`.
llm = HuggingFaceInferenceAPI(
    model_name=MODEL,
    context_window=2048,
    num_output=256,
    query_wrapper_prompt=query_wrapper_prompt,
)

# NOTE(review): chunk_size is assigned after the index was already built in an
# earlier cell, so it presumably does not affect the chunks ingested there —
# confirm, and move it above the indexing cell if 512 was intended for ingestion.
Settings.chunk_size = 512
Settings.llm = llm

In [None]:
# Create a query engine over the index and run a sample question against it.
query_engine = index.as_query_engine()
response = query_engine.query("What is flower ?")

# Display the answer as the cell output.
response