In [None]:
!pip install -q pypdf
!pip install -q python-dotenv
!pip install -q transformers

In [None]:
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install  llama-cpp-python --no-cache-dir

In [None]:
!pip install -q llama-index
!pip install llama-index-embeddings-huggingface
!pip install llama-index-llms-llama-cpp
!pip install torch transformers python-pptx Pillow
!pip -q install sentence-transformers
!pip install llama-index-embeddings-langchain
!pip install --upgrade langchain


## Set up logging and mount Google Drive in Colab

In [None]:
import logging
import sys

# Send INFO-and-above log records to stdout through exactly one root handler.
# force=True removes any handlers already attached to the root logger first;
# without it basicConfig silently does nothing when a handler is already
# present, leaving the level unchanged. The separate
# addHandler(StreamHandler(sys.stdout)) call that used to follow is dropped
# because it attached a second stdout handler, so every record printed twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

In [None]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem; the paths used later in this
# notebook all live under /content/drive.
drive.mount(mountpoint='/content/drive')

## Load the personal collection of papers

In [None]:
# Read every file under the papers folder on Drive; recursive=True makes the
# reader descend into sub-directories as well.
paper_reader = SimpleDirectoryReader(
    input_dir="/content/drive/MyDrive/paper/papers",
    recursive=True,
)
documents = paper_reader.load_data()

## Call the LLM

In [None]:
import torch

from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
# Quantised Mistral-7B-Instruct v0.1 weights (GGUF, Q4_K_M) hosted on Hugging Face.
MISTRAL_GGUF_URL = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf'

# llama.cpp-backed LLM used by the rest of the notebook.
llm = LlamaCPP(
    # Model source: a remote URL is supplied and no local path is given.
    model_url=MISTRAL_GGUF_URL,
    model_path=None,
    # Sampling temperature, cap on generated tokens, and context size.
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,
    # No extra generation arguments are passed.
    generate_kwargs={},
    # NOTE(review): n_gpu_layers=-1 presumably offloads every layer to the GPU;
    # confirm against the llama-cpp-python documentation.
    model_kwargs={"n_gpu_layers": -1},
    # Prompt-formatting helpers imported from llama_utils; the original note
    # describes them as producing the Llama2 prompt format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

### Create embeddings and, optionally, store them

In [None]:
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

from llama_index.core.settings import Settings

# Instantiate the LangChain Hugging Face embedder for "thenlper/gte-large",
# then wrap it in LangchainEmbedding so it can be assigned as the llama-index
# embedding model.
hf_encoder = HuggingFaceEmbeddings(model_name="thenlper/gte-large")
embed_model = LangchainEmbedding(hf_encoder)

In [None]:
from llama_index.core.settings import Settings
from llama_index.core.callbacks import CallbackManager
# Register the LLM, the embedding model, and a callback manager created with
# no arguments on the global llama-index Settings object.
callback_manager = CallbackManager()
Settings.llm = llm
Settings.embed_model = embed_model
Settings.callback_manager = callback_manager

# Build a vector index over the loaded documents.
index = VectorStoreIndex.from_documents(documents)

# Persist the index to Drive so it can be reused later.
vector_index_dir = "/content/drive/MyDrive/paper/vindex2"
index.storage_context.persist(persist_dir=vector_index_dir)


## Initialize the query engine and print the responses

In [None]:
# Ask a single question against the indexed papers and print the answer text.
question = "How to calculate open circuit voltage?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)