In [None]:
%pip install pgvector
%pip install -U langchain-core langchain-mistralai
%pip install llama-cpp-python

!python -m pip install -e ./llama-cpp-python --force-reinstall --no-cache-dir --user

In [1]:

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
loader = PyPDFLoader("data/Retire-Rich-PDF.pdf")
pages = loader.load()

In [3]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=100)

In [4]:
docs = text_splitter.split_documents(pages)

In [5]:
from langchain.embeddings import HuggingFaceEmbeddings

In [6]:
#embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base-v2")
#from sentence_transformers import SentenceTransformer

#model = SentenceTransformer("intfloat/e5-base-v2")

In [7]:
from langchain_community.vectorstores.pgvector import PGVector

In [15]:
CONNECTION_STRING = "postgresql+psycopg2://postgres:hello@localhost:5433/vector_db"

COLLECTION_NAME = "state_of_the_union_test"

db = PGVector.from_documents(
    embedding=embeddings,
    documents=docs,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
)

In [16]:
import os
#os.environ["PGVECTOR_VECTOR_SIZE"] = str(768)
query = "How to retire successfully"
docs_with_score = db.similarity_search_with_score(query)
for doc, score in docs_with_score:
    print("-" * 80)
    print("Score: ", score)
    print(doc.page_content)
    print("-" * 80)

--------------------------------------------------------------------------------
Score:  0.34892473664655466
The advantage of slowing down is mutual — the organization gets to keep
the employee for a longer period, the positions are non-competitive, the
employee is not looking for a new job every day, and all the experience is
effectively used to everyone’ s advantage.
When can we retire?
Many of us fancy retirement — and it is in our hands to make it happen
exactly the way we want it to happen. When we can retire is a function of
how much money we need for retirement — and how much of this is
already availabl e. Retirement is voluntary for businessmen, and even for
people in service, a post-retirem ent job is a possibility . Howeve r, for many
people retirement is a luxury . For many of us, longevity will surely be an
issue–the longes t portion of our life may be spent in retirement. I know of
employees who retired at 55 and then lived until the age of 95! Assuming
that you started wo

In [None]:
from langchain.llms import LlamaCpp

In [None]:
#Import Model
#llm = LlamaCpp(
   # streaming = True,
   # model_path="./models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
   # n_ctx= 2048,
   # verbose=True,
   # use_mlock=True,
#    n_gpu_layers=12,
  #  n_threads=4,
 #   n_batch=1000
#)

from langchain_community.llms import Ollama

llm = Ollama(model="mistral")

In [None]:
from langchain.chains import RetrievalQA
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever(search_kwargs={"k": 2}), verbose=True)

In [None]:
qa.invoke(query)



In [None]:
import torch

if not torch.cuda.is_available():
    raise SystemError("CUDA is not available. Please check your NVIDIA GPU and driver installation.")
print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")

# Step 2: Verify PyTorch and CUDA compatibility
print(f"Using PyTorch version: {torch.__version__}")
print(f"Using CUDA version: {torch.version.cuda}")


In [None]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# see: https://huggingface.co/mistralai/Mistral-7B-v0.1#troubleshooting
#assert transformers.__version__ >= 4.34.0

#MODEL_NAME = "mistralai/Mistral-7B-v0.1"
MODEL_NAME="TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
DEVICE = "cuda" if torch.cuda.is_available() else "CPU"
from transformers import AutoModel
model = AutoModel.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GGUF")


# [1.] Load model and move to GPU
#model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
#tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
#model = model.to(DEVICE)

# [2.] Do inference
#input_data = "..."
#encoded_input = tokenizer(input_data, 
#                          padding=True, 
#                          truncation=True, 
#                          return_tensors="pt").to(DEVICE)
#resp = model(**encoded_input)

In [None]:
# Load model directly
from transformers import AutoModel
import os
os.getcwd()
model = AutoModel.from_pretrained("./models/mistral-7b-instruct-v0.1.Q4_K_M.gguf")