In [1]:
from transformers import AutoTokenizer

from langchain.text_splitter import SentenceTransformersTokenTextSplitter
import os
import psycopg2 as pg


In [2]:
# Open a psycopg2 connection to the "vector_test" PostgreSQL database on localhost.
# User, password and port come from the DB_USER, DB_PASSWORD and DB_PORT
# environment variables; a variable that is not set is passed through as None.
connection = pg.connect(
    host="localhost",
    port=os.getenv("DB_PORT"),
    database="vector_test",
    user=os.getenv("DB_USER"),
    password=os.getenv("DB_PASSWORD"),
)

In [3]:
from langchain.document_loaders import PyPDFLoader

In [4]:
# Point a PyPDFLoader at the resume PDF; the path is relative to the working directory.
resume_pdf = PyPDFLoader("documents/Perigo_James_Courtney_resume_2023_nocontactinfo.pdf")

In [5]:
# Read the PDF through the loader; `resume` is what the text splitter consumes later on.
resume = resume_pdf.load()

In [6]:
# Token-based splitter used to cut the loaded resume into small, overlapping chunks.
text_splitter = SentenceTransformersTokenTextSplitter(
    # Set a really small chunk size, just to show.
    tokens_per_chunk=20,
    # Overlap setting for neighbouring chunks: 10, i.e. half of tokens_per_chunk.
    chunk_overlap=10,
)

  return self.fget.__get__(instance, owner)()


In [7]:
# Run the loaded resume through the token splitter; each element of `docs` is one chunk.
docs = text_splitter.transform_documents(resume)

In [8]:
# Number of chunks produced by the splitter.
len(docs)

85

In [9]:
# Inspect the first chunk: its text plus the source/page metadata it carries.
docs[0]

Document(page_content='james “ courtney ” perigo expert data science leader driving innovation for business success contact courtneyperigo.', metadata={'source': 'documents/Perigo_James_Courtney_resume_2023_nocontactinfo.pdf', 'page': 0})

In [10]:
# Chunk count again (repeats the earlier len(docs) check).
len(docs)

85

In [11]:
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model built with the library defaults (no model name or options are passed).
embeddings = HuggingFaceEmbeddings()

In [12]:
# for doc in docs:
#     source = doc.metadata['source']
#     page = doc.metadata['page']
#     page_content = doc.page_content
#     vector = embeddings.embed_query(page_content)
#     # write to database
#     cursor = connection.cursor()
#     cursor.execute("INSERT INTO t_docs (doc_source, doc_page, embedding, doc_content) VALUES (%s, %s, %s, %s)", (source, page, vector, page_content))
#     connection.commit()
#     cursor.close()
    

In [17]:
import torch
import numpy as np
import pandas as pd
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

In [18]:
# Report whether CUDA is usable, then the CUDA version of this torch build (one per line).
print(torch.cuda.is_available(), torch.version.cuda, sep="\n")

True
11.8


In [19]:
# Turn off the memory-efficient and flash scaled-dot-product-attention backends.
# NOTE(review): the notebook does not say why — presumably a workaround for this
# GPU/driver setup; confirm before removing.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

In [20]:
# Identifier passed to every from_pretrained call in the model-loading cell
# (PEFT config, adapter weights, tokenizer, generation config).
MODEL_NAME = "IlyaGusev/saiga_mistral_7b_lora"
# Per-message format; filled with each message's "role" and "content".
DEFAULT_MESSAGE_TEMPLATE = "<s>{role}\n{content}</s>"
# Text appended after the last message to open the bot's turn.
DEFAULT_RESPONSE_TEMPLATE = "<s>bot\n"
# Default content of the initial "system" message of a Conversation.
DEFAULT_SYSTEM_PROMPT = "Imagine you are a helpful assistant answering questions about Courtney Perigo. You are to answer truthfully and accurately."

In [21]:
class Conversation:
    """Accumulates chat messages and renders them into a single prompt string.

    The message list always starts with one "system" message. Each message is
    a dict with the keys "role" and "content".

    Args:
        message_template: Format string applied to every message; it is
            formatted with the message's ``role`` and ``content``.
        system_prompt: Content of the initial "system" message.
        response_template: Text appended after the last message when the
            prompt is rendered, opening the bot's turn.
    """

    def __init__(
        self,
        message_template=DEFAULT_MESSAGE_TEMPLATE,
        system_prompt=DEFAULT_SYSTEM_PROMPT,
        response_template=DEFAULT_RESPONSE_TEMPLATE
    ):
        self.message_template = message_template
        self.response_template = response_template
        self.messages = [{
            "role": "system",
            "content": system_prompt
        }]

    def add_user_message(self, message):
        """Append ``message`` to the conversation with the role "user"."""
        self.messages.append({
            "role": "user",
            "content": message
        })

    def add_bot_message(self, message):
        """Append ``message`` to the conversation with the role "bot"."""
        self.messages.append({
            "role": "bot",
            "content": message
        })

    def get_prompt(self, tokenizer):
        """Render all messages followed by the response template.

        Args:
            tokenizer: Not used by this method; kept so existing callers that
                pass a tokenizer keep working.

        Returns:
            The concatenated, formatted messages plus this instance's
            ``response_template``, with leading/trailing whitespace stripped.
        """
        rendered = "".join(
            self.message_template.format(**message) for message in self.messages
        )
        # Use the template given to __init__, not the module-level default, so
        # a custom response_template is actually honoured.
        return (rendered + self.response_template).strip()


def generate(model, tokenizer, prompt, generation_config):
    """Generate a completion for ``prompt`` and return only the new text.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs, generation_config=...)``.
        tokenizer: Callable that encodes the prompt and offers ``decode``.
        prompt: Fully rendered prompt string; no special tokens are added to it.
        generation_config: Passed through unchanged to ``model.generate``.

    Returns:
        The decoded continuation (prompt tokens removed, special tokens
        skipped), stripped of surrounding whitespace.
    """
    encoded = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)

    # Move every encoded field onto the model's device.
    inputs = {}
    for field, value in encoded.items():
        inputs[field] = value.to(model.device)

    sequences = model.generate(**inputs, generation_config=generation_config)
    first_sequence = sequences[0]

    # The returned sequence starts with the prompt itself; keep only what follows.
    prompt_length = len(inputs["input_ids"][0])
    completion_ids = first_sequence[prompt_length:]

    decoded = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return decoded.strip()

In [22]:
# Read the adapter's PEFT config to learn which base model it applies to.
config = PeftConfig.from_pretrained(MODEL_NAME)
# Load that base model with 8-bit weights; device_map="auto" leaves placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    load_in_8bit=True,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
# Wrap the base model with the adapter weights stored under MODEL_NAME.
model = PeftModel.from_pretrained(
    model,
    MODEL_NAME,
    torch_dtype=torch.bfloat16
)
# Switch to evaluation mode; the model is only used for generation in this notebook.
model.eval()

# Tokenizer and generation settings come from the same identifier as the adapter.
# use_fast=False asks for the slow tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
print(generation_config)

bin d:\Documents\GitHub\jarvis-ai-demo\.venv\lib\site-packages\bitsandbytes\libbitsandbytes_cuda118_nocublaslt.dll


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


GenerationConfig {
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_new_tokens": 1536,
  "no_repeat_ngram_size": 15,
  "pad_token_id": 0,
  "repetition_penalty": 1.1,
  "temperature": 0.2,
  "top_k": 40,
  "top_p": 0.9
}



In [58]:
user_query = "Does Courtney have a dog?"
query_vector = embeddings.embed_query(user_query)

# Maximum number of nearest chunks to pull in as context for the prompt.
TOP_K = 5

# Nearest-neighbour lookup ordered by distance between the stored embedding and
# the query embedding. The vector (in its text form, exactly as before) and the
# limit are sent as bound parameters instead of being formatted into the SQL.
query_vector_db = "SELECT * from t_docs ORDER BY embedding <-> %s LIMIT %s"

# get closest documents that match a query
try:
    # The context manager closes the cursor even when the query fails.
    with connection.cursor() as cursor:
        cursor.execute(query_vector_db, (str(query_vector), TOP_K))
        rows = cursor.fetchall()
except pg.Error:
    # A failed statement leaves the transaction aborted; roll back so the
    # connection stays usable when the cell is re-run.
    connection.rollback()
    raise

# One numbered line per retrieved row; column index 3 holds the chunk text.
# Built from whatever came back, so fewer than TOP_K rows no longer raises IndexError.
context_block = "\n".join(
    f"    {position}. {row[3]} " for position, row in enumerate(rows, start=1)
)

# Trailing spaces are kept inside the literals so the prompt is byte-identical
# to the previous triple-quoted version whenever 5 rows are returned.
text = (
    "\n"
    "    Imagine you are a helpful assistant answering questions about a man named Courtney Perigo. \n"
    "    You are to answer truthfully and accurately.  \n"
    "    Your response will be professional and not contain profanity or any other inappropriate content. \n"
    f"    Use the following {len(rows)} pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \n"
    "    Keep the answer as concise as possible. \n"
    f"{context_block}\n"
    f"    Question: {user_query}\n"
    "    Helpful Answer:\n"
)

In [59]:
# Put the retrieval prompt into a fresh conversation, render it, and sample a reply.
conversation = Conversation()
conversation.add_user_message(text)
prompt = conversation.get_prompt(tokenizer)
output = generate(model, tokenizer, prompt, generation_config)

# Echo the prompt and the model's answer, then a separator framed by blank lines.
print('---Prompt---', text, '---Response---', output, sep='\n')
print("", "==============================", "", sep="\n")

---Prompt---

    Imagine you are a helpful assistant answering questions about a man named Courtney Perigo. 
    You are to answer truthfully and accurately.  
    Your response will be professional and not contain profanity or any other inappropriate content. 
    Use the following 5 pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
    Keep the answer as concise as possible. 
    1. courtney - perigo. medium. com profile data s cience l eader and p ract 
    2. james “ courtney ” perigo expert data science leader driving innovation for business success contact courtneyperigo. 
    3. james “ courtney ” perigo expert data science leader driving innovation for business success experience, continued director, 
    4. in annual recurring revenue for the agency. won the drum digital advertising award for our novel c urve 
    5. in paid marketing performance. vp, director of decision sciences 