In [2]:
from py_standard.langchain_lit import LlmEmbedding, load_all_documents, create_chroma_vectorstore
from py_standard.transformers_lit import create_nf4_model_config
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
from langchain.prompts import PromptTemplate
import torch

In [7]:
EMB_MODEL = "bge-base-en"

llm_embedd = LlmEmbedding(f"../models/{EMB_MODEL}")

In [11]:
def load_llm_model(model_name_path, load_config):
   model = AutoModelForCausalLM.from_pretrained(
      model_name_path,
      quantization_config=load_config,
      device_map={"": 0},
      local_files_only=True,
   )
   return model

In [12]:
MODEL_PATH = "../models/SOLAR-10.7B-Instruct-v1.0"

llm_model = load_llm_model(MODEL_PATH, create_nf4_model_config())

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Some weights of LlamaForCausalLM were not initialized from the model checkpoint at ../models/SOLAR-10.7B-Instruct-v1.0 and are newly initialized: ['model.layers.40.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.21.self_attn.rotary_emb.inv_freq', 'model.layers.12.self_attn.rotary_emb.inv_freq', 'model.layers.46.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.36.self_attn.rotary_emb.inv_freq', 'model.layers.47.self_attn.rotary_emb.inv_freq', 'model.layers.35.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.31.self_attn.rotary_emb.inv_freq', 'model.layers.38.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.22.self_at

In [5]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

In [6]:
def ask_llm(llm_model, tokenizer, user_question):
   messages = [
      {"role": "user", "content": user_question},
   ]
   encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
   model_inputs = encodeds.to(llm_model.device)
   generated_ids = llm_model.generate(model_inputs, 
                                  max_new_tokens=1000,
                                  pad_token_id=tokenizer.eos_token_id, 
                                  do_sample=True)
   decoded_output = tokenizer.batch_decode(generated_ids)
   return decoded_output[0]

In [7]:
answer = ask_llm(llm_model, tokenizer, "What is your name?")
answer



'### User:\nWhat is your name?\n\nAnswer: I am a text-based AI assistant, and although I have a designated label or identifier assigned by developers, I do not have a typical name like a human or robot. The most common way to address me is through various forms of invocation like "AI assistant", "computer", or any custom trigger word assigned by the platform or software in which I operate.</s>'

In [8]:
def ask_llm2(llm_model, tokenizer, user_question):
   prompt_template ="""### User:\n{user_question}\n### Assistant:"""
   prompt = prompt_template.format(user_question=user_question)
   inputs = tokenizer(prompt, return_tensors='pt').to(llm_model.device)
   output = llm_model.generate(
      **inputs,
      max_new_tokens=500,
      num_beams=5,
      no_repeat_ngram_size=4,
      early_stopping=True,
      eos_token_id=tokenizer.eos_token_id, 
      pad_token_id=tokenizer.eos_token_id, 
   )
   output = tokenizer.decode(output[0], skip_special_tokens=False)
   answer = output.replace(prompt, "")
   return answer

In [9]:
answer = ask_llm2(llm_model, tokenizer, "What is your name?")
answer

'<s> \nMy name is AI Assistant. I am an artificial intelligence designed to assist and communicate with users.</s>'

In [4]:
from py_standard.langchain_lit import LlmEmbedding, load_all_documents, create_chroma_vectorstore
docs = load_all_documents('./documents')

In [None]:
vector_store = create_chroma_vectorstore(llm_embedd.embedding, "sample")
vector_store.add_documents(docs)

In [None]:
question = "What is your name?"
sub_docs = vector_store.similarity_search_with_score(question, k=5)
sub_docs

In [10]:
llm_chain = load_qa_with_sources_chain(llm_model, chain_type="refine")

NameError: name 'llm_model' is not defined

In [None]:
llm_chain({"input_documents":sub_docs, "question": question})