In [3]:
from langchain.llms import LlamaCpp

In [None]:
# pip install sentence_transformers
# pip install langchain
# pip install pypdf
# pip install typing-extensions==4.8.0
# pip install chromadb

In [None]:
# Path to the quantized GGUF weights handed to llama.cpp.
# NOTE(review): this is a machine-specific absolute path. An earlier, immediately
# overwritten assignment pointed at the repo-relative copy
# "../models/dolphin-2.5-mixtral-8x7b.Q4_K_M.gguf"; that dead store was removed.
MODEL_PATH = "/home/flash/models/dolphin-2.5-mixtral-8x7b.Q4_K_M.gguf"
# Author's timing notes: 38min / 40s
# NOTE(review): presumably load times for two configurations — confirm what was measured.
llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_ctx=1024 * 32,   # 32768
    n_gpu_layers=40,
    n_threads=15,
    n_batch=512,
    f16_kv=True,
    verbose=True,
)

In [6]:
# System message substituted into the `{system_prompt}` slot of the chat template.
system_prompt = (
    "I'm a QA expert, I'll respond only to answers I'm confident in, "
    "and for ones I'm not sure about, I'll simply say 'I don't know'."
)

In [7]:
# Chat template with two placeholders, `{system_prompt}` and `{user_prompt}`.
# It ends right after the assistant header so the model's completion is the reply.
prompt_template = (
    "<|im_start|>system\n"
    "{system_prompt}<|im_end|>\n"
    "<|im_start|>user\n"
    "{user_prompt}<|im_end|>\n"
    "<|im_start|>assistant\n"
)

In [8]:
def ask(user_input, max_tokens=2048, temperature=0.7, top_p=0.1, repeat_penalty=1.1):
    """Send one user message to the module-level `llm` and return its reply.

    The message is placed in the `{user_prompt}` slot of the module-level
    `prompt_template`, together with the module-level `system_prompt`.

    Args:
        user_input: Text substituted for `{user_prompt}`.
        max_tokens: Forwarded to `llm` as `max_tokens`.
        temperature: Forwarded to `llm` as `temperature`.
        top_p: Forwarded to `llm` as `top_p`.
        repeat_penalty: Forwarded to `llm` as `repeat_penalty`.

    Returns:
        Whatever `llm(...)` returns for the formatted prompt.
    """
    prompt = prompt_template.format(
        system_prompt=system_prompt,
        user_prompt=user_input,
    )
    # Defaults reproduce the previously hard-coded sampling settings.
    return llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
    )

In [9]:
# Smoke test: send a single question through ask(); the bare `answer`
# expression on the last line makes the notebook display the reply.
answer = ask("What is your name?")
answer


llama_print_timings:        load time =    1002.43 ms
llama_print_timings:      sample time =       2.47 ms /     7 runs   (    0.35 ms per token,  2836.30 tokens per second)
llama_print_timings: prompt eval time =    1000.54 ms /    63 tokens (   15.88 ms per token,    62.97 tokens per second)
llama_print_timings:        eval time =     275.20 ms /     6 runs   (   45.87 ms per token,    21.80 tokens per second)
llama_print_timings:       total time =    1313.20 ms


" I don't know."

In [13]:
def read_file(file):
    """Return the full contents of `file`, opened in text mode as UTF-8."""
    with open(file, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents

In [1]:
from py_standard.langchain_lit import split_documents, convert_docs_to_splits, LlmEmbedding
from py_standard.pdf_utils import load_pdf_documents_from_directory
from langchain.vectorstores import Chroma

# Directory name of the embedding model; load_vectorstore() looks it up under "../models/".
EMB_MODEL = "bge-base-en"
def load_all_pdfs(path):
    """Load the PDFs found under `path` and return them split into pieces.

    The documents returned by `load_pdf_documents_from_directory(path)` are
    passed to `split_documents` with a size argument of 10000.
    NOTE(review): presumably a chunk size — confirm the unit against the
    `split_documents` helper.
    """
    return split_documents(load_pdf_documents_from_directory(path), 10 * 1000)
   
def load_vectorstore(docs):
    """Build and return a Chroma vector store over `docs`.

    An `LlmEmbedding` is created for the model directory
    "../models/<EMB_MODEL>" and its `.embedding` attribute is passed to
    `Chroma.from_documents` as the embedding.
    """
    embedder = LlmEmbedding(f"../models/{EMB_MODEL}")
    return Chroma.from_documents(
        documents=docs,
        embedding=embedder.embedding,
    )


In [2]:
# Load and split every PDF under ./pdfs, then index the resulting pieces in Chroma.
docs = load_all_pdfs('./pdfs')
vectorstore = load_vectorstore(docs)

In [13]:
# Retrieval check: fetch the stored pieces most similar to the question and
# print the 'source' entry of each one.
question = "What is Five-Count Baccarat?"
# NOTE: this rebinds `docs`, which held the full list returned by load_all_pdfs().
docs = vectorstore.similarity_search(question)
splits = convert_docs_to_splits(docs)
for split in splits:
   # The `=` specifier prints the expression text followed by its repr.
   print(f"{split['source']=}")

split['source']='pdfs/Silverthorne Publications, Inc..pdf'
split['source']='pdfs/Five-CountBaccarat-Book.pdf'
split['source']='pdfs/Five-CountBaccarat-Book.pdf'
split['source']='pdfs/Silverthorne Publications, Inc..pdf'


In [17]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

# These two prompts use cell-specific names on purpose: ask() reads the globals
# `system_prompt` and `prompt_template`, and rebinding them here made every later
# ask() call fail with KeyError: 'context'.
# NOTE(review): this system text is not referenced by the template below, so it
# is never sent to the model as written — confirm whether it should be included.
rag_system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
Read the given context before answering questions and think step by step. If you can not answer a user question based on
the provided context, inform the user. Do not use any other information for answering user"""

rag_prompt_template = """
Context: {context}
User: {question}
"""

QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=rag_prompt_template
)

# Chain: "stuff" puts all retrieved documents into the single {context} slot.
qa_chain = load_qa_chain(
    llm,
    chain_type="stuff",
    prompt=QA_CHAIN_PROMPT
)

# Run: retrieve the documents most similar to the question and answer from them.
question = "What is Five-Count Baccarat?"
docs = vectorstore.similarity_search(question)
result = qa_chain({
    "input_documents": docs,
    "question": question
    },
    return_only_outputs=True
)

# Output
result['output_text']

Llama.generate: prefix-match hit

llama_print_timings:        load time =    1002.43 ms
llama_print_timings:      sample time =      96.68 ms /   256 runs   (    0.38 ms per token,  2647.97 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   27949.25 ms /   256 runs   (  109.18 ms per token,     9.16 tokens per second)
llama_print_timings:       total time =   28566.84 ms


'Answer: Five-Count Baccarat is a betting system used in the card game baccarat. This system is based on tracking consecutive wins or losses for either the Player or Banker hand, and adjusting the bet size accordingly. The term "Five-Count" refers to the fact that this system tracks a series of five consecutive wins or losses before adjusting the bet size.\nThe Five-Count Baccarat betting system is designed to optimize a player\'s chances of winning in baccarat by adjusting the bet size based on tracking consecutive wins or losses for either the Player or Banker hand. The system is based on the assumption that after five consecutive wins or losses, the next card dealt is likely to be of a different value than the previous ones, thereby creating a situation where the betting odds are more favorable to the player who adjusts their bet size accordingly.\nThe Five-Count Baccarat betting system is relatively simple to understand and implement in one\'s own baccarat gameplay. The basic steps

In [31]:
# Read the whole Markdown file into one string (read_file decodes it as UTF-8).
context = read_file('./data/baccarat-book.md')

In [26]:
def split_string(text, length):
    """Split `text` into consecutive slices of at most `length` items.

    Slices are taken left to right without overlap; the last one may be
    shorter. An empty `text` yields an empty list.

    Args:
        text: The string to split.
        length: Maximum size of each slice; must be positive.

    Returns:
        A list of slices that concatenate back to `text`.

    Raises:
        ValueError: If `length` is zero or negative.
    """
    # A negative step used to return [] silently, and zero failed with an
    # unrelated range() message; reject both up front.
    if length <= 0:
        raise ValueError(f"length must be a positive integer, got {length!r}")
    return [text[start:start + length] for start in range(0, len(text), length)]

In [32]:
# Cut the loaded text into consecutive pieces of at most 6000 characters.
context_splits = split_string(context, 6000)

In [33]:
# Context-grounded QA prompt with `{context}` and `{question}` placeholders.
# Written line by line so the trailing spaces (after "context." and after
# "{question}") stay visible; they are part of the template text.
qa_prompt_template = (
    "Instruction: Answer the question based on following context. \n"
    "If the answer cannot be found from the following context, respond with 'I don't know.'.\n"
    "Here is context to help:\n"
    "\n"
    "{context}\n"
    "\n"
    "### USER QUESTION:\n"
    "{question} \n"
)

# Fill the QA template with the first context chunk and a sample question, then
# pass the result to ask() as the user message; the bare `answer` displays the reply.
prompt = qa_prompt_template.format(context=context_splits[0],question="What is baccarat?")
answer = ask(prompt)
answer

Llama.generate: prefix-match hit

llama_print_timings:        load time =    1408.84 ms
llama_print_timings:      sample time =      93.50 ms /   256 runs   (    0.37 ms per token,  2737.97 tokens per second)
llama_print_timings: prompt eval time =   21537.70 ms /  1750 tokens (   12.31 ms per token,    81.25 tokens per second)
llama_print_timings:        eval time =   30391.78 ms /   255 runs   (  119.18 ms per token,     8.39 tokens per second)
llama_print_timings:       total time =   52509.51 ms


" Baccarat is a popular card game played in casinos around the world. The objective of the game is for the player to predict whether the value of their hand will be closer to or equal to the value of the banker's hand. The game is usually played with eight decks of cards, and the value of each card is determined as follows: an ace counts as 1 point, a two through nine count as the face value of the card (for example, a seven counts as 7 points), and a ten or face card (king, queen) counts as 0 points. The total value of each player's hand is then calculated by adding together the values of all of the cards in that player's hand. For example, if a player was holding a two and an ace, the total value of their hand would be calculated as follows: 2 (two) + 1 (ace) = 3 points. Similarly, if a player was holding a seven and an eight, the total value of their hand would be calculated as follows: 7 (seven) + 8 (eight) = 15 points. However, since the game is usually played with eight decks of 

In [34]:
def ask_qa(user_input, split_index=0):
    """Ask a question against one chunk of the loaded context.

    The chosen chunk and `user_input` are substituted into the module-level
    `qa_prompt_template`, and the result is sent through `ask()`.

    Args:
        user_input: Text substituted for `{question}`.
        split_index: Index into `context_splits` of the chunk to use as
            `{context}`. Defaults to 0, the previously hard-coded chunk.

    Returns:
        The value returned by `ask()`.

    Raises:
        IndexError: If `split_index` is out of range for `context_splits`.
    """
    qa_prompt = qa_prompt_template.format(
        context=context_splits[split_index],
        question=user_input,
    )
    return ask(qa_prompt)

In [35]:
# Ask about a specific term; this call is answered against context_splits[0] only.
answer = ask_qa("What is Contra-d'Alembert?")
answer

Llama.generate: prefix-match hit

llama_print_timings:        load time =    1408.84 ms
llama_print_timings:      sample time =      44.83 ms /   120 runs   (    0.37 ms per token,  2676.90 tokens per second)
llama_print_timings: prompt eval time =    1164.92 ms /    18 tokens (   64.72 ms per token,    15.45 tokens per second)
llama_print_timings:        eval time =   13307.87 ms /   119 runs   (  111.83 ms per token,     8.94 tokens per second)
llama_print_timings:       total time =   14733.31 ms


" Contra-d'Alembert is a betting system in the game of roulette. The name itself is derived from French mathematician Jean le Rond d'Alembert. \n\nThe concept behind this betting system is contrary to that of the famous martingale betting system. Unlike the martingale system, which involves increasing your bets after losing rounds, Contra-d'Alembert requires you to decrease your bets after losing rounds and increase them after winning ones. This approach aims at minimizing losses while maximizing profits."

In [45]:
# Ask about another term from the material, again against context_splits[0].
answer = ask_qa('What is "Tracker Betting"?')
answer

Llama.generate: prefix-match hit

llama_print_timings:        load time =    1408.84 ms
llama_print_timings:      sample time =      31.19 ms /    84 runs   (    0.37 ms per token,  2693.26 tokens per second)
llama_print_timings: prompt eval time =    1074.32 ms /    15 tokens (   71.62 ms per token,    13.96 tokens per second)
llama_print_timings:        eval time =    8756.51 ms /    83 runs   (  105.50 ms per token,     9.48 tokens per second)
llama_print_timings:       total time =   10017.74 ms


' Tracker Betting is a betting strategy used in baccarat play. It involves tracking the results of each round of play to determine where to place bets for subsequent rounds of play. The goal of Tracker Betting is to identify patterns in the outcomes of each round of play and then use this information to make informed decisions about where to place bets in order to maximize profits and minimize losses.'

In [44]:
# Send an instruction in the question slot: generate Q/A pairs from the chunk.
# The logged run produced 2048 tokens, the max_tokens value set in ask().
answer = ask_qa('Generate many questions(Q:) and answers(A:), and the answers must be contained in the above content.')
answer

Llama.generate: prefix-match hit

llama_print_timings:        load time =    1408.84 ms
llama_print_timings:      sample time =     745.15 ms /  2048 runs   (    0.36 ms per token,  2748.45 tokens per second)
llama_print_timings: prompt eval time =    1044.97 ms /    32 tokens (   32.66 ms per token,    30.62 tokens per second)
llama_print_timings:        eval time =  334022.37 ms /  2047 runs   (  163.18 ms per token,     6.13 tokens per second)
llama_print_timings:       total time =  342075.28 ms


' A: Sure, here are some questions and answers based on the context provided:\n\nQ1: What is Target Baccarat?\nA1: Target Baccarat is a professional-level baccarat betting strategy that has never lost in over 10,000 documented games played by real players.\nQ2: How does Target Baccarat work?\nA2: Target Baccarat uses a combination of Target Betting and Tracker Betting to determine where to bet and how much to bet. The strategy also includes a Unique Bet Timing System that is used to pick the optimal time to place each bet.\n\nQ3: What are some key features of Target Baccarat?\nA3: Some key features of Target Baccarat include its ability to consistently produce explosive profits, its use of a combination of Target Betting and Tracker Betting to determine where to bet and how much to bet, its inclusion of a Unique Bet Timing System that is used to pick the optimal time to place each bet, and its overall ease of use and ability to be quickly learned and easily applied by both new and expe

In [43]:
# Send a summarization instruction in the question slot for context_splits[0].
answer = ask_qa('summarize the context')
answer

Llama.generate: prefix-match hit

llama_print_timings:        load time =    1408.84 ms
llama_print_timings:      sample time =      33.60 ms /    89 runs   (    0.38 ms per token,  2648.49 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    9647.51 ms /    89 runs   (  108.40 ms per token,     9.23 tokens per second)
llama_print_timings:       total time =    9841.62 ms


' Target Baccarat is a professional-level baccarat strategy that has never lost. It uses Target Betting, a system borrowed from stock trading, to set up winning bets. The strategy also determines the size of bets using a Unique Betting Strategy. Overall, Target Baccarat provides an easy-to-learn and use professional-level baccarat strategy that has never lost.'