In [1]:
#pip install transformers trl accelerate torch bitsandbytes peft datasets -qU
#pip install scipy
#torch==2.1.2
#transformers==4.36.0
#trl==0.7.4
#accelerate==0.25.0
#bitsandbytes==0.41.3.post2
#peft==0.7.0
#datasets==2.15.0
#scipy==1.11.4
#sentencepiece==0.1.99

In [28]:
import spacy
nlp = spacy.load('en_core_web_lg')

def read_file_paragraphs(file_path, chunk_size=1000):
   with open(file_path, 'r', encoding='utf-8') as file:
      text = file.read()
   doc = nlp(text)
   buffer = ""
   for sentence in doc.sents:
      if sentence.text.startswith('#') and buffer != "":
         yield buffer
         buffer = ""
      new_buffer = buffer + sentence.text
      if len(new_buffer) >= chunk_size:
         yield buffer
         new_buffer = sentence.text
      buffer = new_buffer
   if buffer != "":
      yield buffer
      buffer = ""

In [31]:
docs = []
for paragraph in read_file_paragraphs('./data-user/live-baccarat-doc.md'):
   docs.append({
      'page_content': paragraph.rstrip()
   })

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from IPython.display import display, Markdown, Latex
from finetune_lit import load_peft_model

BASE_MODEL_PATH = "/home/flash/models/Mistral-7B-Instruct-v0.1"
PEFT_MODEL_PATH = "../fine-tune/outputs/Mistral-7B-Instruct-v0.1-qlora/"
model, tokenizer = load_peft_model(BASE_MODEL_PATH, PEFT_MODEL_PATH)


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading PEFT ../fine-tune/outputs/Mistral-7B-Instruct-v0.1-qlora/


In [21]:
generation_config = model.generation_config
generation_config.max_new_tokens = 1024 * 2
generation_config.temperature = 0.01 
generation_config.do_sample = True
generation_config.top_k = 3
generation_config.top_p = 0.75
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.pad_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [22]:
from finetune_lit import ask_llama2_instruction_prompt

def ask(user_input):
   global model, tokenizer, generation_config
   answer = ask_llama2_instruction_prompt(model=model,
                                          generation_config=generation_config,
                                          tokenizer=tokenizer,
                                          device=model.device,
                                          question=user_input)
   return answer

In [23]:
answer = ask("what is your name?")
answer

' My name is Mistral 7B v0.1. But you can just call me Mistral.'

In [15]:
def generate_qa_from_page_content(title, page_content):
   prompt_template = """extract {title} into Q&A data.
{page_content}"""   
   prompt = prompt_template.format(title=title, page_content=page_content)
   return ask(prompt)

In [32]:
page_content = docs[0]['page_content']
page_content

"# Introduction to Live Dealer Baccarat\n\nThe objective of the game is to bet on whichever of two hands, the player's hand or the banker's hand, that the Player thinks will have a point value closest to 9.The Player can also bet on a tie.\n\nThe game is presented to the Player with a live person dealing the cards on screen to provide the Player with a realistic gaming environment in real time.\n\nThe theoretical return to player of this game is 98.41%.\nOver a long period of time, the game is likely to average a return to the Player of 98.48% of the total bets made."

In [33]:
answer = generate_qa_from_page_content("Live Dealer Baccarat", page_content)
answer

" Question: What is Live Dealer Baccarat?\nAnswer: The objective of the game is to bet on whichever of two hands, the player's hand or the banker's hand, that the Player thinks will have a point value closest to 9.The Player can also bet on a tie.\n\nThe game is presented to the Player with a live person dealing the cards on screen to provide the Player with a realistic gaming environment in real time.\n\nThe theoretical return to player of this game is 98.41%.\nOver a long period of time, the game is likely to average a return to the Player of 98.48% of the total bets made.\n\nQuestion: In Live Dealer Baccarat, what is the objective?\nAnswer: The objective of the game is to bet on whichever of two hands, the player's hand or the banker's hand, that the Player thinks will have a point value closest to 9.The Player can also bet on a tie.\n\nQuestion: In Live Dealer Baccarat, who deals the cards?\nAnswer: The game is presented to the Player with a live person dealing the cards on screen 