In [1]:
#pip install transformers trl accelerate torch bitsandbytes peft datasets -qU
#pip install scipy

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import torch

In [2]:
nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16,
)

In [3]:
MODEL_PATH = f"../models/openchat-3.5-1210"
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=nf4_config,
    device_map='auto',
    local_files_only=True,
    trust_remote_code=False,
    use_cache=True
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



In [4]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
#tokenizer.pad_token = tokenizer.eos_token
#tokenizer.padding_side = "right"

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
deci_generator = pipeline("text-generation",
                          model=model,
                          tokenizer=tokenizer,
                          temperature=0.1,
                          device_map="auto",
                          max_length=1024,
                          return_full_text=False
)

In [6]:
def ask(user_input):
   #GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi<|end_of_turn|>GPT4 Correct User: How are you today?<|end_of_turn|>GPT4 Correct Assistant:
   SYSTEM_PROMPT_TEMPLATE ="""GPT4 Correct User: {user_input}<|end_of_turn|>GPT4 Correct Assistant: """
   prompt = SYSTEM_PROMPT_TEMPLATE.format(user_input=user_input)
   inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
   output = model.generate(**inputs,
                            max_new_tokens=500,
                            num_beams=5,
                            no_repeat_ngram_size=4,
                            early_stopping=True,
                            eos_token_id=tokenizer.eos_token_id, 
                            pad_token_id=tokenizer.eos_token_id, 
                            )
   output = tokenizer.decode(output[0], skip_special_tokens=False)
   ss = output.split('<|end_of_turn|>')[-2:]
   ss = ''.join(ss).replace('GPT4 Correct Assistant: ', '').lstrip()
   
   return ss
   
   

In [7]:
ask("What is your name?")



'My name is ChatGPT, a large language model created by OpenAI.'

In [8]:
context = """
Question: What is Depbon in the promo code?
Answer: Depbon means deposit bonus.

Question: What is ACE in the promo code?
Answer: ACE means MNL Bonus for BET.
South Korea is an exception. South Korea players use the 'TOP' promo code.
"""

prompt = """
GPT4 Correct User:
###
Instruction: Answer the question based on your gaming knowledge. 
If the answer cannot be found from the context, respond with 'I don't know.'.
Here is context to help:

{context}

### QUESTION:
{question} 
<|end_of_turn|>GPT4 Correct Assistant:
"""

ask(prompt.format(context=context, question="What is ACE in the promo code?"))

'Ace in a promo code typically stands for a special bonus or promotion offered by a gaming platform. In this context, ACE means "MNL Bonus" for BET, which is a specific bonus or promotion for bettors in the Philippines. However, South Korean players have a different promo code, which is \'TOP\'.'