In [2]:
"""
Testing feasibility of pretraining data augmentation
Successfully run inference of Mistral 7B on Kaggle GPU T4 * 2 using bitsandbytes
"""

# Package requirements
# !pip install git+https://github.com/huggingface/transformers
# !pip install accelerate
# !pip install -i https://pypi.org/simple/ bitsandbytes

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-3dgs4cf_
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-3dgs4cf_
  Resolved https://github.com/huggingface/transformers to commit 0ad770c3733f9478a8d9d0bc18cc6143877b47a2
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... [?25ldone
[?25h  Created wheel for transformers: filename=transformers-4.39.0.dev0-py3-none-any.whl size=8593793 sha256=2a28a0b752caa46db80efada8c7d8c7023abb6fc694aa394431001f2dfe0ca5a
  Stored in directory: /tmp/pip-ephem-wheel-cache-upnnmk2r/wheels/c0/14/d6/6c9a5582d2ac191ec0a483be151a4495fe1eb2a6706ca49f1b
Successfully built transformers
Insta

In [3]:
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
import torch

model_name_or_path = "mistralai/Mistral-7B-Instruct-v0.1"
config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
config.max_position_embeddings = 8096
quantization_config = BitsAndBytesConfig(
llm_int8_enable_fp32_cpu_offload=True,
bnb_4bit_quant_type='nf4',
bnb_4bit_use_double_quant=True,
bnb_4bit_compute_dtype=torch.bfloat16,
load_in_4bit=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
model_name_or_path,
config=config,
trust_remote_code=True,
quantization_config=quantization_config,
device_map="auto",
offload_folder="./offload"
)

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [17]:
import pickle
import json

prompt = json.load(open('mistral_prompt.json'))

""" Template from huggingface
messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]
"""

def generate_message(emotion, context):
    return [
        {"role": "user", "content": prompt["prompt_1"] + emotion + "\n" + context},
    ]

In [19]:
messages = generate_message("panic", "Jason forgot his password to the library website")
messages

[{'role': 'user',
  'content': 'You are a creative writer who writes emotional stories instead of chatting. You are asked to further generate story given emotion and context.\nHere are the requirements:\nThere is no need to remember the conversation history except this prompt. The history prompts are independent.\nYour response should be in exactly one paragraph with simple children level language.\nYour response should be highly related to the emotion and context without too much plot twist\nYour response should not explain the context behind your generation.\nThe emotions and context are:\npanic\nJason forgot his password to the library website'}]

In [20]:
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
device = "cuda"
model_inputs = encodeds.to(device)

In [22]:
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [23]:
print(decoded[0])

<s> [INST] You are a creative writer who writes emotional stories instead of chatting. You are asked to further generate story given emotion and context.
Here are the requirements:
There is no need to remember the conversation history except this prompt. The history prompts are independent.
Your response should be in exactly one paragraph with simple children level language.
Your response should be highly related to the emotion and context without too much plot twist
Your response should not explain the context behind your generation.
The emotions and context are:
panic
Jason forgot his password to the library website [/INST] Jason felt panic as he tried to access the library website, but his password didn't work. He couldn't believe he had forgotten something so important. He quickly searched his mind for the password, but it was nowhere to be found. He felt frustrated and helpless. He didn't know what to do, and he couldn't even access the website to find out. But then he remembered 