In [1]:
base_model = "gpjt/8xa100m40"

In [2]:
dataset_source = "gpjt/openassistant-guanaco-llama2-format"

In [3]:
from datasets import load_dataset

dataset = load_dataset(dataset_source)

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(base_model)

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(base_model, device_map="cuda", trust_remote_code=True)

model.safetensors:   0%|          | 0.00/702M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/166 [00:00<?, ?B/s]

In [6]:
prompt_template = """
<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

{question} [/INST]
{response}
"""

import time
from transformers import pipeline

def ask_question(model, question):
    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100)
    prompt = prompt_template.format(question=question, response="")
    tokens_in = len(tokenizer(prompt)["input_ids"])
    start = time.time()
    result = pipe(prompt)
    end = time.time()
    generated_text = result[0]['generated_text']
    tokens_out = len(tokenizer(generated_text)["input_ids"])
    print(generated_text)
    tokens_generated = tokens_out - tokens_in
    time_taken = end - start
    tokens_per_second = tokens_generated / time_taken
    print(f"{tokens_generated} tokens in {time_taken:.2f}s: {tokens_per_second:.2f} tokens/s)")

In [7]:
ask_question(model, "Who is Leonardo Da Vinci?")

Device set to use cuda



<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

Who is Leonardo Da Vinci? [/INST]


Who is Leonardo Da Vinci?
Who is Leonardo Da Vinci? [INST]command
Who is Leonardo Da Vinci? [INST]command
Who is Leonardo Da Vinci?
Who is Leonardo Da Vinci?
How do you know the answer to the question?
What is Leonardo Da Vinci?
Who is Leonardo Da Vinci?
What is Leonardo Da Vinci?
Who is Leonardo Da Vinci?
Who is Leonardo Da Vinci?

100 tokens in 0.74s: 134.69 tokens/s)


In [8]:
def tokenize_function(examples):
    tokenized = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=1024)
    tokenized["labels"] = tokenized["input_ids"][:]
    return tokenized

tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/9846 [00:00<?, ? examples/s]

Map:   0%|          | 0/518 [00:00<?, ? examples/s]

In [9]:
from transformers import TrainingArguments,Trainer

batch_size = 6
args = TrainingArguments(
    'outputs', 
    learning_rate=8e-5, 
    warmup_ratio=0.1, 
    lr_scheduler_type='cosine', 
    fp16=True,
    eval_strategy="epoch", 
    eval_on_start=True,
    per_device_train_batch_size=batch_size, 
    per_device_eval_batch_size=batch_size * 2,
    num_train_epochs=6, 
    weight_decay=0.01, 
    report_to='none'
)

In [10]:
trainer = Trainer(
    model, args, 
    train_dataset=tokenized_dataset['train'], 
    eval_dataset=tokenized_dataset['test'],
    processing_class=tokenizer,
)

In [11]:
trainer.train()

Epoch,Training Loss,Validation Loss
0,No log,3.683675
1,2.213300,2.074807
2,1.942700,1.94135
3,1.778000,1.887485
4,1.641900,1.855742
5,1.559600,1.847671
6,1.523700,1.847974


TrainOutput(global_step=9846, training_loss=1.813321922747023, metrics={'train_runtime': 3230.1163, 'train_samples_per_second': 18.289, 'train_steps_per_second': 3.048, 'total_flos': 4.487155818902323e+16, 'train_loss': 1.813321922747023, 'epoch': 6.0})

In [12]:
ask_question(model, "Who is Leonardo Da Vinci?")

Device set to use cuda



<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

Who is Leonardo Da Vinci? [/INST]

 Leonardo Da Vinci is a German-born German-born German-born German-French-Canadian-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American-American, and his mother, Alice. He is best known for his work as a psychologist, as well as his work as a psychotherapist and teacher. D