In [2]:
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)

from peft import (
    LoraConfig, 
    get_peft_model, 
    prepare_model_for_kbit_training,
    TaskType
    )

from datasets import load_dataset
import torch


In [3]:
# Quantization settings handed to `from_pretrained` when loading the base model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load the weights in 4-bit
    bnb_4bit_quant_type="nf4",              # "nf4" quantization type
    bnb_4bit_use_double_quant=True,         # enable double quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation
)

In [None]:
# Base checkpoint to fine-tune, plus its tokenizer.
# NOTE(review): the Hub repo is published as "huggyllama/llama-7b" — confirm this casing resolves.
checkpoint = "huggyllama/Llama-7b"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Load the model quantized according to `bnb_config`.
# `device_map` must be a dict mapping module names to devices; the empty-string
# key places the whole model on GPU 0. (The previous value `{":", 0}` was a
# set literal, not a dict, and is not a valid device map.)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=bnb_config,
    device_map={"": 0},
)


# LoRA adapter configuration for causal language modelling.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none"
)

# The base model was loaded in 4-bit, so run PEFT's k-bit preparation step
# before attaching the adapters (the helper was imported but never applied).
model = prepare_model_for_kbit_training(model)

# Wrap the prepared model with LoRA adapters and report how many parameters
# will actually be trained.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

In [None]:
def tokenize_pairs(batch):
    """Tokenize each batch's "instructions" column together with its "outputs" column."""
    return tokenizer(batch["instructions"], batch["outputs"])


# Download the dataset and tokenize every split in batches.
raw_dataset = load_dataset("AlanRobotics/saiga")
tokenized_dataset = raw_dataset.map(tokenize_pairs, batched=True)

# Carve a 10% test / 90% train split out of the "train" split.
dataset = tokenized_dataset["train"].train_test_split(test_size=0.1, train_size=0.9)

# Reuse the EOS token id as the padding token id.
tokenizer.pad_token_id = tokenizer.eos_token_id

# Collator for causal language modelling (masked-LM objective disabled).
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [None]:
# Hyper-parameters and bookkeeping options for the Trainer.
training_args = TrainingArguments(
    # Output location and Hub upload.
    output_dir="llama",
    push_to_hub=True,
    # Batch sizes and gradient accumulation.
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    # Optimizer and schedule.
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    warmup_steps=2,
    # Precision and distributed-training options.
    fp16=True,
    ddp_find_unused_parameters=False,
    # Logging and checkpoint cadence, in steps.
    logging_steps=2000,
    save_steps=2000,
)

In [None]:
# Build the Trainer for the LoRA-wrapped model.
# `model` has to be passed explicitly: Trainer needs either `model` or
# `model_init`, and the original call supplied neither.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collator,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

In [None]:
# Run the training loop on the `trainer` built in the previous cell.
trainer.train()

In [None]:
# Save `model` under "mistral-peft"; the inference cell below loads from this same path.
# NOTE(review): the directory is named "mistral" while the training checkpoint above
# is a Llama model — confirm the name and base model are intended.
model.save_pretrained("mistral-peft")

INFERENCE

In [None]:
from transformers import T5ForConditionalGeneration, AutoTokenizer, Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq, GPT2Tokenizer, AdamW, AutoModelForCausalLM, DataCollatorForLanguageModeling
from datasets import Dataset, load_dataset
import json
import numpy as np
from peft import get_peft_model, LoraConfig, TaskType, PeftModel

from peft import PeftModel
import torch

# Base checkpoint for inference.
# NOTE(review): the training cells above fine-tune "huggyllama/Llama-7b"; make sure
# the adapter stored in 'mistral-peft' was trained against the base model loaded here.
# checkpoint = "huggyllama/Llama-7b"
checkpoint = "mistralai/Mistral-7B-v0.1"

# `torch_dtype` is a model-loading option, so it belongs on the model call;
# it was previously passed to the tokenizer, leaving the model at its default precision.
model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Attach the LoRA adapter saved by the training section.
model = PeftModel.from_pretrained(model, 'mistral-peft')
print(model.get_memory_footprint())

# To publish, authenticate first (`huggingface-cli login` or the HF_TOKEN environment
# variable) — never hardcode an access token in the notebook. The token that used to be
# on this line should be treated as leaked and revoked.
#model.push_to_hub('AlanRobotics/llama-chat')

def create_query(prompt, max_new_tokens=50, eos_token_id=13):
    """Generate a continuation of ``prompt`` and print the decoded text.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        eos_token_id: Token id at which generation stops. The default of 13 is
            presumably the newline token of this vocabulary, so that the reply
            ends after one line — TODO confirm against the tokenizer.

    Returns:
        None. The decoded sequence (prompt plus continuation, special tokens
        stripped) is printed.
    """
    # Move the encoded inputs to the model's device so the call also works
    # when the model is not on the CPU.
    encoded = tokenizer(prompt, return_tensors='pt').to(model.device)
    generated = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        eos_token_id=eos_token_id,
    )
    print(tokenizer.decode(generated[0], skip_special_tokens=True))


# Sample two-line prompt: a "User:" turn followed by an open "Clone:" turn for the
# model to complete.
sentence = """User: Что такое ИИ?
Clone:"""
create_query(sentence)