In [2]:
import os
import shutil
from typing import List
from transformers import AutoTokenizer, GenerationConfig, AutoModelForCausalLM, AutoConfig
from peft import PeftConfig, PeftModel
from train import train

### Загрузка базовой модели и сохранение базового адаптера на диск.

In [16]:
# Identifier of the LoRA adapter handed to the `from_pretrained` loaders below.
base_adapter_name = "IlyaGusev/saiga2_7b_lora"
# Local directory where the adapter, tokenizer and generation config are saved.
adapter_path = "models/adapter"

# Make sure the target directory exists; an already existing one is not an error.
os.makedirs(adapter_path, exist_ok=True)

In [None]:
# PEFT config of the adapter; its `base_model_name_or_path` is read in the next cell.
config = PeftConfig.from_pretrained(base_adapter_name)

# Slow (non-fast) tokenizer configured with left-side padding.
tokenizer = AutoTokenizer.from_pretrained(
    base_adapter_name,
    padding_side="left",
    use_fast=False,
)

# Generation settings loaded from the same identifier, with `do_sample` set to True.
generation_config = GenerationConfig.from_pretrained(
    base_adapter_name,
    do_sample=True,
)

In [14]:
# The adapter's PEFT config names the base model it belongs to.
base_model_name = config.base_model_name_or_path
base_model_config = AutoConfig.from_pretrained(base_model_name)

# Load the base weights using the dtype declared in the base model's own config.
# The `load_in_8bit=True` / `device_map="auto"` options were left disabled here.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=base_model_config.torch_dtype,
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:30<00:00, 15.19s/it]


In [17]:
# Wrap the base model with the adapter identified by `base_adapter_name`.
# NOTE: `model` is rebound to the wrapper, so re-running only this cell would wrap the
# already wrapped model again; re-run the base-model loading cell first.
model = PeftModel.from_pretrained(model=model, model_id=base_adapter_name)

Downloading adapter_model.bin: 100%|██████████| 67.2M/67.2M [00:06<00:00, 10.7MB/s]


In [18]:
# Write the wrapped model, the tokenizer and the generation config into the same
# local directory, in that order.
for artifact in (model, tokenizer, generation_config):
    artifact.save_pretrained(adapter_path)

### Тренировка модели на вопросах и ответах.

In [2]:
# Stage 1: fine-tune the adapter saved in ./models/adapter on the recipes Q&A data.
# The output directory must match the `base_adapter_path` that the next training stage
# loads ("./models/recepies_model"); it was previously misspelled "recepties_model",
# so the second stage could not find the adapter produced here.
output_path = './models/recepies_model'
base_adapter_path = "./models/adapter"
os.makedirs(output_path, exist_ok=True)

# Read the base model name back from the adapter config stored on disk.
config = PeftConfig.from_pretrained(base_adapter_path)
base_model_name = config.base_model_name_or_path

In [None]:
# Launch the recipes training run. The adapter directory and the output directory
# prepared in the previous cell are passed straight through to `train`.
train(
    config_file='config/train_recepies_config.json',
    train_file='datasets/train_recepies.jsonl',
    val_file='datasets/test_recepies.jsonl',
    model_path=base_model_name,
    adapter_path=base_adapter_path,
    output_dir=output_path,
    report_to='none',
)

# Я забыл сохранить ноутбук с выводом хода обучения модели, поэтому здесь его нет.

### Тренировка модели на диалогах с персонажем.

Происходит дообучение адаптера из предыдущего этапа.

In [2]:
# Stage 2 starts from ./models/recepies_model, which is expected to hold the adapter
# from the previous training stage, and writes its result to ./models/granny_model.
base_adapter_path = "./models/recepies_model"
output_path = "./models/granny_model"
os.makedirs(output_path, exist_ok=True)

# The base model name is taken from the PEFT config found in that adapter directory.
config = PeftConfig.from_pretrained(base_adapter_path)
base_model_name = config.base_model_name_or_path

In [3]:
# Launch the character-dialogue training run. The adapter directory and the output
# directory prepared in the previous cell are passed straight through to `train`.
train(
    config_file="config/train_granny_config.json",
    train_file='datasets/train_dialogues1.jsonl',
    val_file='datasets/test_dialogues1.jsonl',
    model_path=base_model_name,
    adapter_path=base_adapter_path,
    output_dir=output_path,
    report_to='none',
)

deepspeed:  None
models/llama2-7b


Vocab size:  32000
PAD:  0 <unk>
BOS:  1 <s>
EOS:  2 </s>
UNK:  0 <unk>
SEP:  None None


100%|██████████| 18/18 [00:00<00:00, 386.56it/s]


100%|██████████| 3/3 [00:00<00:00, 345.83it/s]


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.91s/it]


Load Adapter
training run.


Step,Training Loss,Validation Loss
5,0.191,1.293423
10,0.1722,1.243647
15,0.1731,1.162909
20,0.1537,1.084182
25,0.0945,1.019669
30,0.1278,0.962937
35,0.091,0.89921
40,0.0796,0.862403
45,0.0974,0.865269
50,0.0593,0.87335


save model to ./models/granny_model


### Запуск модели внутри сервиса.

Для того чтобы сервис подхватил модель, её необходимо скопировать из папки
`./train/models/granny_model` в папку `models` уровнем выше (`../models`).

In [7]:
# A notebook has no real `__file__`, so all paths are anchored at the kernel's current
# working directory (the directory the notebook is run from).
file_dir = os.getcwd()

# Trained adapter produced by the last training stage, and the directory one level up
# that receives the copy.
source_dir = os.path.join(file_dir, 'models/granny_model')
dst_dir = os.path.join(file_dir, '../models')

# Create the destination through the very same path that is later passed to the copy
# step, so both always refer to one location.
os.makedirs(dst_dir, exist_ok=True)

In [None]:
# Copy the contents of the trained model directory into the destination. An existing
# destination directory is reused, and symlinks are copied as links, not followed.
shutil.copytree(
    src=source_dir,
    dst=dst_dir,
    symlinks=True,
    dirs_exist_ok=True,
)