# TRAINING

In [None]:
!pip install -q -U trl transformers accelerate peft
!pip install -q datasets bitsandbytes einops
!pip install -q flash_attn
!pip install accelerate
#!pip install -i https://pypi.org/simple/ bitsandbytes

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
import gc

In [None]:
class HF_Evo():
    """Load the Evo config, causal-LM model and tokenizer from the Hugging Face Hub.

    Creating an instance loads all three objects, moves the model to
    ``device``, switches it to eval mode with the generation cache enabled,
    and registers a single space as the EOS token, which is also used as
    the padding token.
    """

    # Defaults used when the corresponding constructor argument is omitted.
    model_name: str = "togethercomputer/evo-1-8k-base"
    revision: str = "1.1_fix"
    # Evaluated once, when the class body is executed.
    device: str = "cuda:0" if torch.cuda.is_available() else "cpu"

    def __init__(self, model_name=None, revision=None):
        """Load config, model and tokenizer.

        Args:
            model_name: Hub repository id; the class default is used (and
                announced on stdout) when it is None.
            revision: Revision passed when loading the config and the model;
                the class default is used when it is None.
        """
        if model_name is None:
            print('Model name needed! Using default: ' + self.model_name)
        else:
            self.model_name = model_name

        if revision is not None:
            self.revision = revision

        self.config = AutoConfig.from_pretrained(
            self.model_name,
            trust_remote_code=True,
            revision=self.revision,
        )

        # No quantization or dtype override is requested
        # (load_in_8bit / torch_dtype are left unset).
        loaded_model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            config=self.config,
            trust_remote_code=True,
            revision=self.revision,
        )
        self.model = loaded_model.to(self.device)
        self.model.config.use_cache = True
        self.model.eval()

        # The tokenizer is loaded without an explicit revision.
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name,
            trust_remote_code=True,
        )

        # A single space acts as EOS, and padding reuses the same token.
        self.tokenizer.add_special_tokens({'eos_token': ' '})
        self.tokenizer.pad_token = self.tokenizer.eos_token

        print('Tokenizer pad token:', self.tokenizer.pad_token)
        print('Tokenizer eos token:', self.tokenizer.eos_token)

def run_model(model, tokenizer, prompt, max_new_tokens=1000, temp=1, rep_penalty=None,
              top_k=4, top_p=1, alpha=None, device='cuda:0'):
    """Generate a continuation of ``prompt`` and print the decoded text.

    Args:
        model: Model exposing ``eval()`` and ``generate()``.
        tokenizer: Callable tokenizer that also provides ``decode`` and
            ``eos_token_id``.
        prompt: Text the generation starts from.
        max_new_tokens: Upper bound on the number of generated tokens.
        temp: Passed to ``generate`` as ``temperature``; sampling is enabled
            whenever this is not None.
        rep_penalty: Passed to ``generate`` as ``repetition_penalty``.
        top_k: Passed to ``generate`` as ``top_k``.
        top_p: Passed to ``generate`` as ``top_p``.
        alpha: Passed to ``generate`` as ``penalty_alpha``.
        device: Device the tokenized prompt is moved to.

    Returns:
        None. The decoded first output sequence is printed.
    """
    model.eval()

    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    # token_type_ids must not be forwarded to generate(); not every tokenizer
    # returns them, so drop the key only when it is present.
    encoded.pop('token_type_ids', None)

    outputs = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        temperature=temp,
        repetition_penalty=rep_penalty,
        top_k=top_k,
        top_p=top_p,
        penalty_alpha=alpha,
        do_sample=temp is not None,
        eos_token_id=tokenizer.eos_token_id,
    )

    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Load Evo; if the first attempt fails, free memory and retry once.
# Rebinding `evo` to None first drops any model left over from an earlier run
# of this cell so that it can be garbage-collected.
evo = None
try:
    evo = HF_Evo()
except Exception as err:  # narrow enough to let KeyboardInterrupt through
    print('First load attempt failed, retrying after freeing memory:', repr(err))
    # `evo` is still None here because HF_Evo() raised before the assignment,
    # so there is nothing of ours to delete explicitly. Collect unreachable
    # objects first, then release the cached CUDA blocks they were holding.
    gc.collect()
    torch.cuda.empty_cache()
    evo = HF_Evo()

In [None]:
# Show the dtype the model was loaded with.
evo.model.dtype

In [None]:
# Sanity check before fine-tuning: the base model should work properly with
# known prompts, and should NOT yet work when prompted with the TA special tokens.
prompt = "@!"
run_model(evo.model, evo.tokenizer,prompt)

In [None]:
# to read files that have everything in one line and error in json.loads()
import json

# The file is deliberately NOT parsed as JSON. It is split on double quotes and
# every 8th piece, starting at index 7, is taken as a sequence.
# NOTE(review): this assumes a fixed number of quoted fields per record and no
# escaped quotes inside the values - confirm against the data file.
with open('/content/training_data_5k_2.json') as f:
    doc = f.read()
x = doc.split('"')
seqs = x[7::8]

# One {'text': ...} record per sequence.
# (A replacement of '`' by '>' in each sequence is left disabled.)
dataset = [{'text': seq} for seq in seqs]

In [None]:
# to read files already as json

#import json

#f = open('/content/training_data_5k.json')
#doc = f.read()
#dataset = json.loads(doc)
#f.close()
#print(len(dataset), dataset[0])

In [None]:
# #to read files with individual dicts of sequence per line

# import json

# f = open('/content/training_data.json')
# dataset = []
# lines = f.readlines()
# for l in lines:
#  json_obj = json.loads(l)
#  dataset.append(json_obj)
# f.close()
# dataset[0]

In [None]:
import copy
from itertools import islice

MAX_LENGTH = 1024 #4096 for A100
dataset_size = 10000  # only the first `dataset_size` records are considered
c = 0  # number of sequences skipped because they are longer than MAX_LENGTH
filtered_dataset = []

for d in islice(dataset, dataset_size):

    t = d['text'].strip()
    # The length check is done in characters; MAX_LENGTH is also used below as
    # the padded/truncated length in tokens.
    if len(t) > MAX_LENGTH:
        c += 1
        continue

    temp = evo.tokenizer(t, return_tensors="pt", truncation=True,
                         padding='max_length', max_length=MAX_LENGTH)

    # The tokenizer returns a batch of one; keep an independent copy of row 0.
    # token_type_ids is optional because not every tokenizer produces it.
    for key in ('input_ids', 'attention_mask', 'token_type_ids'):
        if key in temp:
            temp[key] = copy.deepcopy(temp[key][0])

    # Labels are a copy of the (padded) input ids.
    temp["labels"] = copy.deepcopy(temp['input_ids'])
    temp['text'] = t
    #temp['record'] = copy.deepcopy(d['record'])

    filtered_dataset.append(temp)

print('Seqs longer than max_length:',c)
if filtered_dataset:
    print(len(filtered_dataset), filtered_dataset[0]['text'])
else:
    # Avoid an IndexError when every sequence was filtered out.
    print(0, 'no sequences left after filtering')

In [None]:
from datasets import Dataset
# Build a Dataset from the list of tokenized records (rebinds `dataset`).
dataset = Dataset.from_list(filtered_dataset)

In [None]:
# Hold out 20% of the records (rounded down) as the test split; the seed is
# fixed at 0.
test_size = int(len(dataset)*0.2)
split_dataset = dataset.train_test_split(test_size=test_size, seed=0)

In [None]:
# Display the train/test split.
split_dataset

In [None]:
from transformers import DefaultDataCollator
# Data collator, configured to return PyTorch tensors; passed to the Trainer.
data_collator = DefaultDataCollator(return_tensors="pt")

In [None]:
# Names of all sub-modules whose type description contains "Linear".
linear_layers = [
    module_name
    for module_name, module in evo.model.named_modules()
    if "Linear" in str(type(module))
]
print(linear_layers, len(linear_layers))

In [None]:
# Select only certain layers (MLP layers, MHA layers, or both, etc).
# Currently only names containing "mha" are kept; to include the "mlp" ones as
# well, use the condition `"mlp" in name or "mha" in name`.
# NOTE: despite its name, `mlp_layers` therefore holds the "mha" layers. The
# name is kept because the LoRA configuration reads it.
mlp_layers = [name for name in linear_layers if "mha" in name]
print(len(mlp_layers), mlp_layers)


In [None]:
from peft import LoraConfig, get_peft_model

# LoRA hyper-parameters.
# Rank of the low-rank matrices. An earlier note suggested 8-16 because of the
# resources available; the value currently in use is 64.
lora_r = 64
# Scaling factor for the weight matrices. A rule of thumb is 2x the rank
# (https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms);
# here it is set equal to the rank.
lora_alpha = 64
# Dropout probability of the LoRA layers (0.05 is the recommended value).
lora_dropout = 0.1

# More details: https://huggingface.co/docs/peft/main/en/package_reference/lora#peft.LoraConfig
lora_config = LoraConfig(
    target_modules=mlp_layers,
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",  # can be changed to trade off performance
    init_lora_weights='gaussian',
    # Left disabled: task_type="CAUSAL_LM", is_prompt_learning=True
)

In [None]:
from transformers import TrainingArguments, Trainer
import bitsandbytes

EPOCHS = 3
LEARNING_RATE = 3e-4
MODEL_SAVE_FOLDER_NAME = "lora_evo_ta_all_layers_18_attention_layers"

training_args = TrainingArguments(
    # Where checkpoints and logs go.
    output_dir=MODEL_SAVE_FOLDER_NAME,
    overwrite_output_dir=True,
    logging_dir='./log/',
    log_level='debug',
    # What to run.
    do_train=True,
    do_eval=True,
    # Optimisation schedule.
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    # NOTE(review): the plain "constant" schedule presumably ignores
    # warmup_steps; "constant_with_warmup" would be needed for the 500 warmup
    # steps to take effect - confirm against the scheduler documentation.
    lr_scheduler_type="constant",
    warmup_steps=500,
    # Batch sizes: one sequence per step, no gradient accumulation.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=1,
    # Log and evaluate every 1200 steps; save once per epoch.
    logging_strategy="steps",
    logging_steps=1200,
    evaluation_strategy="steps",
    eval_steps=1200,
    save_strategy="epoch",
)

In [None]:
# Wrap the base model with the LoRA configuration and report how many
# parameters are trainable.
evo_peft_model = get_peft_model(evo.model, lora_config)
evo_peft_model.print_trainable_parameters()

In [None]:
# Make sure padding uses the EOS token before the tokenizer is handed to the Trainer.
evo.tokenizer.pad_token = evo.tokenizer.eos_token
# Train on the 'train' split and evaluate on the 'test' split of split_dataset.
trainer = Trainer(
        model=evo_peft_model,
        tokenizer=evo.tokenizer,
        args=training_args,
        train_dataset=split_dataset['train'],
        eval_dataset=split_dataset['test'],
        data_collator=data_collator,
)

# NOTE(review): presumably tells the Trainer that the wrapped model can return a
# loss, so that evaluation reports one - confirm against the Trainer internals.
trainer.can_return_loss = True

# The generation cache is switched off for training; it is switched back on
# after trainer.train().
evo_peft_model.config.use_cache = False

#for name, module in trainer.model.named_modules():
#    if "norm" in name:
#        module = module.to(torch.float32)

In [None]:
# Evaluate on the test split before training starts.
trainer.evaluate()

In [None]:
# Run the fine-tuning.
trainer.train()
## The calls below are left disabled; they are alternative ways to store the result locally.
## only saves the incremental 🤗 PEFT weights (adapter_model.bin) that were trained, meaning it is super efficient to store, transfer, and load.
#trainer.model.save_pretrained(MODEL_SAVE_FOLDER_NAME)
## save the full model and the training arguments
#trainer.save_model(MODEL_SAVE_FOLDER_NAME)
#trainer.model.config.save_pretrained(MODEL_SAVE_FOLDER_NAME)
# Training is done: switch the generation cache back on.
evo_peft_model.config.use_cache = True

In [None]:
# Collect every parameter whose name contains "lora_B" and print its sum, as a
# quick look at the trained adapter weights.
lora_params = {}
for n, p in trainer.model.named_parameters():
    if "lora_B" not in n:
        continue
    lora_params[n] = p
    print(n, p.sum())

In [None]:
# Show the text of one record from the test split.
split_dataset['test'][100]['text']

In [None]:
# Sample from the fine-tuned model with the prompt "!!".
prompt="!!"
run_model(trainer.model, trainer.tokenizer, prompt)

In [None]:
# Sample from the fine-tuned model with the prompt "``".
prompt="``"
run_model(trainer.model, trainer.tokenizer, prompt)

In [None]:
# Sample from the fine-tuned model with the prompt "@@".
prompt="@@"
run_model(trainer.model, trainer.tokenizer, prompt)

In [None]:
# Sample from the fine-tuned model with the prompt "!ATG".
prompt="!ATG"
run_model(trainer.model, trainer.tokenizer, prompt)

In [None]:
import os

from huggingface_hub import login, notebook_login

# HF_TOKEN_REQUIRED: the access token is read from the HF_TOKEN environment
# variable so that it is never stored in the notebook itself.
HF_token = os.environ.get("HF_TOKEN")

#evo_peft_model.save_pretrained("trained_"+MODEL_SAVE_FOLDER_NAME)
#trainer.model.save_pretrained("trained_from_trainer_"+MODEL_SAVE_FOLDER_NAME)
#trainer.tokenizer.save_pretrained("tokenizer_"+MODEL_SAVE_FOLDER_NAME)

if HF_token:
    # Non-interactive login with the provided token.
    login(token=HF_token)
else:
    # No token configured: fall back to the interactive notebook prompt.
    notebook_login()

In [None]:
# Upload the training result to the Hugging Face Hub.
# NOTE(review): the first positional parameter of Trainer.push_to_hub is
# presumably the commit message, not the repository id, so this string would end
# up as the commit message while the repository name is derived from
# output_dir / hub_model_id - confirm against the Trainer documentation.
trainer.push_to_hub("lsmille/"+MODEL_SAVE_FOLDER_NAME)

In [None]:
# Write both splits to JSON files named after the model folder, so the exact
# train/test partition used for this run is kept.
split_dataset['train'].to_json(MODEL_SAVE_FOLDER_NAME+"_train.jsonl")
split_dataset['test'].to_json(MODEL_SAVE_FOLDER_NAME+"_test.jsonl")

# ***Validation***

Restart the session, re-run the cells that define `HF_Evo` and `run_model`, and then download the fine-tuned model.

In [None]:
!pip install -q -U trl transformers accelerate peft
!pip install -q datasets bitsandbytes einops
!pip install -q flash_attn
!pip install accelerate
#!pip install -i https://pypi.org/simple/ bitsandbytes

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
import gc

In [None]:
# Load the base model and tokenizer with the HF_Evo defaults.
evo = HF_Evo()

In [None]:
# Put the model in evaluation mode.
evo.model.eval()

In [None]:
# Sample with the prompt "``" before any adapter is loaded into the model.
run_model(evo.model, evo.tokenizer, "``")

In [None]:
# Load a fine-tuned adapter from the Hub into the already loaded base model.
# The generation cache is switched off while the adapter is attached and
# switched back on afterwards.
evo.model.config.use_cache = False

adapter = 'lsmille/lora_evo_ta_all_layers_13'
evo.model.load_adapter(adapter)
evo.model.config.use_cache = True
# Back to evaluation mode for inference.
evo.model.eval()

In [None]:
# Sample with the prompt "!!" now that the adapter has been loaded.
run_model(evo.model, evo.tokenizer, "!!")

In [None]:
# Alternative: load the fine-tuned repository directly through
# AutoModelForCausalLM instead of attaching an adapter to an existing model.
model_name = 'lsmille/lora_evo_ta_all_layers_16'

# No config or revision is passed here, and the target device is hard-coded.
model_reloaded = AutoModelForCausalLM.from_pretrained(model_name,
                                                      trust_remote_code=True).to('cuda:0')
model_reloaded.config.use_cache = True
model_reloaded.eval()

In [None]:
# Stand-alone tokenizer taken from the base Evo repository (not from the
# fine-tuned one), configured the same way as in HF_Evo: a single space is the
# EOS token and padding reuses it.
evo_tokenizer=AutoTokenizer.from_pretrained("togethercomputer/evo-1-8k-base",trust_remote_code=True)
#tokenizer_reload = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True)
evo_tokenizer.add_special_tokens({'eos_token': ' '})
evo_tokenizer.pad_token = evo_tokenizer.eos_token

In [None]:
# Sample from the directly reloaded model with the prompt '@!'.
prompt='@!'
run_model(model_reloaded, evo_tokenizer, prompt)

In [None]:
# Load a fresh base model (this replaces the instance that had the adapter loaded).
evo = HF_Evo()

In [None]:
# Same prompt on the fresh base model, for comparison.
prompt='@!'
run_model(evo.model, evo.tokenizer, prompt)

In [None]:
model_name = 'lsmille/lora_evo_ta_all_layers_13'

# `config` and `revision` must be defined before they are passed below. They are
# taken from the HF_Evo defaults, i.e. the base Evo repository and its revision.
# NOTE(review): this assumes the fine-tuned repository resolves to the Evo base
# checkpoint and that `revision` is applied to that base model - confirm.
revision = HF_Evo.revision
config = AutoConfig.from_pretrained(HF_Evo.model_name,
                                    trust_remote_code=True,
                                    revision=revision)
#config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

model_reloaded2 = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    revision=revision,
    trust_remote_code=True,
).to('cuda:0')
model_reloaded2.config.use_cache = True
model_reloaded2.eval()

In [None]:
# Sample from the second reloaded model with the prompt '@!'.
prompt='@!'
run_model(model_reloaded2, evo_tokenizer, prompt)

In [None]:
# from trl import SFTTrainer

# max_seq_length = 1024

# trainer = SFTTrainer(
#     model=evo.model,
#     train_dataset=dataset,
#     eval_dataset=dataset_test,
#     peft_config=lora_config,
#     dataset_text_field="text",
#     max_seq_length=max_seq_length,
#     tokenizer=evo.tokenizer,
#     args=training_args,
# )

In [None]:
#trainer.train()

In [None]:
#this is for the case where LoRA is not loaded into the model beforehand (same as the commented-out cell two above)
#in that case, also change the get_peft_model call

# from trl import SFTTrainer

# max_seq_length = 512

# trainer = SFTTrainer(
#     model=evo.model,
#     train_dataset=dataset,
#     peft_config=lora_config, #lora config is here
#     dataset_text_field="text",
#     max_seq_length=max_seq_length,
#     tokenizer=evo.tokenizer,
#     args=training_args,
# )

# trainer.train()

#try same inference but with new model