In [None]:
!pip install -q -U trl transformers accelerate peft
!pip install -q datasets bitsandbytes einops
!pip install -q flash_attn
!pip install accelerate
#!pip install -i https://pypi.org/simple/ bitsandbytes

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
import gc



In [None]:
class HF_Evo:
    """Bundle the config, model and tokenizer of an Evo checkpoint in one object.

    The class attributes are the defaults. ``__init__`` overrides ``model_name``
    and ``revision`` on the instance only when a non-None value is passed.
    """

    # Hub repo id and revision used when the caller does not supply them.
    model_name: str = "togethercomputer/evo-1-8k-base"
    revision: str = "1.1_fix"
    # Evaluated once, when the class body runs: first GPU if CUDA is available, else CPU.
    device: str = "cuda:0" if torch.cuda.is_available() else "cpu"

    def __init__(self, model_name=None, revision=None):
        """Load config, model and tokenizer, and configure the tokenizer's eos/pad token.

        Args:
            model_name: Hub repo id to load. When None, the class default is used
                and a notice is printed.
            revision: Revision passed to the config and model loaders. When None,
                the class default is used.
        """
        if model_name is None:
            print('Model name needed! Using default: ' + self.model_name)
        else:
            self.model_name = model_name
        if revision is not None:
            self.revision = revision

        # Keyword arguments shared by the config and model loaders.
        hub_kwargs = {'trust_remote_code': True, 'revision': self.revision}

        self.config = AutoConfig.from_pretrained(self.model_name, **hub_kwargs)

        # load_in_8bit / torch_dtype are intentionally not passed here.
        loaded_model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            config=self.config,
            **hub_kwargs)
        self.model = loaded_model.to(self.device)
        self.model.config.use_cache = True
        self.model.eval()

        # The tokenizer is loaded without a revision argument.
        tok = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True)
        # A single space is registered as eos and reused as the padding token.
        tok.add_special_tokens({'eos_token': ' '})
        tok.pad_token = tok.eos_token
        self.tokenizer = tok

        print('Tokenizer pad token:', tok.pad_token)
        print('Tokenizer eos token:', tok.eos_token)

def run_model(model, tokenizer, prompt, max_new_tokens=1000, temp=1, rep_penalty=None,
              top_k=4, top_p=1, alpha=None, device='cuda:0'):
    """Generate a continuation of ``prompt`` and print the decoded text.

    Args:
        model: Object exposing ``eval()`` and ``generate(**kwargs)``.
        tokenizer: Callable tokenizer exposing ``decode`` and ``eos_token_id``.
        prompt: Text used to seed generation.
        max_new_tokens: Forwarded to ``generate`` as ``max_new_tokens``.
        temp: Forwarded as ``temperature``; sampling is enabled whenever it is
            not None.
        rep_penalty: Forwarded as ``repetition_penalty``.
        top_k: Forwarded as ``top_k``.
        top_p: Forwarded as ``top_p``.
        alpha: Forwarded as ``penalty_alpha``.
        device: Device the tokenized prompt is moved to before generation.

    Returns:
        None. The first generated sequence is decoded (special tokens skipped)
        and printed.
    """
    model.eval()

    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    # token_type_ids is removed before calling generate(). pop() with a default
    # keeps this working for tokenizers that never emit the key, where the
    # previous `del` raised KeyError.
    encoded.pop('token_type_ids', None)

    outputs = model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            temperature=temp,
            repetition_penalty=rep_penalty,
            top_k=top_k,
            top_p=top_p,
            penalty_alpha=alpha,
            do_sample=temp is not None,
            eos_token_id=tokenizer.eos_token_id)

    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Build the Evo wrapper (config, model, tokenizer). If the first attempt fails,
# free memory and retry exactly once; a second failure propagates to the caller.
evo = None  # drop any instance left over from a previous run of this cell
try:
  evo = HF_Evo()
except Exception as err:
  # Report the failure instead of swallowing it silently. `except Exception`
  # (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
  print('First HF_Evo() attempt failed, retrying once:', repr(err))
  # `evo` is still None here (the constructor raised before assignment), so
  # there is nothing to delete. Collect garbage first so that freed tensors
  # can then be released from the CUDA cache.
  gc.collect()
  torch.cuda.empty_cache()
  evo = HF_Evo()

Model name needed! Using default: togethercomputer/evo-1-8k-base


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Tokenizer pad token:  
Tokenizer eos token:  


In [None]:
evo.model.dtype

torch.bfloat16

In [None]:
# Prompt for an optional generation check with the model before fine-tuning
# (the call itself is left disabled).
prompt = "@!"
#run_model(evo.model, evo.tokenizer,prompt)

In [None]:
# Read a file that has everything on one line and makes json.loads() fail.
import json

# The context manager closes the file even if read() raises.
with open('/content/training_data_5k_2.json') as f:
  doc = f.read()

# Instead of JSON parsing, split on double quotes and take every 8th fragment
# starting at index 7 as a sequence string.
# NOTE(review): this relies on a fixed field layout and on the file containing
# no escaped quotes -- confirm against the data file.
x = doc.split('"')
seqs = x[7::8]

# One {'text': sequence} record per extracted sequence.
dataset = [{'text': seq} for seq in seqs]

dataset[0]

{'text': 'TAAAACAGCTCGCTATGACTGCCTAACCTTAACAAAATCAGTTCATCATCTTTCACTAAATACACAAGCAAAACATCAGGCTTAATGTGGCATTCCCTAAAAGGTTTCCACTTTCCCTTTAAGGCATGATCTTGAAATTGTGGATCTAGCGGTTCTTTTTTTCTTAAGGTTAGAATGACTTCATTCAAAACGCTATCATCAAACCCATTCAAAAGCAATTTATCAAAATCTTTTTGAAAAGATTTTTTAAGATTGAGCTTCAACAC`TAAGATTGAGCTTCAACACCTAAAGCCCTTTTTCTTTCATTGCTGTAACTAGAAAAATCCTCAACAATCAAATCTGTCTCTTTGTTACCTACATCTCTCATGGCTTGTTGCGTTTCAATGTTTGGGATCTCATGCCCCAAACAACAATCTCTTTTATCATCAAAAGCTTGACTGATTTTTTGCAAGAGTTCATTTAACGCGTCTATTTTATCCTTAAAGTTTTGATCCCTTTTTTCCAATTCTTTAGCCATTTTTTCTTTAAAAGAAATTCTATCATTTTGCATCTTTTTGATTTTTCGCTCTAATTGATGGATTAAATTAAAAAGCTGTTTTTTGCTGTATTTTGTGTAGTCTTTTTTGGCGGTGGTGTTAGGCAT!@                                                                                                                                                                                                                                                                                                                                                        

In [None]:
# to read files already as json
#import json

#f = open('/content/training_data_5k.json')
#doc = f.read()
#dataset = json.loads(doc)
#f.close()
#print(len(dataset), dataset[0])

In [None]:
# to read files with each sequence per line
#import json

#f = open('/content/training_data_1k.json')
#dataset = []
#lines = f.readlines()
#for l in lines:
#  json_obj = json.loads(l)
#  dataset.append(json_obj)
#f.close()
#dataset[0]

In [None]:
import copy

MAX_LENGTH = 1024 #4096 for A100
dataset_size = 10000
filtered_dataset = []
c = 0  # how many sequences were skipped for exceeding MAX_LENGTH

# Tokenize at most the first `dataset_size` records, padding each to MAX_LENGTH.
for i, d in enumerate(dataset):
  if i >= dataset_size:
    break

  t = d['text'].strip()
  # The length check is on characters of the stripped text, not on token count.
  if len(t) > MAX_LENGTH:
    c += 1
    continue

  temp = evo.tokenizer(t, return_tensors="pt", truncation=True, padding='max_length', max_length=MAX_LENGTH)

  # The tokenizer returns a batch of one; store an independent copy of row 0.
  for key in ('input_ids', 'attention_mask', 'token_type_ids'):
    temp[key] = copy.deepcopy(temp[key][0])

  # NOTE(review): labels duplicate input_ids including the padded positions, so
  # padding presumably contributes to the training loss -- confirm this is
  # intended (e.g. to teach the model to emit the space eos token).
  temp["labels"] = copy.deepcopy(temp['input_ids'])
  temp['text'] = t

  filtered_dataset.append(temp)

print('Seqs longer than max_length:',c)
print(len(filtered_dataset), filtered_dataset[0])

Seqs longer than max_length: 0
10000 {'input_ids': tensor([84, 65, 65,  ..., 32, 32, 32]), 'token_type_ids': tensor([0, 0, 0,  ..., 0, 0, 0]), 'attention_mask': tensor([1, 1, 1,  ..., 0, 0, 0]), 'labels': tensor([84, 65, 65,  ..., 32, 32, 32]), 'text': 'TAAAACAGCTCGCTATGACTGCCTAACCTTAACAAAATCAGTTCATCATCTTTCACTAAATACACAAGCAAAACATCAGGCTTAATGTGGCATTCCCTAAAAGGTTTCCACTTTCCCTTTAAGGCATGATCTTGAAATTGTGGATCTAGCGGTTCTTTTTTTCTTAAGGTTAGAATGACTTCATTCAAAACGCTATCATCAAACCCATTCAAAAGCAATTTATCAAAATCTTTTTGAAAAGATTTTTTAAGATTGAGCTTCAACAC`TAAGATTGAGCTTCAACACCTAAAGCCCTTTTTCTTTCATTGCTGTAACTAGAAAAATCCTCAACAATCAAATCTGTCTCTTTGTTACCTACATCTCTCATGGCTTGTTGCGTTTCAATGTTTGGGATCTCATGCCCCAAACAACAATCTCTTTTATCATCAAAAGCTTGACTGATTTTTTGCAAGAGTTCATTTAACGCGTCTATTTTATCCTTAAAGTTTTGATCCCTTTTTTCCAATTCTTTAGCCATTTTTTCTTTAAAAGAAATTCTATCATTTTGCATCTTTTTGATTTTTCGCTCTAATTGATGGATTAAATTAAAAAGCTGTTTTTTGCTGTATTTTGTGTAGTCTTTTTTGGCGGTGGTGTTAGGCAT!@'}


In [None]:
from datasets import Dataset
# Convert the list of tokenized records into a datasets.Dataset.
# NOTE(review): this rebinds `dataset`, which previously held the raw list of
# {'text': ...} dicts consumed by the tokenization loop; re-running that loop
# after this cell would iterate this Dataset instead of the raw list.
dataset = Dataset.from_list(filtered_dataset)

In [None]:
# Hold out 20% of the rows (truncated to an int) as the test split; the split
# is seeded with 0.
test_size = int(len(dataset)*0.2)
split_dataset = dataset.train_test_split(test_size=test_size, seed=0)

In [None]:
split_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels', 'text'],
        num_rows: 8000
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels', 'text'],
        num_rows: 2000
    })
})

In [None]:
from transformers import DefaultDataCollator
# Collator later handed to the Trainer; configured to return PyTorch tensors.
data_collator = DefaultDataCollator(return_tensors="pt")

In [None]:
# Names of every sub-module of the model whose type string contains "Linear".
linear_layers = [
  name
  for name, module in evo.model.named_modules()
  if "Linear" in str(type(module))
]
print(linear_layers, len(linear_layers))

['backbone.blocks.0.projections', 'backbone.blocks.0.out_filter_dense', 'backbone.blocks.0.mlp.l1', 'backbone.blocks.0.mlp.l2', 'backbone.blocks.0.mlp.l3', 'backbone.blocks.1.projections', 'backbone.blocks.1.out_filter_dense', 'backbone.blocks.1.mlp.l1', 'backbone.blocks.1.mlp.l2', 'backbone.blocks.1.mlp.l3', 'backbone.blocks.2.projections', 'backbone.blocks.2.out_filter_dense', 'backbone.blocks.2.mlp.l1', 'backbone.blocks.2.mlp.l2', 'backbone.blocks.2.mlp.l3', 'backbone.blocks.3.projections', 'backbone.blocks.3.out_filter_dense', 'backbone.blocks.3.mlp.l1', 'backbone.blocks.3.mlp.l2', 'backbone.blocks.3.mlp.l3', 'backbone.blocks.4.projections', 'backbone.blocks.4.out_filter_dense', 'backbone.blocks.4.mlp.l1', 'backbone.blocks.4.mlp.l2', 'backbone.blocks.4.mlp.l3', 'backbone.blocks.5.projections', 'backbone.blocks.5.out_filter_dense', 'backbone.blocks.5.mlp.l1', 'backbone.blocks.5.mlp.l2', 'backbone.blocks.5.mlp.l3', 'backbone.blocks.6.projections', 'backbone.blocks.6.out_filter_dense'

In [None]:
# Keep the linear layers whose name contains "mlp" or "mha".
mlp_layers = [ll for ll in linear_layers if "mlp" in ll or "mha" in ll]
# Preview the selection without its first three entries (the same slice is
# passed to LoraConfig as target_modules).
print(len(mlp_layers[3:]), mlp_layers[3:])


99 ['backbone.blocks.1.mlp.l1', 'backbone.blocks.1.mlp.l2', 'backbone.blocks.1.mlp.l3', 'backbone.blocks.2.mlp.l1', 'backbone.blocks.2.mlp.l2', 'backbone.blocks.2.mlp.l3', 'backbone.blocks.3.mlp.l1', 'backbone.blocks.3.mlp.l2', 'backbone.blocks.3.mlp.l3', 'backbone.blocks.4.mlp.l1', 'backbone.blocks.4.mlp.l2', 'backbone.blocks.4.mlp.l3', 'backbone.blocks.5.mlp.l1', 'backbone.blocks.5.mlp.l2', 'backbone.blocks.5.mlp.l3', 'backbone.blocks.6.mlp.l1', 'backbone.blocks.6.mlp.l2', 'backbone.blocks.6.mlp.l3', 'backbone.blocks.7.mlp.l1', 'backbone.blocks.7.mlp.l2', 'backbone.blocks.7.mlp.l3', 'backbone.blocks.8.inner_mha_cls.Wqkv', 'backbone.blocks.8.inner_mha_cls.out_proj', 'backbone.blocks.8.mlp.l1', 'backbone.blocks.8.mlp.l2', 'backbone.blocks.8.mlp.l3', 'backbone.blocks.9.mlp.l1', 'backbone.blocks.9.mlp.l2', 'backbone.blocks.9.mlp.l3', 'backbone.blocks.10.mlp.l1', 'backbone.blocks.10.mlp.l2', 'backbone.blocks.10.mlp.l3', 'backbone.blocks.11.mlp.l1', 'backbone.blocks.11.mlp.l2', 'backbone.b

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters.
# NOTE(review): the rule of thumb cited below is alpha = 2x r, but alpha equals r here -- confirm intended.
lora_alpha = 128 # rule of thumb is 2x r: https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms
lora_dropout = 0.1 # 0.05 recommended
# NOTE(review): an earlier note suggested r between 8 and 16 because of the resources available; 128 is used here -- confirm.
lora_r = 128 # rank of the LoRA update matrices

lora_config = LoraConfig(
                 r = lora_r, # the dimension of the low-rank matrices
                 lora_alpha = lora_alpha, # scaling factor for the weight matrices
                 lora_dropout = lora_dropout, # dropout probability of the LoRA layers
                 bias="none", # could be changed to see whether it affects performance
                 #task_type="CAUSAL_LM",          #could also not include this
                 # adapters go on the selected layers, minus the first three entries of mlp_layers
                 target_modules=mlp_layers[3:],
                 init_lora_weights = 'gaussian',
                 #is_prompt_learning=True
                 )

## more details here: https://huggingface.co/docs/peft/main/en/package_reference/lora#peft.LoraConfig

In [None]:
from transformers import TrainingArguments, Trainer
import bitsandbytes  # not referenced anywhere else in this cell

# Training run configuration. MODEL_SAVE_FOLDER_NAME is used as the Trainer
# output directory and, later in the notebook, as a prefix for exported files.
EPOCHS = 3
LEARNING_RATE = 3e-4 # TODO: revisit this value
MODEL_SAVE_FOLDER_NAME = "lora_evo_ta_all_layers_16"
training_args = TrainingArguments(
                    output_dir=MODEL_SAVE_FOLDER_NAME,
                    overwrite_output_dir=True,
                    # NOTE(review): lr_scheduler_type below is "constant"; warmup is
                    # presumably only honoured by "constant_with_warmup" -- confirm
                    # whether these 500 warmup steps have any effect.
                    warmup_steps=500,
                    gradient_accumulation_steps=1,
                    per_device_train_batch_size=2,
                    per_device_eval_batch_size=2,
                    learning_rate=LEARNING_RATE,
                    num_train_epochs=EPOCHS,
                    logging_strategy="steps",
                    evaluation_strategy="steps",
                    # Evaluation and logging run on different step intervals.
                    eval_steps=1999,
                    logging_steps=1000,
                    # Checkpoints are written per epoch, not per eval step.
                    save_strategy="epoch",
                    log_level = 'debug',
                    logging_dir = './log/',
                    do_train = True,
                    do_eval = True,
                    lr_scheduler_type = "constant",

)



In [None]:
# Wrap the loaded model with the adapters described by lora_config and print
# the trainable-parameter summary.
evo_peft_model = get_peft_model(evo.model, lora_config)
evo_peft_model.print_trainable_parameters()

trainable params: 188,282,880 || all params: 6,641,063,936 || trainable%: 2.8351


In [None]:
# Re-assert pad == eos on the tokenizer (HF_Evo.__init__ sets the same thing).
evo.tokenizer.pad_token = evo.tokenizer.eos_token
# Trainer over the PEFT-wrapped model, using the train/test splits and the
# collator built in earlier cells.
trainer = Trainer(
        model=evo_peft_model,
        tokenizer=evo.tokenizer,
        args=training_args,
        train_dataset=split_dataset['train'],
        eval_dataset=split_dataset['test'],
        data_collator=data_collator,
)

# NOTE(review): this flag is set by hand after construction, presumably so an
# evaluation loss is reported for this custom model -- confirm it is needed.
trainer.can_return_loss = True

# Turn the cache flag off for training; it is switched back on after
# trainer.train() in the next cell.
evo_peft_model.config.use_cache = False

# Disabled experiment: cast modules with "norm" in their name to float32.
#for name, module in trainer.model.named_modules():
#    if "norm" in name:
#        module = module.to(torch.float32)

In [None]:
# Run fine-tuning with the Trainer configured above.
trainer.train()
## only saves the incremental 🤗 PEFT weights (adapter_model.bin) that were trained, meaning it is super efficient to store, transfer, and load.
#trainer.model.save_pretrained(MODEL_SAVE_FOLDER_NAME)
## save the full model and the training arguments
#trainer.save_model(MODEL_SAVE_FOLDER_NAME)
#trainer.model.config.save_pretrained(MODEL_SAVE_FOLDER_NAME)
# Switch the cache flag back on (it was turned off before training) for the
# generation cells that follow.
evo_peft_model.config.use_cache = True

Currently training with a batch size of: 2
The following columns in the training set don't have a corresponding argument in `PeftModel.forward` and have been ignored: token_type_ids, text. If token_type_ids, text are not expected by `PeftModel.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 8,000
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 2
  Gradient Accumulation steps = 1
  Total optimization steps = 12,000
  Number of trainable parameters = 188,282,880


Step,Training Loss,Validation Loss
1999,2.8598,2.628863
3998,2.5927,2.585203
5997,2.5467,2.571652


The following columns in the evaluation set don't have a corresponding argument in `PeftModel.forward` and have been ignored: token_type_ids, text. If token_type_ids, text are not expected by `PeftModel.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 2
The following columns in the evaluation set don't have a corresponding argument in `PeftModel.forward` and have been ignored: token_type_ids, text. If token_type_ids, text are not expected by `PeftModel.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 2
Saving model checkpoint to lora_evo_ta_all_layers_16/checkpoint-4000
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--togethercomputer--evo-1-8k-base/snapshots/cb2b3eb17365b1c22deab9114aec7afb2746c9f1/config.json
Model config StripedHyenaConfig {
  "_commit_hash": "cb2b3eb17365b1c22deab9114aec7afb2746c9f1",
  "_name_o

In [None]:
# Collect every parameter whose name contains "lora_B" and print the sum of
# its elements, one line per parameter.
lora_params = {}
for name, param in trainer.model.named_parameters():
    if "lora_B" not in name:
        continue
    lora_params[name] = param
    print(name, param.sum())

base_model.model.backbone.blocks.0.projections.lora_B.default.weight tensor(12.6250, device='cuda:0', dtype=torch.bfloat16, grad_fn=<SumBackward0>)
base_model.model.backbone.blocks.0.out_filter_dense.lora_B.default.weight tensor(7.3438, device='cuda:0', dtype=torch.bfloat16, grad_fn=<SumBackward0>)
base_model.model.backbone.blocks.0.mlp.l1.lora_B.default.weight tensor(540., device='cuda:0', dtype=torch.bfloat16, grad_fn=<SumBackward0>)
base_model.model.backbone.blocks.0.mlp.l2.lora_B.default.weight tensor(-2.9375, device='cuda:0', dtype=torch.bfloat16, grad_fn=<SumBackward0>)
base_model.model.backbone.blocks.0.mlp.l3.lora_B.default.weight tensor(-5.4688, device='cuda:0', dtype=torch.bfloat16, grad_fn=<SumBackward0>)
base_model.model.backbone.blocks.1.projections.lora_B.default.weight tensor(29.8750, device='cuda:0', dtype=torch.bfloat16, grad_fn=<SumBackward0>)
base_model.model.backbone.blocks.1.out_filter_dense.lora_B.default.weight tensor(6.3125, device='cuda:0', dtype=torch.bfloat16

In [None]:
split_dataset['test'][100]['text']

'`TATAAAATTCCGAATATAAAAGTATAAATTAAGATGGCAAAACCAAAGATTAAGAAAATTACAGTAAAGCCTAGATAAGATTCTTTTCGTTCTTCTTTTTTAGTTTTAAGCATTGTTACCAATAATATCGTTCCTGCGATGATAAATAAGATAGAAAGCCACAGCAAAATTTGGAAATGTAAGCTCAT`GATATCCCTCCCTAGTCAAAAAT!TAGACAAATTCATAAAAAATCGGATAAATGTAATACAGTGCTAAGATAATGTTGATAAAAAATAATATTTCACTGAATAAATCACTATAGATAATTACTAAGGTCAAGCTTGAAAAAAGAGCAAAACCACAAATTGCTAAACCAGGTATTAACCATAACTTATAATCTATATTTGCGTTATAGGTTATGAGAATGTTAAATGAAAAAAATAAAATGGTGAATATCAAATTGAATATATTTGCTTTGTTTAATTTCAT!@'

In [None]:
# Generate up to 1024 new tokens from the fine-tuned model, seeded with "@!".
prompt="@!"
run_model(trainer.model, trainer.tokenizer, prompt, max_new_tokens=1024)

Setting `pad_token_id` to `eos_token_id`:32 for open-end generation.


Initializing inference params...
@!TGATTAAATCTTTCAAGCACAAAGGGTTGAAGCAACTTTTTGAAAAAGGGATTACTTCTGGAGTACCTGCACCACAGGCTGAAAAGTTATCCCACCGCTTAGCGGTGATTGATGCAGCGAAAACTATTGATGATCTTGATATGCCAGGTTTTCGTTTACATCCGTTAAAAGGAAATAGAGAAGGAACATGGTCGATAACGATATCAGGAAACTGGCGTATTACGTTTGAGTTTGTTAATGGCGATGCATATATTCTGGATTACGAGGATTATCACTAA`!TGAAAATGGCCAATCATCCCCGCCCAGGGGATATTATTTATCATGAATATCTTGAACCACTCAATCTTAAAATCAATGATTTGGCTGAGTTGTTAGATGTTCATCGCAATACAGTAAGTGCACTGGTTAATAATAGTCGTAAACTTACTGCTGATATGGCAATGCGTCTGGCTAAAGTATTTGATACTACAGTCGAATTTTGGCTAAACCTGCAAACGGCTGTAGACCTTTGGGAAGTTGAAAACAATATGCGTACCCAGGAAGAGTTAAGCCGTATTGAAACAGTGGCAGAGCATCTTACTCATCGTAATGCACAGCAAAAACAGGCCGCATAG! 


In [None]:
# Same generation call as the previous cell, seeded with "@'" instead.
prompt="@'"
run_model(trainer.model, trainer.tokenizer, prompt, max_new_tokens=1024)

Setting `pad_token_id` to `eos_token_id`:32 for open-end generation.


Initializing inference params...
@'TGAATAATCGCGAACAAATCGAACAATCCGTTATAAGTGCTAGTGCGTATAACGGTAATGACACAGAGGGATTACTAAAAGAGATTGAGGACGTGTATAAGAAAGCGCGAGCGTTTGATGAAATACTTGAGGGTTTACCTAATGCTATGCAAGATGCACTCAAAGAAGATATTGATCTTGATGAAGCAGTAGGTATTATGACGGGGCAAGAAGTCTATAAAAATTTGCAAAGTGATGAAGAAAAGCATGACGAAGAGTAA!GAAAAGGGGCTTGTGTGACTACATCACTTGAAATTGAAACAAATCCTTTTGATAGACAAAAATACTCAAATGATGAGCTATTTGAGTTTAAGAATACTATTTTAAATAATCCTAAAATAGAAGCTATTATCCCTCAAAAAGGTGAAAAACATAATAATTATATGAAATTTATCAATATGCAAAAAGATGGTATACCTGTCGATATCATTCTAAGCGACAGCGAAGAAAAAGAAAATTATAAACGTTTAATGCGTAAAGGGTATGAGGATAGCCATGATTGA! 


In [None]:
from huggingface_hub import notebook_login

# Disabled alternatives for saving the adapter and tokenizer locally.
#evo_peft_model.save_pretrained("trained_"+MODEL_SAVE_FOLDER_NAME)
#trainer.model.save_pretrained("trained_from_trainer_"+MODEL_SAVE_FOLDER_NAME)
#trainer.tokenizer.save_pretrained("tokenizer_"+MODEL_SAVE_FOLDER_NAME)

# Show the interactive Hugging Face login widget so the push_to_hub call in the
# next cell can authenticate.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Upload the trained adapter to the Hugging Face Hub.
# The first positional argument of Trainer.push_to_hub is the commit message,
# not a repo id, so "lsmille/<folder>" was being recorded as the commit
# message. The destination repo is unchanged: TrainingArguments sets no
# hub_model_id, so it is still derived from output_dir.
trainer.push_to_hub(commit_message="Upload LoRA adapter " + MODEL_SAVE_FOLDER_NAME)

Saving model checkpoint to lora_evo_ta_all_layers_15
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--togethercomputer--evo-1-8k-base/snapshots/cb2b3eb17365b1c22deab9114aec7afb2746c9f1/config.json
Model config StripedHyenaConfig {
  "_commit_hash": "cb2b3eb17365b1c22deab9114aec7afb2746c9f1",
  "_name_or_path": "togethercomputer/evo-1-131k-base",
  "architectures": [
    "StripedHyenaModelForCausalLM"
  ],
  "attn_layer_idxs": [
    8,
    16,
    24
  ],
  "auto_map": {
    "AutoConfig": "togethercomputer/evo-1-131k-base--configuration_hyena.StripedHyenaConfig",
    "AutoModelForCausalLM": "togethercomputer/evo-1-131k-base--modeling_hyena.StripedHyenaModelForCausalLM",
    "AutoTokenizer": [
      "togethercomputer/evo-1-131k-base--tokenizer.ByteTokenizer",
      null
    ]
  },
  "column_split": false,
  "column_split_hyena": true,
  "eps": 1e-06,
  "final_norm": true,
  "hidden_size": 4096,
  "hyena_filter_groups": 1,
  "hyena_layer_idxs": [
 

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/571M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/lsmille/lora_evo_ta_all_layers_15/commit/461cdac2fba3b8f48ed2f398eb8ab330bfc00a87', commit_message='lsmille/lora_evo_ta_all_layers_15', commit_description='', oid='461cdac2fba3b8f48ed2f398eb8ab330bfc00a87', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Write both splits to JSON Lines files named after the run folder.
split_dataset['train'].to_json(MODEL_SAVE_FOLDER_NAME+"_train.jsonl")
split_dataset['test'].to_json(MODEL_SAVE_FOLDER_NAME+"_test.jsonl")

Creating json from Arrow format:   0%|          | 0/8 [00:00<?, ?ba/s]

Creating json from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

21897405

# ***Validation***

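Restart the session, then download the fine-tuned model from the Hub. A restart clears all notebook state, so re-run the cell that defines `run_model` before generating.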

In [None]:
!pip install -q -U trl transformers accelerate peft
!pip install -q datasets bitsandbytes einops
!pip install -q flash_attn
!pip install accelerate
#!pip install -i https://pypi.org/simple/ bitsandbytes

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
import gc



In [None]:
from peft import PeftModel

model_name = 'lsmille/lora_evo_ta_all_layers_1'

# Load the base checkpoint pinned to the same revision that HF_Evo uses for
# training ("1.1_fix"), then attach the fine-tuned adapter on top of it.
# Loading the adapter repo directly through AutoModelForCausalLM pulled the
# base weights without that revision pin.
# NOTE(review): assumes the adapter was trained on togethercomputer/evo-1-8k-base
# -- confirm against base_model_name_or_path in the adapter's adapter_config.json.
#config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
base_model = AutoModelForCausalLM.from_pretrained("togethercomputer/evo-1-8k-base",
                                                  trust_remote_code=True,
                                                  revision="1.1_fix")
model_reloaded = PeftModel.from_pretrained(base_model, model_name).to('cuda:0')
model_reloaded.config.use_cache = True
model_reloaded.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


adapter_config.json:   0%|          | 0.00/6.57k [00:00<?, ?B/s]



Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/69.0 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/71.4M [00:00<?, ?B/s]

StripedHyenaModelForCausalLM(
  (backbone): StripedHyena(
    (embedding_layer): VocabParallelEmbedding(512, 4096)
    (norm): RMSNorm()
    (unembed): VocabParallelEmbedding(512, 4096)
    (blocks): ModuleList(
      (0-7): 8 x ParallelGatedConvBlock(
        (pre_norm): RMSNorm()
        (post_norm): RMSNorm()
        (filter): ParallelHyenaFilter()
        (projections): lora.Linear(
          (base_layer): Linear(in_features=4096, out_features=12288, bias=True)
          (lora_dropout): ModuleDict(
            (default): Dropout(p=0.05, inplace=False)
          )
          (lora_A): ModuleDict(
            (default): Linear(in_features=4096, out_features=16, bias=False)
          )
          (lora_B): ModuleDict(
            (default): Linear(in_features=16, out_features=12288, bias=False)
          )
          (lora_embedding_A): ParameterDict()
          (lora_embedding_B): ParameterDict()
        )
        (out_filter_dense): lora.Linear(
          (base_layer): Linear(in_featur

In [None]:
# Load the base Evo tokenizer and repeat the special-token setup used during
# training: a single space as eos, reused as the padding token.
evo_tokenizer=AutoTokenizer.from_pretrained("togethercomputer/evo-1-8k-base",trust_remote_code=True)
#tokenizer_reload = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True)
evo_tokenizer.add_special_tokens({'eos_token': ' '})
evo_tokenizer.pad_token = evo_tokenizer.eos_token

In [None]:
# Generate from the reloaded model with run_model's default settings.
# NOTE: run_model is defined in the training section of this notebook; after a
# session restart its defining cell has to be run again before this one.
prompt='!'
run_model(model_reloaded, evo_tokenizer, prompt)

Setting `pad_token_id` to `eos_token_id`:32 for open-end generation.


Initializing inference params...


  z_pre = fir_fn(


!TGAACCCCCATGCCGCCACGACCGCATCACTATAAATTTGGTATCCCGAATGTATAGGCTAAAAATGATTAATTATGTAAAATGGTTATATATTTATTATTTAGCAAACATTGTTTTTTATTTATTTTTTTACTAATTATATTCCATTCTATTAAATTATAATTTTTTTCACCTTATATTGTATTTAATTATTTTTTTTTTTCTTCTTTTTTATCTCTATTTTTGTCTTCTTTTTTTGTTTTTTTTCCTGGATCTCACATGTCTTTTATCCATCATGCTTTTTTCTTTTTTTTCATAATCTGTCGTCTTTATTTCTATTTCTTATAATTCACACTTAATAGCCATAAATAGCCAATTTTATTCTTTTTTTTTTTTAATTATTTATATATACAGTATATAACTCAATTTTTCTTTTATTTTTTAAAAAAATAGTATATTTTCTTTTTCTTCTTCTTTTTTTCTTTTCTCTTATTTTTTTTTTTTTTTTTTATTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCTTTTTTTTTCTTTTTTTTTTTTTATCTTTTTTTCTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTCTCTCTTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTATCTCTCTCTTTTTCTTCTCTTTTTTTCTCTCTCTTCTTTTTTTTTTTTCTTTTTTTTTTCTCTCTTTTTCTCTCTTTTTTCTCCCCCTTTTTCACCCCCCAATCACAAAACACAGCATCTTTTTTTTTTCTCTCCCCCCACTCACCACTCTCCACCCACACGGCCCACTCTTTTTCCCCCCATCCCCCCATCATCTCCCCCCCTCTCCATTCCCACCTCCCCCCCAATTCTCCTTTTTTTTCTACTCCCCACCCTTCCACACACTACCCCCCCCCCCCTTCCGCTCC

In [None]:
# from trl import SFTTrainer

# max_seq_length = 1024

# trainer = SFTTrainer(
#     model=evo.model,
#     train_dataset=dataset,
#     eval_dataset=dataset_test,
#     peft_config=lora_config,
#     dataset_text_field="text",
#     max_seq_length=max_seq_length,
#     tokenizer=evo.tokenizer,
#     args=training_args,
# )

In [None]:
#trainer.train()

In [None]:
#this is for when you don't load LoRA into the model beforehand; same as the commented-out cell two above
#also change get_peft_model

# from trl import SFTTrainer

# max_seq_length = 512

# trainer = SFTTrainer(
#     model=evo.model,
#     train_dataset=dataset,
#     peft_config=lora_config, #lora config is here
#     dataset_text_field="text",
#     max_seq_length=max_seq_length,
#     tokenizer=evo.tokenizer,
#     args=training_args,
# )

# trainer.train()

#try same inference but with new model