In [1]:
#fine-tuning
import pandas as pd
#from finetuning_prompting import *
import os
import torch
from datasets import load_dataset
from transformers import (
  AutoModelForCausalLM,
  AutoTokenizer,
  BitsAndBytesConfig,
  TrainingArguments,
  pipeline,
  logging,
)
from peft import LoraConfig
from peft import prepare_model_for_kbit_training
from trl import SFTTrainer
import dictionaries

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Preparing Model

In [2]:
torch.cuda.is_available()

True

In [3]:


# Model from Hugging Face hub
base_model = "meta-llama/Llama-2-13b-chat-hf"
print(base_model)

# Fine-tuned model
new_model = "llama-2-13b-datacamp_v6_ENG_ONLYPRES"

compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_compute_dtype=compute_dtype,
  #bnb_4bit_compute_dtype=torch.bfloat16
  bnb_4bit_use_double_quant=False
)

model = AutoModelForCausalLM.from_pretrained(
  base_model,
  trust_remote_code=True,
  quantization_config=quant_config,
  device_map= "auto"
  #device_map= {"": 0},
)
model.config.use_cache = False
model.config.pretraining_tp = 1


tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

peft_params = LoraConfig(
  lora_alpha=32,
  lora_dropout=0.1,
  r=16, #cambiar
  bias="none",
  task_type="CAUSAL_LM",
  target_modules=[
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
  ]
)

meta-llama/Llama-2-13b-chat-hf


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Dataset

In [7]:

from datasets import load_dataset
from datasets import concatenate_datasets
#dataset_89 = load_dataset("json", data_files="translated_data_89_pres.json", split = "train")
#dataset_88 = load_dataset("json", data_files="translated_data_88_pres.json", split = "train")
dataset = load_dataset("json", data_files="train_2612_ENG_ONLYPRES.json", split = "train")
#dataset = concatenate_datasets([dataset_89, dataset_88])

Generating train split: 0 examples [00:00, ? examples/s]

# Training

In [8]:
dir_name = "finetuning-13b_datacamp_v6_ENG_ONLYPRES"
OUTPUT_DIR = "./" + dir_name

In [None]:
%load_ext tensorboard
%tensorboard --logdir dir_name/runs

In [9]:
training_arguments = TrainingArguments(
  output_dir=OUTPUT_DIR,
  num_train_epochs=1,
  per_device_train_batch_size=4,
  gradient_accumulation_steps=1,
  optim="paged_adamw_32bit",
  save_steps=25,
  logging_steps = 25,
  learning_rate = 2e-5,
  weight_decay = 0.001,
  fp16=False,
  bf16=False,
  max_grad_norm=0.3,
  max_steps=-1,
  warmup_ratio=0.01,
  group_by_length=True,
  lr_scheduler_type="constant",
  gradient_checkpointing=True,
  report_to="tensorboard"
)

trainer = SFTTrainer(
  model=model,
  train_dataset = dataset,
  peft_config=peft_params,
  tokenizer=tokenizer,
  dataset_text_field="text",
  args=training_arguments,
)

trainer.train()
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)

Map:   0%|          | 0/2612 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,2.8689
50,1.9863
75,0.9532
100,0.2592
125,0.1812
150,0.1021
175,0.1146
200,0.0834
225,0.0834
250,0.0615


('llama-2-13b-datacamp_v6_ENG_ONLYPRES/tokenizer_config.json',
 'llama-2-13b-datacamp_v6_ENG_ONLYPRES/special_tokens_map.json',
 'llama-2-13b-datacamp_v6_ENG_ONLYPRES/tokenizer.model',
 'llama-2-13b-datacamp_v6_ENG_ONLYPRES/added_tokens.json',
 'llama-2-13b-datacamp_v6_ENG_ONLYPRES/tokenizer.json')

In [None]:
from tensorboard import notebook
log_dir = dir_name + "/runs"
notebook.start("--logdir {} --port 4000".format(log_dir))