<a href="https://colab.research.google.com/github/harshita23sharma/opensource_llms/blob/main/llama2/Llama_finetuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU bitsandbytes \
  transformers==4.31 \
  git+https://github.com/huggingface/peft.git \
  git+https://github.com/huggingface/accelerate.git \
  datasets \
  evaluate \
  trl==0.7.1

## Load dataset

In [None]:

from datasets import load_dataset
dataset = load_dataset("harshita23sh/us-financial-data-transformation", split='train[:10%]')

dataset

In [None]:
#train test split
train_test_ds = dataset.train_test_split(test_size=0.3)
train_test_ds


## Create Instruction finetuning dataset

In [57]:
def format_instruction(text: str, title: str, entities_dict: list):
	return f"""### Instruction:

    <s>[INST] <<SYS>>
    You are an expert in financial news analytics.
    Please find companies, products, technologies and currencies
    in the text and assess sentiments towards them.
    <</SYS>>
    Please analyse the text:
    {text} [/INST]

    ### Summary:
    {title}

    ### JSON Data::
    {entities_dict}
    """.strip()

In [58]:
from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-7b-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, need auth token for these
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)



In [59]:
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)
model.eval()
print(f"Model loaded on {device}")



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model loaded on cuda:0


In [60]:
from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [61]:

from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16, #rank (matrix dimension)
    lora_alpha=64,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], #Target module for Llama
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
print(model)

##Train the quantised model

In [62]:
OUTPUT_DIR = "llama2-dinancial-data-analysis"

In [82]:
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    logging_steps=1,
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    num_train_epochs=2,
    evaluation_strategy="steps",
    eval_steps=0.2,
    warmup_ratio=0.05,
    save_strategy="epoch",
    group_by_length=True,
    output_dir=OUTPUT_DIR,
    # report_to="tensorboard",
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,
)
model.config.use_cache = False

In [83]:

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    temperature=0.0,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # mex number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausal

In [84]:
def format_instruction(text: str, title: str, entities_dict: list):
	return f"""### Instruction:

    <s>[INST] <<SYS>>
    You are an expert in financial news analytics.
    Please find companies, products, technologies and currencies
    in the text and assess sentiments towards them.
    <</SYS>>
    Please analyse the text:
    {text} [/INST]

    ### Summary:
    {title}

    ### JSON Data::
    {entities_dict}
    """.strip()

In [85]:
def generate_instruction_dataset(data_point):
  return {"text":data_point["text"], "title":data_point["title"], "entities":data_point["entities"], "output":format_instruction(data_point["text"], data_point["title"], data_point["entities"])}

In [86]:
from datasets import Dataset, load_dataset

def process_dataset(data: Dataset):
    return (
        data.shuffle(seed=42)
        .map(generate_instruction_dataset)
    )


In [87]:
train_ds = process_dataset(train_test_ds["train"])
validation_ds = process_dataset(train_test_ds["test"])

In [90]:
from trl import SFTTrainer
trainer = SFTTrainer(
    model=model,
    train_dataset=train_ds,
    eval_dataset=validation_ds,
    peft_config=lora_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
)

trainer.train()


Map:   0%|          | 0/1895 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
56,1.4425,1.473395
112,1.4184,1.436726
168,1.6344,1.420434
224,1.4038,1.411068


TrainOutput(global_step=276, training_loss=1.450122477999632, metrics={'train_runtime': 4602.9518, 'train_samples_per_second': 0.96, 'train_steps_per_second': 0.06, 'total_flos': 4.63228276114391e+16, 'train_loss': 1.450122477999632, 'epoch': 1.0})

In [91]:
peft_model_path="./peft-entity-sentiment"

trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

('./peft-entity-sentiment/tokenizer_config.json',
 './peft-entity-sentiment/special_tokens_map.json',
 './peft-entity-sentiment/tokenizer.json')

In [92]:
from transformers import TextStreamer
model.config.use_cache = True
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer

## Save base model merged with Adapter weights

In [93]:

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

peft_model_dir = "./peft-entity-sentiment"

# load base LLM model and tokenizer
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(peft_model_dir)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#Inference

In [111]:
validation_ds[0]["text"][:206]

'REDWOOD CITY, Calif., May 21, 2018 (GLOBE NEWSWIRE) -- Coherus BioSciences, Inc. (Nasdaq:CHRS), today announced it has commenced an underwritten public offering of $75,000,000 of shares of its common stock.'

In [112]:

index = 0

text = validation_ds[0]["text"][:206]
summary = validation_ds[index]["title"]

prompt = f"""
### Instruction:

    <s>[INST] <<SYS>>
    You are an expert in financial news analytics.
    Please find companies, products, technologies and currencies
    in the text and assess sentiments towards them.
    <</SYS>>
    Please analyse the text:
    {text} [/INST]

    ### Summary:

"""

input_ids = tokenizer(prompt, return_tensors='pt',truncation=True).input_ids.cuda()

outputs = trained_model.generate(input_ids=input_ids, max_new_tokens=1000, )
output= tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):]

dash_line = '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'TRAINED MODEL GENERATED TEXT :\n{output}')


---------------------------------------------------------------------------------------------------
INPUT PROMPT:

### Instruction:

    <s>[INST] <<SYS>>
    You are an expert in financial news analytics.
    Please find companies, products, technologies and currencies
    in the text and assess sentiments towards them.
    <</SYS>>
    Please analyse the text:
    REDWOOD CITY, Calif., May 21, 2018 (GLOBE NEWSWIRE) -- Coherus BioSciences, Inc. (Nasdaq:CHRS), today announced it has commenced an underwritten public offering of $75,000,000 of shares of its common stock. [/INST]

    ### Summary:


---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
Coherus Announces Proposed Public Offering of Common Stock

---------------------------------------------------------------------------------------------------
TRAINED MODEL GENERATED TEXT :
e company is offering $75 million in shares of its common stock. This is a neutra

In [114]:

trained_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
)
# Merge LoRA and base model
merged_model = trained_model.merge_and_unload()

# Save the merged model
merged_model.save_pretrained("merged_model",safe_serialization=True)
tokenizer.save_pretrained("merged_model")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

('merged_model/tokenizer_config.json',
 'merged_model/special_tokens_map.json',
 'merged_model/tokenizer.json')