In [None]:
!pip install -q  torch pandas peft==0.4.0 bitsandbytes==0.40.2 transformers==4.35.2 trl==0.4.7 accelerate

import gc

import pandas as pd
import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from trl import SFTTrainer

In [None]:
# Dataset: read the local parquet file as the single "train" split.
training_data = load_dataset("parquet", data_files="data.parquet", split='train')


# Model and tokenizer names
base_model_name = "NousResearch/Llama-2-7b-chat-hf"
# Directory name the trained LoRA adapter is saved to / loaded from.
refined_model = "llama-2-7b-advisor-high-rank-high-learning-rate"

# Tokenizer
# `trust_remote_code` is intentionally not enabled: this checkpoint resolves to
# the built-in Llama tokenizer, so there is no reason to allow code from the
# Hub repository to execute locally.
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# Reuse the end-of-sequence token for padding.
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"  # Fix for fp16

# Quantization Config: 4-bit NF4 weights, float16 compute, no double quantization.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Model: load the quantized base model entirely onto GPU 0.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
# Training does not use the generation cache.
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [None]:
# LoRA Config: rank-64 adapters, alpha 16, 10% dropout, no bias training.
peft_parameters = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM"
)

# Training Params
train_params = TrainingArguments(
    output_dir="./results_modified",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    # Checkpoint and log every 25 optimizer steps.
    save_steps=25,
    logging_steps=25,
    # NOTE(review): 2e-2 looks deliberate (the adapter name says
    # "high-learning-rate"), but the recorded run logged a loss of 8.4532 at
    # step 25 and 0.0 from step 50 onward, which looks like divergence rather
    # than convergence. Confirm this value before trusting the adapter.
    learning_rate=2e-2,
    weight_decay=0.001,
    # Mixed-precision training flags are both off.
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    # -1 means "derive the number of steps from num_train_epochs".
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    # The plain "constant" schedule ignores warmup_ratio entirely; the
    # "constant_with_warmup" schedule ramps up over the configured warmup and
    # then holds the learning rate constant, so warmup_ratio above takes effect.
    lr_scheduler_type="constant_with_warmup",
    report_to="tensorboard"
)

# Trainer: supervised fine-tuning on the text column named "train".
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=training_data,
    peft_config=peft_parameters,
    dataset_text_field="train",
    tokenizer=llama_tokenizer,
    args=train_params
)

# Training
fine_tuning.train()

# Save Model (written to the `refined_model` directory)
fine_tuning.model.save_pretrained(refined_model)

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,8.4532
50,0.0
75,0.0
100,0.0
125,0.0
150,0.0
175,0.0
200,0.0
225,0.0
250,0.0





No chat template is defined for this tokenizer - using the default template for the LlamaTokenizerFast class. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.



 I apologize, but it is generally not possible to register for two courses that take place at the same time. However, there may be some exceptions depending on the specific course and institution. Here are some possible scenarios:

1. Overlapping courses: If two courses are offered at the same time but are not identical, you may be able to register for both
 The number of credit hours required to obtain a Master's of Science in Electrical Engineering (MSEEE) degree can vary depending on the institution and the specific program. Here are some general guidelines:

1. Typical Requirements: A typical MSEEE program requires 30-36 credit hours of coursework
 Electrical engineering is a broad field that encompasses a wide range of topics and subfields. Here are some of the main threads or areas of focus within electrical engineering:

1. Power Systems: This thread deals with the generation, transmission, and distribution of electrical power, including the design of power plants, transmission 

KeyboardInterrupt: ignored

In [None]:
# Merge the trained LoRA adapter into the base weights and run the validation
# questions through a conversational pipeline.
#
# Why the base model is reloaded here:
#   * peft 0.4.0 raises ValueError from merge_and_unload() when the underlying
#     model was loaded in 4-bit/8-bit, and
#   * the `base_model` object used for training has already had LoRA layers
#     injected by SFTTrainer, so wrapping it again would stack adapters.
# A clean, unquantized fp16 copy of the base checkpoint avoids both problems.

# Release the quantized training objects (and any model left over from a
# previous run of this cell) so the fp16 copy has room on the GPU.
# After this cell has run, re-run the model-loading cell before training again.
model = None
fine_tuning = None
base_model = None
gc.collect()
torch.cuda.empty_cache()

model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        base_model_name,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map={"": 0},
    ),
    refined_model,
).merge_and_unload()

# Validation prompts: each entry of the 'questions' column is indexed by the
# 'question' key below.
validation_df = pd.read_json('validation.json')
questions = validation_df['questions']

# NOTE(review): max_length caps prompt + generated tokens together, which is
# presumably why recorded answers stop mid-sentence; consider max_new_tokens.
text_gen = pipeline(task="conversational", model=model, tokenizer=llama_tokenizer, max_length=100)
for question in questions:
    query = question['question']
    conversation = [{"role": "user", "content": query}]
    result = text_gen(conversation)
    # The last generated response is the model's answer to this question.
    print(result.generated_responses[-1])

# Optional: publish the merged model and tokenizer to the Hub.
# model.push_to_hub(refined_model, use_temp_dir=False)
# llama_tokenizer.push_to_hub(refined_model, use_temp_dir=False)

ValueError: ignored