<a href="https://colab.research.google.com/github/bivekSapkota/datasetjsp/blob/NursingScheduling/Lora_finetune_Nursing_scheduling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets
!pip install transformers
!pip install peft
!pip install accelerate
!pip install -U bitsandbytes
!pip install tensorboard

from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
    set_seed
)
import transformers
import torch
from functools import partial
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# 'preprocess_dataset' comes from the project-local 'utils' package, which is
# expected to live in the same directory as this notebook and to contain
# 'data_preprocessing.py' and '__init__.py'.
try:
    from utils.data_preprocessing import preprocess_dataset
except ImportError:
    # ImportError (the parent class of ModuleNotFoundError) is caught on purpose:
    # a missing package/module raises ModuleNotFoundError, but a module that
    # exists without defining 'preprocess_dataset' raises plain ImportError.
    # The last hint below describes exactly that second case.
    print("Could not import 'preprocess_dataset' from 'utils.data_preprocessing'.")
    print("Please ensure that:")
    print("- The 'utils' directory exists in the same directory as this script.")
    print("- The 'utils' directory contains a file named 'data_preprocessing.py'.")
    print("- The 'utils' directory contains an empty file named '__init__.py'.")
    print("- The 'preprocess_dataset' function is defined within 'data_preprocessing.py'.")
    # You can add more detailed troubleshooting steps here
    raise  # Re-raise the exception to stop execution

from utils.helping_functions import print_number_of_trainable_model_parameters



In [None]:
# --- Reproducibility ---------------------------------------------------------
seed = 42
set_seed(seed)
# Training curves can be inspected with:
#   tensorboard --logdir=./testing_final_code_peft-phi3-jssp/logs

# --- Data --------------------------------------------------------------------
custom_dataset_name = './JSSP prompt sample.json'

dataset = load_dataset("json", data_files=custom_dataset_name)
print(dataset)

# --- 4-bit quantised base model ------------------------------------------------
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

# Use device_map = {"": 0} if you want to train on a specific GPU.
# (Previously this line ended with a stray comma, which turned the value into
# an unused one-element tuple while the call below hard-coded 'auto'.)
device_map = "auto"

model_name = 'microsoft/Phi-3-mini-128k-instruct'

original_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,
    trust_remote_code=True,
    # `token` replaces the deprecated `use_auth_token`; False means
    # "do not send a stored Hugging Face token" (the model is public).
    token=False,
)

# --- Tokenizer used for training ------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.model_max_length = 40000
# Pad with <unk> rather than EOS. The causal-LM data collator used for training
# excludes every pad-token position from the loss; if pad == EOS the real EOS
# tokens are excluded as well, the model never learns to stop, and generation
# runs on endlessly.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'right'

# --- Tokenizer used for generation / evaluation ----------------------------------
eval_tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
eval_tokenizer.pad_token = eval_tokenizer.unk_token

DatasetDict({
    train: Dataset({
        features: ['num_jobs', 'num_machines', 'prompt_jobs_first', 'prompt_machines_first', 'output'],
        num_rows: 14
    })
})




model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/3.44k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.94M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

In [None]:
def gen(model, p, maxlen=1000, sample=False, tokenizer=None):
    """
    Generate a completion for a single prompt and return the decoded text.

    Args:
        model: A causal language model exposing ``generate``.
        p (str): The prompt text.
        maxlen (int, optional): Maximum number of *new* tokens to generate
            (passed as ``max_new_tokens``). Defaults to 1000.
        sample (bool, optional): Whether to sample (``do_sample``) instead of
            decoding greedily. Defaults to False.
        tokenizer (optional): Tokenizer used to encode the prompt and decode
            the result. Defaults to the notebook-level ``eval_tokenizer``.

    Returns:
        list: Decoded sequences with special tokens stripped, exactly as
        produced by ``model.generate``. The caller in this notebook splits the
        text on the prompt's ``Output:`` marker, i.e. it expects the prompt to
        be part of each returned string.
    """
    tok = eval_tokenizer if tokenizer is None else tokenizer
    # Follow the device the model actually lives on instead of assuming "cuda";
    # fall back to the previous hard-coded value for objects without `.device`.
    device = getattr(model, "device", "cuda")
    toks = tok(p, return_tensors="pt").to(device)
    res = model.generate(
        **toks,
        max_new_tokens=maxlen,
        do_sample=sample,
        num_return_sequences=1,
    ).to('cpu')
    return tok.batch_decode(res, skip_special_tokens=True)


index = 0

VAL_SET_SIZE = 4

# Split only once. `dataset` is rebound to the train/test DatasetDict, so without
# this guard a re-run of the cell would silently split the already reduced
# train split again and shrink the training data.
if "test" not in dataset:
    dataset = dataset["train"].train_test_split(
        test_size=VAL_SET_SIZE, shuffle=True, seed=seed
    )
train_val = dataset

# --- Zero-shot baseline on one held-out example ------------------------------
example = dataset['test'][index]
prompt = example['prompt_machines_first']
summary = example['output']

formatted_prompt = f"Instruct: Provide a schedule for the following JSSP problem .\n{prompt}\nOutput:\n"
res = gen(original_model, formatted_prompt, 2000)
# Keep everything after the FIRST 'Output:\n' marker: maxsplit=1 stops a marker
# repeated inside the generation from truncating it, and [-1] avoids an
# IndexError if the marker is missing from the decoded text.
output = res[0].split('Output:\n', 1)[-1]

dash_line = '-' * 99
print(dash_line)
print(f'INPUT PROMPT:\n{formatted_prompt}')
print(dash_line)
print(f'BASELINE SOLUTION:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

# --- Tokenise both splits for training ---------------------------------------
max_length = 40000
print('Max length to be used is ; ', max_length)

train_dataset = preprocess_dataset(tokenizer, max_length, seed, dataset['train'])
eval_dataset = preprocess_dataset(tokenizer, max_length, seed, dataset['test'])



The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48


---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Instruct: Provide a schedule for the following JSSP problem .
Optimize schedule for 7 Jobs across 7 Machines to minimize makespan. Each job involves a series of Operations needing specific machines and times. Operations are processed in order, without interruption, on a single Machine at a time.

Problem: 

 Machine 0 is used for the following Operations:
  Job 0 Operation 0 duration 35 mins.
  Job 1 Operation 0 duration 61 mins.
  Job 2 Operation 0 duration 5 mins.
  Job 3 Operation 1 duration 51 mins.
  Job 5 Operation 1 duration 44 mins.
  Job 4 Operation 2 duration 2 mins.
  Job 6 Operation 5 duration 38 mins.


 Machine 1 is used for the following Operations:
  Job 4 Operation 1 duration 90 mins.
  Job 0 Operation 2 duration 16 mins.
  Job 1 Operation 4 duration 57 mins.
  Job 2 Operation 4 duration 46 mins.
  Job 3 Operation 4 duration 5 mins.
  Job 5 Operation 4 dura

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Preprocessing dataset...


Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

In [None]:
# Show the splits, column names and row counts of the raw (not yet tokenised) dataset.
print(dataset)
# print(train_dataset["text"][0])

DatasetDict({
    train: Dataset({
        features: ['num_jobs', 'num_machines', 'prompt_jobs_first', 'prompt_machines_first', 'output'],
        num_rows: 10
    })
    test: Dataset({
        features: ['num_jobs', 'num_machines', 'prompt_jobs_first', 'prompt_machines_first', 'output'],
        num_rows: 4
    })
})


In [None]:
# Report the (rows, columns) shape of each tokenised split, then the schema of
# the training split.
print("Shapes of the datasets:")
for split_label, split_data in (("Training", train_dataset), ("Validation", eval_dataset)):
    print(f"{split_label}: {split_data.shape}")
print(train_dataset)

Shapes of the datasets:
Training: (10, 6)
Validation: (4, 6)
Dataset({
    features: ['num_jobs', 'num_machines', 'prompt_jobs_first', 'text', 'input_ids', 'attention_mask'],
    num_rows: 10
})


In [None]:
# #### 8. Setup the PEFT/LoRA model for Fine-Tuning
# Now, let's perform Parameter Efficient Fine-Tuning (PEFT). PEFT is a form of instruction fine-tuning that is much more efficient than full fine-tuning.
# PEFT is a generic term that includes Low-Rank Adaptation (LoRA) and prompt tuning (which is NOT THE SAME as prompt engineering!). In most cases, when someone says PEFT, they typically mean LoRA.
# LoRA, in essence, enables efficient model fine-tuning using fewer computational resources, often achievable with just a single GPU. After LoRA fine-tuning for a specific task or use case,
# the original LLM is left unchanged and a considerably smaller "LoRA adapter" is produced, often a single-digit percentage of the original LLM size (in MBs rather than GBs).
#
# During inference, the LoRA adapter must be combined with its original LLM. The advantage is that many LoRA adapters can reuse the same original LLM, which reduces overall memory
# requirements when handling multiple tasks and use cases.
#
# Note the rank (r) hyper-parameter, which defines the rank/dimension of the adapter to be trained.
# r is the rank of the low-rank matrix used in the adapters, which thus controls the number of parameters trained.
# A higher rank will allow for more expressivity, but there is a compute tradeoff.
#
# alpha is the scaling factor for the learned weights. The weight matrix is scaled by alpha/r, and thus a higher value for alpha assigns more weight to the LoRA activations.


# Parameter count of the quantised base model, before any adapter is attached.
print(print_number_of_trainable_model_parameters(original_model))


config = LoraConfig(
    r=128,  # Rank
    lora_alpha=256,
    # Phi-3 layer names. The previous 'fc1'/'fc2' entries are Phi-2 names and
    # matched no module in this model, so the MLP layers were silently left
    # without adapters (only the attention projections were being trained).
    target_modules=[
        'qkv_proj',
        'o_proj',
        'gate_up_proj',
        'down_proj',
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

# 1 - Enabling gradient checkpointing to reduce memory usage during fine-tuning
original_model.gradient_checkpointing_enable()

# 2 - Using the prepare_model_for_kbit_training method from PEFT
original_model = prepare_model_for_kbit_training(original_model)

peft_model = get_peft_model(original_model, config)


# Once the base model is prepared and wrapped, report how many parameters are
# trainable now that the LoRA adapters are attached.
print(print_number_of_trainable_model_parameters(peft_model))


output_dir = './testing_final_code_peft-phi3-jssp/'


# Micro-batch of 1: the recorded run with a micro-batch of 2 ended in a CUDA
# out-of-memory error. The effective batch size is unchanged because the
# accumulation steps are derived from BATCH_SIZE.
MICRO_BATCH_SIZE = 1
BATCH_SIZE = 64
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
# Logging, evaluation and checkpointing all happen every LOG_STEP optimizer steps.
LOG_STEP = 50
peft_training_args = TrainingArguments(
    output_dir=output_dir,
    warmup_steps=1,
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    per_device_eval_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    num_train_epochs=2,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=LOG_STEP,
    logging_dir=output_dir + "logs",
    save_strategy="steps",
    save_steps=LOG_STEP,
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    eval_strategy="steps",
    eval_steps=LOG_STEP,
    do_eval=True,
    gradient_checkpointing=True,
    # Log to TensorBoard only; otherwise the Trainer reports to every installed
    # integration and stops at an interactive wandb login prompt.
    report_to="tensorboard",
    overwrite_output_dir=True,  # a real bool; the string 'True' only worked by being truthy
    group_by_length=True,
    save_total_limit=50,
    fp16=True,
)

# The KV cache is disabled for training (gradient checkpointing is enabled above).
peft_model.config.use_cache = False

peft_trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=peft_training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Displayed as the cell output: the device the Trainer will run on.
peft_training_args.device



trainable model parameters: 197200896
all model parameters: 2009140224
percentage of trainable model parameters: 9.82%
trainable model parameters: 75497472
all model parameters: 2084637696
percentage of trainable model parameters: 3.62%




device(type='cuda', index=0)

In [None]:
import gc

peft_trainer.train()

# To continue an interrupted run from the latest checkpoint instead:
# peft_trainer.train(resume_from_checkpoint=True)

# Persist the trained adapter to the Trainer's output directory before the
# trainer is dropped, so the result does not depend on a step-based checkpoint
# having been written during the run.
peft_trainer.save_model()

# Free memory for merging weights
# NOTE(review): `peft_model` wraps the same base model, so the GPU memory is
# only actually released once that reference is dropped as well.
del original_model
del peft_trainer
gc.collect()  # collect unreachable objects first so empty_cache() can return their blocks
torch.cuda.empty_cache()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


OutOfMemoryError: CUDA out of memory. Tried to allocate 4.04 GiB. GPU 0 has a total capacity of 14.75 GiB of which 2.71 GiB is free. Process 2268 has 12.03 GiB memory in use. Of the allocated memory 5.94 GiB is allocated by PyTorch, and 5.96 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)