## Setup

In [1]:
from unsloth.chat_templates import standardize_sharegpt, get_chat_template, train_on_responses_only
from unsloth import FastLanguageModel
from transformers import TrainingArguments, DataCollatorForSeq2Seq, AutoTokenizer, AutoModelForCausalLM, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig 
from huggingface_hub import login
from dotenv import load_dotenv
from datasets import load_dataset
import os
import wandb
import json
from trl import SFTTrainer #, SFTConfig
from pipeline.main import run_eval
from peft import LoraConfig, get_peft_model, PeftModel
from pipeline.main import run_eval
from IPython.display import display, HTML
import torch as t
from pipeline.constants import LANG_PREFIX, HUMANEVAL_PROMPT

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [2]:
# Read the local .env file (if any) into the environment, then authenticate
# against the Hugging Face Hub and Weights & Biases.
load_dotenv()
# HF_TOKEN must be defined; a missing key raises KeyError here.
login(token = os.environ['HF_TOKEN'])
wandb.login()

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
[34m[1mwandb[0m: Currently logged in as: [33matharva_nihalani[0m ([33matharva_nihalani-brown-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Unsloth FT

### Load model

In [None]:
# Load the Llama 3.1 8B Instruct base model and its tokenizer through Unsloth.
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# `max_seq_length` is reused later when the SFTTrainer is built.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

In [None]:
# Wrap the base model with LoRA adapters (rank 8, alpha 16) on the attention
# and MLP projection layers listed in `target_modules`.
model = FastLanguageModel.get_peft_model(
    model,
    r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
)

# Replace the tokenizer with one configured for the "llama-3.1" chat template;
# `chat_format` below relies on it via `apply_chat_template`.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

### Load Data

In [None]:
# Load the problem statements from JSON. The result is used below as a mapping
# indexed by a row's `p_id`.
# NOTE(review): absolute path is machine-specific; consider a configurable data directory.
with open("/root/srf-project/data/codenet_questions/filtered_problem_descriptions.json", "r") as f:
    problem_descriptions = json.load(f)

def get_filtered_dataset(lang):
    """Load one language configuration of CodeNet and keep only usable rows.

    Args:
        lang: Configuration name forwarded to `load_dataset` (e.g. 'Java').

    Returns:
        The 'train' split reduced to the columns `p_id`, `language`, `status`
        and `code`, containing only rows whose status is 'Accepted' and whose
        `p_id` has an entry in the module-level `problem_descriptions`.
    """
    dataset = load_dataset('iNeil77/CodeNet', lang, split='train')
    dataset = dataset.select_columns(['p_id', 'language', 'status', 'code'])

    # Evaluate both conditions in a single pass rather than scanning the
    # dataset twice; membership is tested on the mapping directly.
    return dataset.filter(
        lambda x: x['status'] == 'Accepted' and x['p_id'] in problem_descriptions
    )

def get_train_test(dataset, train_size=10000, test_size=500, seed=47):
    """Shuffle a dataset and carve out disjoint train and test subsets.

    Args:
        dataset: Object exposing `shuffle(seed=...)` and `select(indices)`.
        train_size: Number of shuffled rows placed in the training subset.
        test_size: Number of rows, taken immediately after the training rows,
            placed in the test subset.
        seed: Seed forwarded to `shuffle`. Defaults to 47, the value that was
            previously hard-coded, so existing calls behave as before.

    Returns:
        Tuple `(train_set, test_set)`.
    """
    shuffled = dataset.shuffle(seed=seed)
    train_set = shuffled.select(range(train_size))
    # The test rows start where the training rows end, so the two never overlap.
    test_set = shuffled.select(range(train_size, train_size + test_size))

    return train_set, test_set

def add_conversations(row):
    """Attach a two-turn chat (task prompt, then reference solution) to a row.

    The user turn is a fixed instruction followed by the problem statement
    looked up by the row's `p_id`; the assistant turn is the row's `code`.
    The row is modified in place and also returned.
    """
    instruction = 'Read the following problem description. Fully implement a solution in Java. Your response should only contain the code, no explanations.\n\n'

    user_turn = {
        'role': 'user',
        'content': instruction + problem_descriptions[row['p_id']],
    }
    assistant_turn = {
        'role': 'assistant',
        'content': row['code'],
    }

    row['conversations'] = [user_turn, assistant_turn]
    return row

def chat_format(row):
    """Render every conversation of a batch to a chat-template string.

    Meant to be used with `map(..., batched=True)`: `row['conversations']`
    is iterated as a list of conversations. Returns a dict with a single
    `text` column holding the rendered strings.
    """
    rendered = []
    for convo in row['conversations']:
        rendered.append(
            tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False)
        )
    return {'text': rendered}

In [None]:
# Build the CodeNet Java train/test sets.
codenet = get_filtered_dataset('Java')
# Second positional argument is train_size: 40 training rows, default-sized test set.
codenet_train, codenet_test = get_train_test(codenet, 40)

# Replace the raw columns with a `conversations` column ...
codenet_train = codenet_train.map(add_conversations, remove_columns=['p_id', 'language', 'status', 'code'])
codenet_test = codenet_test.map(add_conversations, remove_columns=['p_id', 'language', 'status', 'code'])

# ... then render each conversation to the `text` column.
codenet_train = codenet_train.map(chat_format, batched=True)
codenet_test = codenet_test.map(chat_format, batched=True)

In [None]:
def humaneval_add_convs(row):
    """Attach a two-turn chat built from a HumanEval-X row.

    The user turn is `HUMANEVAL_PROMPT`, the C++ language prefix, a newline
    and the task prompt. The assistant turn repeats the final line of the
    prompt and then appends the canonical solution. The row is modified in
    place and also returned.
    """
    prompt_tail = row['prompt'].splitlines()[-1]
    answer = '\n'.join([prompt_tail, row['canonical_solution']])

    question = HUMANEVAL_PROMPT + LANG_PREFIX['cpp'] + '\n' + row['prompt']

    row['conversations'] = [
        {'role': 'user', 'content': question},
        {'role': 'assistant', 'content': answer},
    ]
    return row

In [None]:
# Load the C++ subset of HumanEval-X and hold out 25% of it for testing.
humaneval = load_dataset('THUDM/humaneval-x', trust_remote_code=True, split='test', name='cpp')

# Split exactly once, with a fixed seed. Calling train_test_split twice draws two
# independent random partitions, so the "train" and "test" sets could share
# examples and the held-out set would not be held out.
humaneval_split = humaneval.train_test_split(test_size=0.25, seed=47)
humaneval_train = humaneval_split['train']
humaneval_test = humaneval_split['test']

# Raw columns dropped once the `conversations` column has been built.
humaneval_raw_columns = ['task_id', 'prompt', 'declaration', 'canonical_solution', 'test', 'example_test']

humaneval_train = humaneval_train.map(humaneval_add_convs, remove_columns=humaneval_raw_columns)
humaneval_test = humaneval_test.map(humaneval_add_convs, remove_columns=humaneval_raw_columns)

# Render each conversation to the `text` column.
humaneval_train = humaneval_train.map(chat_format, batched=True)
humaneval_test = humaneval_test.map(chat_format, batched=True)


In [None]:
# Display the prepared CodeNet training set as a quick sanity check.
codenet_train

### Finetuning

In [None]:
def get_save_dir():
    """Create and return the first unused numbered output directory.

    Candidates are the base path followed by 1, 2, 3, ...; the first one
    that does not exist yet is created with `os.mkdir` and its path returned.
    """
    prefix = '/root/srf-project/playground/llama3-java-finetune-'

    n = 1
    while os.path.exists(prefix + str(n)):
        n += 1

    target = prefix + str(n)
    os.mkdir(target)
    return target
        
# Reserve a fresh output directory for this run (created as a side effect).
output_path = get_save_dir()
# Dataset handed to the trainer below: the HumanEval-X C++ training split.
train_set = humaneval_train

In [None]:
# Hyperparameters for the LoRA fine-tuning run; metrics are reported to
# Weights & Biases and outputs go to `output_path`.
training_args = TrainingArguments(
    per_device_train_batch_size = 8,
    # auto_find_batch_size = True,
    gradient_accumulation_steps = 1,
    warmup_ratio = 0.05,
    num_train_epochs = 5,
    # eval_strategy = 'steps',
    # eval_steps = 0.05,
    learning_rate = 2e-4,
    bf16 = True,
    logging_steps = 1,
    # logging_first_step=True,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    # save_steps = 0.2,
    # save_total_limit = 3,
    output_dir = output_path,
    report_to = "wandb", 
    run_name = 'test_run'
)

# Supervised fine-tuning on the pre-rendered `text` column of `train_set`.
# No eval dataset is attached (the eval-related options are commented out).
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_set,
    # eval_dataset = test_set,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 8,
    args = training_args,
    # packing = False, 
)

In [None]:
# Re-wrap the trainer with Unsloth's `train_on_responses_only`, passing the
# Llama 3.1 user/assistant header strings as instruction/response markers.
# NOTE(review): presumably this restricts the loss to assistant tokens — confirm
# against the Unsloth documentation.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

In [None]:
# Run fine-tuning and keep the returned value for later inspection.
trainer_stats = trainer.train()

In [None]:
# Upload everything under `output_path` to W&B as a "model" artifact, then
# close the run. Requires an active run (`wandb.run` must not be None).
artifact = wandb.Artifact(name="final_model", type="model")
artifact.add_dir(output_path)
wandb.run.log_artifact(artifact)

wandb.finish()

## Previous FT Code

### Load Model / Dataset

In [None]:
# Disabled alternative kept as a bare string literal (never executed): loads the
# Meta checkpoint with an explicit BitsAndBytes 4-bit configuration.
# NOTE(review): if this is revived, `pad_side` is probably meant to be `padding_side` — confirm.
'''
model_name = 'meta-llama/Llama-3.1-8B-Instruct'

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=t.float16,
    bnb_4bit_use_double_quant=True,
    llm_int8_threshold=6.0,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, pad_side="left")
model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    device_map="auto", 
    quantization_config=bnb_config,
)
tokenizer.pad_token = tokenizer.eos_token'''

# Pre-quantised 4-bit checkpoint; the full-precision alternative is kept for reference.
# model_name = 'unsloth/Meta-Llama-3.1-8B-Instruct'
model_name = 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit'


# `padding_side` is the keyword the tokenizer recognises. The previous spelling
# `pad_side` was not a known option, so the request for left padding was
# silently ignored.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token


# Let the library choose device placement and dtype from the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    device_map="auto", 
    torch_dtype="auto",
)

In [None]:
# Load the problem statements from JSON. The result is used below as a mapping
# indexed by a row's `p_id`.
# NOTE(review): same absolute, machine-specific path as in the Unsloth section.
with open("/root/srf-project/data/codenet_questions/filtered_problem_descriptions.json", "r") as f:
    problem_descriptions = json.load(f)

def get_filtered_dataset(lang):
    """Return accepted CodeNet submissions in `lang` that have a known problem statement.

    Loads the 'train' split, keeps the columns `p_id`, `language`, `status`
    and `code`, then filters first on `status == 'Accepted'` and second on
    `p_id` being a key of the module-level `problem_descriptions`.
    """
    wanted_columns = ['p_id', 'language', 'status', 'code']

    def is_accepted(x):
        return x['status']=='Accepted'

    def has_description(x):
        return x['p_id'] in problem_descriptions.keys()

    return (
        load_dataset('iNeil77/CodeNet', lang, split='train')
        .select_columns(wanted_columns)
        .filter(is_accepted)
        .filter(has_description)
    )

def get_train_test(dataset, train_size=10000, test_size=500):
    """Shuffle with a fixed seed (47) and return disjoint `(train, test)` subsets.

    The first `train_size` shuffled rows form the training subset; the
    following `test_size` rows form the test subset.
    """
    shuffled = dataset.shuffle(seed=47)

    boundary = train_size
    train_indices = range(0, boundary)
    test_indices = range(boundary, boundary + test_size)

    return shuffled.select(train_indices), shuffled.select(test_indices)

# Java submissions only; default-sized training subset with an 8-row test subset.
filtered = get_filtered_dataset('Java')
train_set, test_set = get_train_test(filtered, test_size=8)

In [None]:
def add_description(row):
    """Store the problem statement for the row's `p_id` under `description`.

    The row is modified in place and also returned.
    """
    row['description'] = problem_descriptions[row['p_id']]
    return row

# Add the `description` column to both subsets.
train_set = train_set.map(add_description)
test_set = test_set.map(add_description)

In [None]:
def add_final_prompt(row):
    """Build the training text for a row and store it under `final_prompt`.

    The text is the whitespace-stripped description followed by the code
    wrapped in `<answer>` / `</answer>` lines, all joined with newlines.
    The row is modified in place and also returned.
    """
    statement = row['description'].strip()
    solution = row['code']

    pieces = (statement, '<answer>', solution, '</answer>')
    row['final_prompt'] = '\n'.join(pieces)
    return row

# Add the `final_prompt` column to both subsets.
train_set = train_set.map(add_final_prompt)
test_set = test_set.map(add_final_prompt)

In [None]:
def tokenize(record):
    """Tokenise a record's `final_prompt` as a single user chat message.

    The message is passed through the tokenizer's chat template with the
    generation prompt appended, and the template's dict output is returned
    unchanged.
    """
    conversation = [{'role': 'user', 'content': record['final_prompt']}]

    return tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_dict=True,
    )

# Tokenise both subsets using 32 worker processes.
final_train_set = train_set.map(tokenize, num_proc=32)
final_test_set = test_set.map(tokenize, num_proc=32)

# Keep only the columns handed to the Trainer.
final_train_set = final_train_set.select_columns(['input_ids', 'attention_mask'])
final_test_set = final_test_set.select_columns(['input_ids', 'attention_mask'])

### FineTuning

In [None]:
# LoRA configuration for the plain PEFT path: rank 16, alpha 64, adapters on
# the query and value projections only.
lora_config = LoraConfig(
    r=16,
    lora_alpha=64,
    target_modules=["q_proj", "v_proj"],  
    lora_dropout=0.00,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the base model loaded earlier in this section with the adapters.
peft_model = get_peft_model(model, lora_config)

In [None]:
def get_save_dir():
    """Create the next free `llama3-java-finetune-<n>` directory and return its path.

    Numbering starts at 1; the first path that does not exist yet is created
    with `os.mkdir` and returned.
    """
    base_path = '/root/srf-project/playground/llama3-java-finetune-'

    idx = 1
    while True:
        candidate = base_path + str(idx)
        if not os.path.exists(candidate):
            os.mkdir(candidate)
            return candidate
        idx += 1
        
# Reserve a fresh output directory for this run (created as a side effect).
output_path = get_save_dir()

# Hyperparameters for the plain Trainer run. `save_steps` and `logging_steps`
# are given as fractions.
# NOTE(review): presumably interpreted as ratios of total training steps — confirm
# against the TrainingArguments documentation.
training_args = TrainingArguments(
    output_dir=output_path,
    # eval_strategy='steps',
    # eval_steps=0.1,
    # eval_on_start=True,
    per_device_train_batch_size=1, 
    # auto_find_batch_size=True,
    gradient_accumulation_steps=1,
    # gradient_checkpointing=True,
    torch_empty_cache_steps=1,
    dataloader_num_workers=16,
    dataloader_persistent_workers=True,
    learning_rate=5e-4,
    num_train_epochs=1,  
    bf16=True,
    save_steps=0.2,
    save_total_limit=3,
    logging_steps=0.02,
    report_to="wandb",
    logging_first_step=True,
    run_name='chat_template',
)

# Collator configured for causal language modelling (no masked-LM objective).
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # For causal LM
)

# An eval dataset is attached, but the eval schedule options above are commented out.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=final_train_set,
    eval_dataset=final_test_set,
    data_collator=data_collator,
)

In [None]:
# Run the fine-tuning job configured above.
trainer.train()

In [None]:
# Close the active Weights & Biases run.
wandb.finish()

### RunEval

In [None]:
# Evaluate a saved checkpoint on the Java benchmark with 3 samples.
# `run_eval` is already imported in the setup cell; the import is repeated here.
from pipeline.main import run_eval

# NOTE(review): the checkpoint path uses the suffix "-a", which `get_save_dir`
# (numeric suffixes only) never produces — confirm the directory exists.
args = {
    'model': 'hf/local',
    'model_path': '/root/srf-project/playground/llama3-java-finetune-a/checkpoint-625',
    'device': 'auto',
    'torch_dtype': 'auto'
}

run_eval('java', args, samples=3)

## RunEval

In [None]:
# api = wandb.Api()
# artifact_name = ...

# my_artifact = api.artifact(artifact_name, 'model')
# my_artifact.download('llama3_test_dir')

In [None]:
# Load the same base checkpoint twice: once with automatic dtype, once in 4-bit.
base_1 = AutoModelForCausalLM.from_pretrained('unsloth/Meta-Llama-3.1-8B-Instruct', device_map="auto", torch_dtype='auto')
base_2 = AutoModelForCausalLM.from_pretrained('unsloth/Meta-Llama-3.1-8B-Instruct', device_map="auto", load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained('unsloth/Meta-Llama-3.1-8B-Instruct')

# Attach the same LoRA checkpoint to each base model.
model_1 = PeftModel.from_pretrained(base_1, '/root/srf-project/playground/llama3-java-finetune-2/checkpoint-80')
model_2 = PeftModel.from_pretrained(base_2, '/root/srf-project/playground/llama3-java-finetune-2/checkpoint-80')

# Fold the adapter weights into the base weights and drop the PEFT wrapper.
# NOTE(review): merging into a 4-bit quantised base may lose precision — verify
# model_2 outputs against model_1.
model_1 = model_1.merge_and_unload()
model_2 = model_2.merge_and_unload()

In [None]:
# Save each merged model together with the tokenizer. The paths are relative,
# so the directories are created under the current working directory.
model_1.save_pretrained('llama3_test_1')
tokenizer.save_pretrained('llama3_test_1')

model_2.save_pretrained('llama3_test_2')
tokenizer.save_pretrained('llama3_test_2')

In [8]:
# Evaluate the merged 4-bit variant on the C++ benchmark with 120 samples.
# NOTE(review): this absolute path only matches the relative 'llama3_test_2'
# save location if the notebook runs from /root/srf-project/playground — confirm.
args = {
    'model': 'hf/local',
    'model_path': '/root/srf-project/playground/llama3_test_2/',
    'device': 'auto',
    'torch_dtype': 'auto'
}

run_eval('cpp', args, samples=120)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Output()

## GPU Details

In [None]:
# Report free, total and used memory of the current CUDA device in GB.
# `torch` is already imported as `t` in the setup cell; `gc` is not used in this cell.
import torch as t
import gc

# Both values are returned in bytes.
free_memory, total_memory = t.cuda.mem_get_info()

# Convert bytes to GB
free_memory_gb = free_memory / (1024 * 1024 * 1024)
total_memory_gb = total_memory / (1024 * 1024 * 1024)
mem_used = t.cuda.device_memory_used() / (1024 ** 3)

print(f"Free GPU Memory: {free_memory_gb:.2f} GB")
print(f"Total GPU Memory: {total_memory_gb:.2f} GB")
print(f'Memory Used: {mem_used:.2f} GB')

In [None]:
# Print PyTorch's allocated and reserved CUDA memory, converted from bytes to MB.
print(t.cuda.memory_allocated() / 1024**2, "MB allocated")
print(t.cuda.memory_reserved() / 1024**2, "MB reserved")

In [None]:
# Same allocator statistics as the previous cell, repeated so the numbers can
# be compared at a different point in the session.
print(t.cuda.memory_allocated() / 1024**2, "MB allocated")
print(t.cuda.memory_reserved() / 1024**2, "MB reserved")