## Setup

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig
from huggingface_hub import login
from dotenv import load_dotenv
from datasets import load_dataset
import os
import wandb
from peft import LoraConfig, get_peft_model
from pipeline.main import run_eval

In [None]:
# Load variables from a .env file into the process environment so the
# credentials read below are available.
load_dotenv()
# Authenticate with the Hugging Face Hub. HF_TOKEN must be present in the
# environment; a missing variable raises KeyError here.
login(token = os.environ['HF_TOKEN'])
# Authenticate with Weights & Biases (used later via report_to="wandb").
wandb.login()

## Fine-Tuning Dataset

### Load Model / Dataset

In [None]:
# Disabled alternative, kept for reference: 4-bit bitsandbytes load of the
# meta-llama checkpoint. It is a bare string literal, so nothing inside it runs.
# NOTE(review): if it is ever re-enabled it needs `t` (torch) in scope before
# this cell and the same `padding_side` keyword used below.
'''
model_name = 'meta-llama/Llama-3.1-8B-Instruct'

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=t.float16,
    bnb_4bit_use_double_quant=True,
    llm_int8_threshold=6.0,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, pad_side="left")
model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    device_map="auto", 
    quantization_config=bnb_config,
)
tokenizer.pad_token = tokenizer.eos_token'''

# Checkpoint used for both the tokenizer and the model weights.
model_name = 'unsloth/Meta-Llama-3.1-8B-Instruct'
# model_name = 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit'


# `padding_side` is the keyword the tokenizer understands; the previous
# `pad_side="left"` is not a tokenizer option, so the requested left padding
# was never actually configured.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token


# Load the causal-LM weights, letting the library choose device placement and
# dtype ("auto" for both).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
)

In [None]:
def get_datasets(lang, train_size=10000, test_size=500, seed=47):
    """Build disjoint train/test splits of accepted CodeNet submissions.

    Loads the ``train`` split of ``iNeil77/CodeNet`` for ``lang``, keeps only
    the ``p_id``, ``language``, ``status`` and ``code`` columns, drops every
    row whose ``status`` is not ``'Accepted'``, shuffles with ``seed`` and
    slices the first ``train_size`` rows for training and the following
    ``test_size`` rows for evaluation.

    Args:
        lang: Dataset configuration name passed to ``load_dataset`` (e.g. ``'Java'``).
        train_size: Number of rows in the training split.
        test_size: Number of rows in the evaluation split.
        seed: Shuffle seed; the default reproduces the previously hard-coded value.

    Returns:
        A ``(train_set, test_set)`` tuple of datasets.

    Raises:
        ValueError: If fewer than ``train_size + test_size`` accepted rows exist.
    """
    dataset = load_dataset('iNeil77/CodeNet', lang, split='train')
    dataset = dataset.select_columns(['p_id', 'language', 'status', 'code'])
    accepted = dataset.filter(lambda x: x['status'] == 'Accepted')

    # Fail early with an explicit message instead of an index error from
    # `select` when the filtered data cannot cover both splits.
    required = train_size + test_size
    available = len(accepted)
    if available < required:
        raise ValueError(
            f"Requested {required} rows (train_size={train_size}, test_size={test_size}) "
            f"but only {available} accepted '{lang}' submissions are available."
        )

    shuffled = accepted.shuffle(seed=seed)

    train_set = shuffled.select(range(train_size))
    test_set = shuffled.select(range(train_size, required))

    return train_set, test_set

# Build the Java train/eval splits using get_datasets' default sizes.
train_set, test_set = get_datasets('Java')

In [None]:
def tokenize(record):
    """Tokenize the ``code`` field of ``record`` with the notebook's tokenizer.

    Sequences longer than 1024 tokens are truncated. Whatever the tokenizer
    returns is passed straight back to the caller.
    """
    return tokenizer(record['code'], truncation=True, max_length=1024)

# Tokenize both splits in batches across 32 worker processes, then keep only
# the two columns produced by the tokenizer that are selected below.
train_set = (
    train_set
    .map(tokenize, batched=True, num_proc=32)
    .select_columns(['input_ids', 'attention_mask'])
)

test_set = (
    test_set
    .map(tokenize, batched=True, num_proc=32)
    .select_columns(['input_ids', 'attention_mask'])
)


In [None]:
# for i in range(10):
#     idx = random.randint(0, dataset_cpp.num_rows)
#     code = dataset_cpp[idx]['code']
#     print(code)
#     print('--x---x---x--\n')

In [None]:
# api = wandb.Api()
# run = api.run("atharva_nihalani-brown-university/huggingface/diw9fexc")
# metrics_dataframe = run.history()
# # metrics_dataframe.to_csv("metrics.csv")

In [None]:
# # Access the GPU metrics
# gpu_memory_util = metrics_dataframe.get("gpu.0.memory")  # For the first GPU
# gpu_memory_alloc = metrics_dataframe.get("gpu.0.memoryAllocated")
# print(gpu_memory_util)
# print(gpu_memory_alloc)

In [None]:
# print(metrics_dataframe.columns)

### Fine-tune

In [None]:
# LoRA adapter settings: rank-8 adapters on the q_proj / v_proj modules with
# alpha 32 and 5% dropout, no bias terms, for a causal-LM task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the model loaded earlier with the adapter configuration.
model = get_peft_model(model, lora_config)

In [None]:
training_args = TrainingArguments(
    # Output / checkpointing
    output_dir="./llama3-java-finetune",
    save_steps=0.2,
    save_total_limit=3,
    # Optimisation
    num_train_epochs=1,
    learning_rate=2e-4,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    bf16=True,
    # Data loading
    dataloader_num_workers=16,
    dataloader_persistent_workers=True,
    # Evaluation (including one pass before training starts)
    eval_strategy='steps',
    eval_steps=0.1,
    eval_on_start=True,
    # Logging
    report_to="wandb",
    run_name='temp_run',
    logging_steps=0.02,
    logging_first_step=True,
)

# mlm=False selects causal (next-token) language modelling rather than masked LM.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [None]:
def model_init(trial):
    """Return a freshly loaded, LoRA-wrapped model for one Trainer run.

    The Trainer is built with ``model_init`` rather than a fixed model, so
    this function is the only place the trained model is created. Returning
    the bare base model here would make every run train without the adapters
    described by ``lora_config``; wrapping the fresh base model keeps each run
    a LoRA fine-tune.

    Args:
        trial: Trial object supplied by the Trainer; not used here.

    Returns:
        The base causal-LM wrapped by ``get_peft_model`` with ``lora_config``.
    """
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype="auto",
    )
    return get_peft_model(base_model, lora_config)


# The Trainer receives a factory (model_init) instead of a fixed `model=`
# instance, so it builds its own model from that factory.
trainer = Trainer(
    model_init=model_init,
    # model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_set,
    eval_dataset=test_set,
)

In [None]:
def wandb_hp_space(trial):
    """Return the W&B sweep configuration used for hyperparameter search.

    Random search that minimises the metric named ``objective`` over learning
    rate, per-device batch size and gradient-accumulation steps. ``trial`` is
    accepted for interface compatibility and is not used.
    """
    objective_metric = {"name": "objective", "goal": "minimize"}

    search_parameters = {
        "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1e-4},
        "per_device_train_batch_size": {"values": [4, 8]},
        "gradient_accumulation_steps": {"values": [1, 2, 4, 8]},
        # LoRA options are currently left out of the search:
        # "r": {"values": [2, 4, 8, 16]},
        # "lora_alpha": {"values": [16, 32, 64, 128]},
        # "lora_dropout": {"min": 0.0, "max": 0.2},
    }

    return {
        "method": "random",
        "metric": objective_metric,
        "parameters": search_parameters,
    }

In [None]:
# Run four trials of the W&B-backed search over wandb_hp_space, minimising the
# search objective; the result is kept in `best_trials`.
best_trials = trainer.hyperparameter_search(
    hp_space=wandb_hp_space,
    backend="wandb",
    direction="minimize",
    n_trials=4,
    # compute_objective=compute_objective,
)

### Train

In [None]:
# Apply the winning hyperparameters from the search before the final run.
# Without this, training uses whatever values the last search trial left on
# `trainer.args`, not the best ones. Skipped when the search cell was not run.
best_run = globals().get('best_trials')
if best_run is not None:
    for hp_name, hp_value in best_run.hyperparameters.items():
        # The backend may report bookkeeping keys that are not training
        # arguments; only touch attributes TrainingArguments already has.
        if not hasattr(trainer.args, hp_name):
            continue
        current_value = getattr(trainer.args, hp_name)
        if current_value is not None:
            # Keep the argument's existing type (e.g. int batch sizes).
            hp_value = type(current_value)(hp_value)
        setattr(trainer.args, hp_name, hp_value)

trainer.train()

In [None]:
# Close the active Weights & Biases run now that training is done.
wandb.finish()

In [None]:
# Model settings handed to the project's evaluation pipeline.
# NOTE(review): 'model_path' is an absolute path to one specific checkpoint
# step; confirm it exists and matches the training run above before running.
args = {
    'model': 'hf/local',
    'model_path': '/root/srf-project/test_humaneval-x/llama3-java-finetune/checkpoint-1250',
    'device': 'auto',
    'torch_dtype': 'auto',
}

# Evaluate on the 'java' task with epochs=3.
run_eval('java', epochs=3, model_args=args)

### GPU Details

In [None]:
import torch as t
import gc

# Number of bytes in one GiB, used for every conversion below.
BYTES_PER_GIB = 1024 ** 3

# Free and total device memory, in bytes.
free_memory, total_memory = t.cuda.mem_get_info()

free_memory_gb = free_memory / BYTES_PER_GIB
total_memory_gb = total_memory / BYTES_PER_GIB
mem_used = t.cuda.device_memory_used() / BYTES_PER_GIB

print(f"Free GPU Memory: {free_memory_gb:.2f} GB")
print(f"Total GPU Memory: {total_memory_gb:.2f} GB")
print(f'Memory Used: {mem_used:.2f} GB')

In [None]:
# Allocated vs. reserved CUDA memory as reported by torch, converted to MiB.
BYTES_PER_MIB = 1024 ** 2
print(t.cuda.memory_allocated() / BYTES_PER_MIB, "MB allocated")
print(t.cuda.memory_reserved() / BYTES_PER_MIB, "MB reserved")