<a href="https://colab.research.google.com/github/b05902062/TinyLlama/blob/main/sft/FunctionCallingFinetune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clone finetuning code.

!git clone https://github.com/b05902062/TinyLlama.git
%cd ./TinyLlama/

In [None]:
# Upgrade some modules to fix finetuning errors.
# `evaluate` is installed if missing; `datasets` and `fsspec` are upgraded to their newest release.
# No versions are pinned, so each run installs whatever is current at that time.
# NOTE(review): presumably these are imported by sft/finetune.py, which is launched with `!python` below — confirm.

!pip install evaluate
!pip install --upgrade datasets fsspec

In [None]:
# Finetune for user intention from TinyLlama/TinyLlama-1.1B-Chat-v1.0 checkpoint for 1 epoch.
#
# Everything after `!python` is a single shell command joined by trailing backslashes, so
# comments cannot be placed between the flag lines. The main settings are:
#   * output goes to ./output/0001_run_instruction_finetune, relative to the current
#     working directory;
#   * logging and evaluation are step-based, every 200 steps; checkpoints are saved every
#     1000 steps with --save_total_limit 15;
#   * dataset and dataset format are both ZihminWang/user-intention;
#   * learning rate 1e-5 with a cosine scheduler and warmup ratio 0.05;
#   * metrics are reported to TensorBoard.
# NOTE(review): --load_checkpoint is not explained here; presumably it resumes from a
# checkpoint already present in --output_dir — confirm in sft/finetune.py.

!python /content/TinyLlama/sft/finetune.py \
    --model_name_or_path TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
    --output_dir ./output/0001_run_instruction_finetune \
    --logging_strategy steps \
    --logging_steps 200 \
    --per_device_train_batch_size 16 \
    --eval_strategy steps \
    --eval_steps 200 \
    --eval_dataset_size 512 \
    --max_eval_samples 512 \
    --per_device_eval_batch_size 16 \
    --save_strategy steps \
    --save_steps 1000 \
    --data_seed 42 \
    --save_total_limit 15 \
    --max_new_tokens 512 \
    --dataloader_num_workers 3 \
    --group_by_length=True \
    --remove_unused_columns False \
    --do_train \
    --do_eval \
    --warmup_ratio 0.05 \
    --lr_scheduler_type cosine \
    --dataset ZihminWang/user-intention \
    --dataset_format ZihminWang/user-intention \
    --source_max_len 1024 \
    --target_max_len 1024 \
    --num_train_epochs 1 \
    --learning_rate 1e-5 \
    --adam_beta2 0.999 \
    --max_grad_norm 1.0 \
    --weight_decay 0.01 \
    --seed 0 \
    --trust_remote_code \
    --report_to tensorboard \
    --load_checkpoint True

In [None]:
# Evaluta models on test split. Automatically use your checkpoint if you have run the above cell. If not, pull from ZihminWang/TinyLlama-1.1B-Chat-v1.0-user-intention-v0.1.

!python /content/TinyLlama/sft/finetune.py \
    --model_name_or_path ZihminWang/TinyLlama-1.1B-Chat-v1.0-user-intention-v0.1 \
    --output_dir ./output/0001_run_instruction_finetune \
    --logging_strategy no \
    --logging_steps 1 \
    --save_strategy no \
    --save_steps 1 \
    --eval_strategy no \
    --eval_steps 1000 \
    --eval_dataset_size 512 \
    --max_eval_samples 512 \
    --per_device_eval_batch_size 16 \
    --data_seed 42 \
    --dataloader_num_workers 3 \
    --group_by_length=False \
    --remove_unused_columns False \
    --do_train False \
    --do_eval False \
    --do_predict \
    --warmup_ratio 0.05 \
    --lr_scheduler_type cosine \
    --dataset ZihminWang/user-intention \
    --dataset_format ZihminWang/user-intention \
    --source_max_len 1024 \
    --target_max_len 1024 \
    --per_device_train_batch_size 16 \
    --num_train_epochs 1 \
    --learning_rate 1e-5 \
    --adam_beta2 0.999 \
    --max_grad_norm 1.0 \
    --weight_decay 0.01 \
    --seed 0 \
    --trust_remote_code \
    --report_to tensorboard \
    --load_checkpoint True \
    --predict_with_generate True \
    --max_predict_samples 512 \

In [None]:
# Show training progress.
# The log directory is relative to the current working directory; it is the `runs` folder
# under the --output_dir (./output/0001_run_instruction_finetune) used by the finetuning cell.

%load_ext tensorboard
%tensorboard --logdir output/0001_run_instruction_finetune/runs

In [None]:
# Log in to the Hugging Face Hub from the command line.
# NOTE(review): presumably required so the upload_folder calls in the upload cell can push to your account — confirm.
!huggingface-cli login

In [None]:
# Ask for the Hugging Face repository that will receive the latest checkpoint and the
# training progress; surrounding whitespace is removed from the answer.

repo_prompt = "Please enter the new Hugging Face repository name (e.g., 'your-username/your-repo'): "
new_repo_name = input(repo_prompt).strip()

In [None]:
# Upload your latest checkpoint and training progress.
#
# Reads `new_repo_name` from the previous cell, makes sure that repository exists on the
# Hub, uploads the checkpoint with the highest step number, then uploads the TensorBoard
# logs into a `runs` folder of the same repository.

import glob  # For pattern matching file paths
import os
import re

from huggingface_hub import create_repo, upload_folder

# Base output directory where the checkpoints are saved.
base_output_dir = "/content/TinyLlama/output/0001_run_instruction_finetune"

# upload_folder does not create the target repository, so create it first.
# exist_ok=True makes this a no-op when the repository is already there, which keeps the
# cell safe to re-run.
create_repo(new_repo_name, repo_type="model", exist_ok=True)

# Collect directories named exactly 'checkpoint-<STEP>' together with their step number.
# Anything else matched by the glob (plain files, non-numeric suffixes) is skipped so the
# numeric sort below cannot fail on it.
checkpoint_pattern = os.path.join(base_output_dir, "checkpoint-*")
checkpoint_steps = {}
for candidate in glob.glob(checkpoint_pattern):
    step_match = re.fullmatch(r"checkpoint-(\d+)", os.path.basename(candidate))
    if step_match and os.path.isdir(candidate):
        checkpoint_steps[candidate] = int(step_match.group(1))

# Order by the numeric step. A plain string sort would be wrong here: as text,
# 'checkpoint-1000' sorts before 'checkpoint-200'.
all_checkpoints = sorted(checkpoint_steps, key=checkpoint_steps.get)

if all_checkpoints:
    latest_checkpoint_folder = all_checkpoints[-1]  # Highest step number is the latest
    print(f"Found latest checkpoint folder: {latest_checkpoint_folder}")

    upload_folder(
        folder_path=latest_checkpoint_folder,
        repo_id=new_repo_name,
        commit_message="Upload latest fine-tuned TinyLlama checkpoint",
        repo_type="model",  # or "dataset", "space"
        # Keep training_args.bin out of the upload without deleting it from the local
        # checkpoints.
        ignore_patterns=["training_args.bin"],
    )
    print(f"Model uploaded to https://huggingface.co/{new_repo_name}")
else:
    print(f"No checkpoint folders found in: {base_output_dir}")


# Local TensorBoard logs: the directory containing the 'events.out.tfevents.*' files.
tensorboard_log_dir = os.path.join(base_output_dir, "runs")

# Upload the logs only if they exist; upload_folder fails on a missing folder.
# `path_in_repo` places the files inside a 'runs' folder in the Hugging Face repository,
# mirroring the local structure.
if os.path.isdir(tensorboard_log_dir):
    upload_folder(
        folder_path=tensorboard_log_dir,
        repo_id=new_repo_name,
        commit_message="Add TensorBoard training logs",
        repo_type="model",
        path_in_repo="runs",
    )

    print(f"TensorBoard logs uploaded to https://huggingface.co/{new_repo_name}/tree/main/runs")
    print(f"You should see a 'Training metrics' tab on your model page: https://huggingface.co/{new_repo_name}")
else:
    print(f"No TensorBoard logs found in: {tensorboard_log_dir}")