# Training

## 1. Login to HuggingFace

In [None]:
huggingface-cli login

## 2. If you haven't done Prep Unsloth Local

Run the pip installs required for your version of Unsloth.

ToDo: Moving this to unsloth, maybe?

## 3. Clone the model you want to train

Note: the model I've chosen is gated, so you must be granted access on its Hugging Face page before you can clone it.

Using:

``meta-llama/Llama-3.2-1B``

In [None]:
# Set up Git LFS for this user before cloning.
!git lfs install
# Clone the model repository; by default git places it in ./Llama-3.2-1B.
!git clone https://huggingface.co/meta-llama/Llama-3.2-1B

## 4. Load the Model Locally

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Relative path/identifier of the model. When the notebook runs from the
# directory that contains the clone, this presumably resolves to that folder.
model_name = "Llama-3.2-1B"

# The two loads are independent of each other; both read from `model_name`.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

## 5. Load a Dataset Locally

Note: for version 1, I used an existing dataset:

``tatsu-lab/alpaca``

In [None]:
# TODO: load dataset from cloned files
from datasets import load_dataset

# Placeholder location of the training data; point this at a real CSV file.
csv_path = 'path/to/your/dataset.csv'

# Build the dataset with the 'csv' loader from that single file.
dataset = load_dataset('csv', data_files=csv_path)

## 6. Train the Model using the Dataset

In [None]:
from transformers import (
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Name of the dataset column that holds the text to train on.
# NOTE(review): "text" is an assumption (tatsu-lab/alpaca has such a column);
# change it to match the columns of your own CSV.
TEXT_COLUMN = "text"

# Upper bound on tokens per example; longer rows are truncated.
MAX_SEQ_LENGTH = 512

train_split = dataset['train']
if TEXT_COLUMN not in train_split.column_names:
    raise KeyError(
        f"Column {TEXT_COLUMN!r} not found; available columns: "
        f"{train_split.column_names}"
    )

# Batches must be padded to a common length, which requires a pad token.
# Fall back to the end-of-sequence token when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def tokenize_batch(batch):
    """Turn a batch of raw text rows into token ids the model can consume."""
    return tokenizer(
        batch[TEXT_COLUMN],
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
    )


# The Trainer cannot consume raw strings: replace the original columns with
# the tokenizer output (input_ids / attention_mask).
tokenized_train = train_split.map(
    tokenize_batch,
    batched=True,
    remove_columns=train_split.column_names,
)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    # mlm=False selects causal-LM collation: each batch is padded and the
    # labels are derived from the input ids.
    data_collator=DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    ),
)

trainer.train()

## 7. Save the Model as a SafeTensor

In [None]:
import safetensors.torch

# Use save_model instead of save_file(model.state_dict(), ...): save_file
# refuses to serialize tensors that share memory, which is the case for models
# with tied weights. NOTE(review): Llama-3.2-1B appears to tie its output head
# to the input embeddings - confirm via `tie_word_embeddings` in config.json.
# A file written this way omits the duplicate entries, so read it back with
# safetensors.torch.load_model rather than a strict load_state_dict.
safetensors.torch.save_model(
    model,
    'alpaca-llama-3.2-1b.safetensors',
)

## 8. Load the SafeTensor

In [None]:
import safetensors.torch
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Path of the fine-tuned weights written by the save step.
model_path = 'alpaca-llama-3.2-1b.safetensors'

# The .safetensors file holds weights only - no config and no tokenizer - so
# the architecture and tokenizer come from the base model the weights were
# trained from. Nothing in this notebook creates an "alpaca-llama-3.2-1b"
# directory, so from_pretrained cannot be pointed at that name.
model_name = "Llama-3.2-1B"
model = AutoModelForCausalLM.from_pretrained(model_name)

# Overwrite the base weights with the fine-tuned ones. load_model accounts for
# tensors that share memory, so it accepts files written by either save_file
# or save_model.
safetensors.torch.load_model(model, model_path)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

## 9. Send a question to the model

In [None]:
# Define your question
# Parentheses let the literal span two lines; adjacent string literals are
# joined at compile time. A continuation line starting with "+" is not valid.
question = (
    "If two people are carrying 3 bags of 2.32kg rice in each bag, "
    "what is the total weight of the rice they are carrying together?"
)

# Tokenize the input and place the tensors on the same device as the model
inputs = tokenizer(question, return_tensors="pt").to(model.device)

# Generate the response. Without an explicit budget, generate() falls back to
# a small default total length that the prompt alone may already exceed, so
# cap the number of newly generated tokens instead.
outputs = model.generate(**inputs, max_new_tokens=128)

# Decode the response (the first and only sequence in the batch)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)