In [None]:
import torch

# Check if GPU is available
if torch.cuda.is_available():
    print(f"CUDA GPU is available. Device name: {torch.cuda.get_device_name(0)}")
else:
    raise RuntimeError("CUDA GPU is not available. Please check your runtime settings.")


CUDA GPU is available. Device name: Tesla T4


In [None]:
# Install necessary dependencies
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

import torch
major_version, minor_version = torch.cuda.get_device_capability()

# Install dependencies based on GPU capability
if major_version >= 8:
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    !pip install --no-deps xformers trl peft accelerate bitsandbytes


Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-qft4cxr0/unsloth_ab350254fc0d44249fb8ee029dc0fcbc
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-qft4cxr0/unsloth_ab350254fc0d44249fb8ee029dc0fcbc
  Resolved https://github.com/unslothai/unsloth.git to commit 976d11a10d54383aeb7a692c69e01151a20bfd72
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading tyro-0.8.10-py3-none-any.whl.metadata (8.4 kB)
Collecting transformers>=4.43.2 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[

In [None]:
!pip install triton


Collecting triton
  Downloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)
Downloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.4/209.4 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: triton
Successfully installed triton-3.0.0


In [None]:
from unsloth import FastLanguageModel

# Load the 4-bit quantized LLaMA-3 model
model_name = "unsloth/llama-3-8b-bnb-4bit"
max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.8: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.27.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/198 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

In [None]:
import json

# Load your JSON dataset
with open('/content/stats_problems_dataset.json', 'r') as f:
    questions = json.load(f)


In [None]:
# Find entries missing the 'solution' key
missing_solution = [q for q in questions if 'solution' not in q]

# Find entries missing the 'steps' key within 'solution'
missing_steps = [q for q in questions if 'solution' in q and 'steps' not in q['solution']]

print(f"Found {len(missing_solution)} entries without 'solution'.")
print(f"Found {len(missing_steps)} entries without 'steps'.")


Found 3 entries without 'solution'.
Found 8 entries without 'steps'.


In [None]:
import json
from datasets import Dataset


def format_question(q):
    instruction = "Solve the following statistics problem."
    problem = q.get('problem', '')

    solution_steps = q.get('solution', {}).get('steps', [])
    solution_steps = "\n".join(solution_steps) if isinstance(solution_steps, list) else str(solution_steps)

    conclusion = q.get('solution', {}).get('conclusion', '')
    conclusion = str(conclusion)  # Ensure conclusion is a string

    explanation = q.get('explanation', '')

    formatted = f"### Instruction:\n{instruction}\n\n### Problem:\n{problem}\n\n### Solution Steps:\n{solution_steps}\n\n### Conclusion:\n{conclusion}\n\n### Explanation:\n{explanation}"

    return formatted

# Filter out invalid entries and format the data
valid_formatted_data = []
for q in questions:
    try:
        formatted = format_question(q)
        if formatted.strip():  # Ensure we're not adding empty strings
            valid_formatted_data.append(formatted)
    except Exception as e:
        print(f"Skipping an invalid entry: {e}")

# Create the dataset
dataset = Dataset.from_dict({"text": valid_formatted_data})

print(f"Processed {len(valid_formatted_data)} valid questions out of {len(questions)} total.")

Processed 1905 valid questions out of 1905 total.


In [None]:
from unsloth import FastLanguageModel

# Apply LoRA adapters for efficient fine-tuning
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # You can experiment with this value
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)


Unsloth 2024.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# Import necessary libraries
from google.colab import drive
from transformers import TrainingArguments, AutoModelForCausalLM, AutoTokenizer
from trl import SFTTrainer

# Mount Google Drive to save your work
drive.mount('/content/drive')

# Set your output directory to Google Drive
output_dir = '/content/drive/MyDrive/fine_tuned_model'


Mounted at /content/drive


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

training_args = TrainingArguments(
    per_device_train_batch_size=1,  # Reduced from 2 due to T4 GPU constraints
    gradient_accumulation_steps=8,  # Increased from 4 to compensate for smaller batch size
    warmup_steps=10,
    max_steps=-1,  # Set to -1 to disable max steps limit
    num_train_epochs=3,  # Adjust based on performance and time constraints
    learning_rate=1e-4,  # Slightly reduced from 2e-4
    fp16=True,  # T4 GPU supports FP16
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",  # Changed to cosine for potentially better performance
    seed=42,
    output_dir="outputs",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)


Map (num_proc=2):   0%|          | 0/1905 [00:00<?, ? examples/s]

In [None]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,905 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 8
\        /    Total batch size = 8 | Total steps = 714
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
10,1.0206
20,0.7064
30,0.5702
40,0.5641
50,0.4557
60,0.4648
70,0.4549
80,0.4236
90,0.3899
100,0.429


TrainOutput(global_step=714, training_loss=0.30399076152248544, metrics={'train_runtime': 8142.6521, 'train_samples_per_second': 0.702, 'train_steps_per_second': 0.088, 'total_flos': 7.369715558075597e+16, 'train_loss': 0.30399076152248544, 'epoch': 2.998425196850394})

In [None]:
model.save_pretrained("outputs/fine_tuned_model")
tokenizer.save_pretrained("outputs/fine_tuned_model")


('outputs/fine_tuned_model/tokenizer_config.json',
 'outputs/fine_tuned_model/special_tokens_map.json',
 'outputs/fine_tuned_model/tokenizer.json')

In [None]:
import shutil

# Zip the 'outputs/fine_tuned_model' directory
shutil.make_archive("fine_tuned_model", 'zip', "outputs/fine_tuned_model")



KeyboardInterrupt: 

In [None]:
# Move the zip file to Google Drive
shutil.move("fine_tuned_model.zip", "/content/drive/MyDrive/fine_tuned_model.zip")


'/content/drive/MyDrive/fine_tuned_model.zip'

In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os

# Create a new directory in Google Drive to store the outputs (optional)
output_drive_dir = '/content/drive/MyDrive/Colab_Outputs'
os.makedirs(output_drive_dir, exist_ok=True)
import shutil

# Move the entire 'outputs' folder to Google Drive
shutil.move('outputs', output_drive_dir)

# Move the 'checkpoint-500' folder to Google Drive
shutil.move('checkpoint-500', output_drive_dir)

# Move the 'checkpoint-714' folder to Google Drive
shutil.move('checkpoint-714', output_drive_dir)

# Move the 'fine_tuned_model' folder to Google Drive
shutil.move('fine_tuned_model', output_drive_dir)

# Move the 'runs' folder to Google Drive
shutil.move('runs', output_drive_dir)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


FileNotFoundError: [Errno 2] No such file or directory: 'checkpoint-500'

In [None]:
import os

# List the contents of the saved model directory
os.listdir(output_dir)


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/fine_tuned_model'

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the fine-tuned model and tokenizer
model = AutoModelForCausalLM.from_pretrained("outputs/fine_tuned_model")
tokenizer = AutoTokenizer.from_pretrained("outputs/fine_tuned_model")

# Check if the attribute exists
if not hasattr(model.config, 'max_seq_length'):
    model.config.max_seq_length = 1024  # or another appropriate value

# Example inference
input_text = "Solve the following statistics problem: Calculate the mean and median for the dataset: 2, 4, 6, 8, 10."
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=150)

# Print the generated output
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


`low_cpu_mem_usage` was None, now set to True since model is quantized.


OutOfMemoryError: CUDA out of memory. Tried to allocate 28.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 21.06 MiB is free. Process 5188 has 14.72 GiB memory in use. Of the allocated memory 14.56 GiB is allocated by PyTorch, and 8.35 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)