In [3]:
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

Found existing installation: unsloth 2024.12.12
Uninstalling unsloth-2024.12.12:
  Successfully uninstalled unsloth-2024.12.12
Collecting git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-aiie2a0x
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-aiie2a0x
  Resolved https://github.com/unslothai/unsloth.git to commit 87f5bffc45a8af7f23a41650b30858e097b86418
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
  Created wheel for unsloth: filename=unsloth-2024.12.12-py3-none-any.whl size=175166 sha256=3c2dfa9659502a41ccdd0b14633a0795534f9b806283cef14a3e8559faeac29c
  Stored in directory: /tmp/pip-ephem-wheel-cache-ysifr5lj/wheel

In [4]:
import numpy as np

In [9]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 4092 
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

fourbit_models = [
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"            
] 

model, tokenizer = FastLanguageModel.from_pretrained(
    device_map="balanced",
    model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2024.12.12: Fast Mistral patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/157 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/446 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

In [10]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [11]:
# Define the alpaca_prompt template
alpaca_prompt = """
System: {0}

Instruction: {1}

Response: {2}
"""

# Tokenize and move to GPU
inputs = tokenizer(
    alpaca_prompt.format(
        "Provide the necessary calculations and reasoning to solve the given problem.",  # system
        "In a school, there are 400 students. Among them, 60% are boys.Calculate no of boys",  # instruction
        ""  # output - leave this blank for generation!
    ),
    return_tensors="pt"  # Convert to PyTorch tensors
).to("cuda")  # Move to GPU

EOS_TOKEN = tokenizer.eos_token  # Ensure EOS_TOKEN is included

# Define formatting function
def formatting_prompts_func(examples):
    systems = examples["system"]          # Fetch 'system' field
    instructions = examples["instruction"]  # Fetch 'instruction' field
    responses = examples["response"]      # Fetch 'response' field
    texts = []
    for system, instruction, response in zip(systems, instructions, responses):
        # Format using the alpaca_prompt and add EOS_TOKEN
        text = alpaca_prompt.format(system, instruction, response) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

from datasets import load_dataset
# Load the Synthia dataset
dataset = load_dataset("migtissera/Tess-v1.5", split="train")
# Apply formatting function
dataset = dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/125594 [00:00<?, ? examples/s]

In [12]:
total_length = 0
num_samples = 30000

for _ in range(num_samples):
    i = np.random.randint(0, 40000)
    sample = dataset[i]
    total_length += len(sample['system']) + len(sample['instruction']) + len(sample['response'])

mean_length = total_length / num_samples
print(mean_length)

4216.1809


In [13]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length =8192,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 150,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 5,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

Map (num_proc=2):   0%|          | 0/125594 [00:00<?, ? examples/s]

In [14]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 125,594 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 150
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
5,0.9895
10,0.8862
15,0.692
20,0.6641
25,0.6353
30,0.6048
35,0.6177
40,0.5456
45,0.5589
50,0.4861


TrainOutput(global_step=150, training_loss=0.5688667964935302, metrics={'train_runtime': 6781.7813, 'train_samples_per_second': 0.177, 'train_steps_per_second': 0.022, 'total_flos': 9.251224049836032e+16, 'train_loss': 0.5688667964935302, 'epoch': 0.009554596557160374})

In [15]:
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.model',
 'lora_model/added_tokens.json',
 'lora_model/tokenizer.json')

In [16]:
if True:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "lora_model", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# alpaca_prompt = You MUST copy from above!

inputs = tokenizer(
[
    alpaca_prompt.format(
        "Answer the question correctly and accurately", # instruction
        "How does the neurobiological process of synaptic plasticity contribute to the development and persistence of substance use disorders, particularly in the context of the reward system's dopamine pathways?", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 1000)

==((====))==  Unsloth 2024.12.12: Fast Mistral patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: Will load lora_model as a legacy tokenizer.


<s>
System: Answer the question correctly and accurately

Instruction: How does the neurobiological process of synaptic plasticity contribute to the development and persistence of substance use disorders, particularly in the context of the reward system's dopamine pathways?

Response: 

Synaptic plasticity refers to the ability of synapses, the junctions between neurons, to strengthen or weaken over time in response to increases or decreases in their activity. This process is crucial for learning and memory, as it allows the brain to adapt to new experiences and environments.

In the context of substance use disorders, synaptic plasticity plays a significant role in the development and persistence of these disorders. Substances of abuse, such as cocaine, amphetamine, and nicotine, interact with the brain's reward system, which is primarily mediated by the neurotransmitter dopamine.

When a person takes a drug, it interacts with dopamine receptors in the brain, leading to an increase in