In [1]:
from unsloth import FastLanguageModel
import torch
import json

Unsloth: Patching Xformers to fix some performance issues.
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
import os

# Hugging Face access token. Read from the HF_TOKEN environment variable so the
# secret is never stored in the notebook file or its history. When the variable
# is unset this is None; huggingface_hub is documented to fall back to locally
# saved credentials in that case (NOTE(review): confirm the Unsloth push helper
# does the same).
hf_key = os.environ.get("HF_TOKEN")

# Name of the Hub repository that the delete/push cells operate on.
repo = "AlphaBuffet"

In [3]:
# Candidate checkpoints: Mistral Small 24B (base and instruct) and Phi-4, each
# followed by its bnb-4bit variants. The one actually loaded is picked by index
# in the from_pretrained call below.
models = [
    # Mistral Small 24B - base
    "unsloth/Mistral-Small-24B-Base-2501",
    "unsloth/Mistral-Small-24B-Base-2501-bnb-4bit",
    "unsloth/Mistral-Small-24B-Base-2501-unsloth-bnb-4bit",
    # Mistral Small 24B - instruct
    "unsloth/Mistral-Small-24B-Instruct-2501",
    "unsloth/Mistral-Small-24B-Instruct-2501-bnb-4bit",
    "unsloth/Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit",
    # Phi-4
    "unsloth/phi-4",
    "unsloth/phi-4-bnb-4bit",
    "unsloth/phi-4-unsloth-bnb-4bit",
]

# Loader settings; each one is forwarded unchanged to from_pretrained.
# max_seq_length is reused later when the trainer is built.
max_seq_length = 8192
load_in_4bit = False
dtype = None

# Index 3 selects "unsloth/Mistral-Small-24B-Instruct-2501".
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = models[3],
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...",  # only needed for gated models such as meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2025.2.4: Fast Mistral patching. Transformers: 4.48.3.
   \\   /|    GPU: NVIDIA H100 PCIe. Max memory: 79.097 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1. CUDA: 9.0. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Downloading shards: 100%|██████████| 10/10 [01:36<00:00,  9.63s/it]
Loading checkpoint shards: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]


In [4]:
# Layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",   # attention projections
    "gate_proj", "up_proj", "down_proj",      # MLP projections
]

# Wrap the loaded model with LoRA adapters. Note that `model` is rebound to the
# wrapped model, so this cell is meant to run once per freshly loaded model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 32,                                  # LoRA rank; any value > 0 (8, 16, 32, 64, 128 suggested)
    target_modules = lora_target_modules,
    lora_alpha = 32,
    lora_dropout = 0,                        # any value is supported, 0 is the optimized path
    bias = "none",                           # any value is supported, "none" is the optimized path
    # "unsloth" checkpointing: advertised as 30% less VRAM / 2x larger batches;
    # True or "unsloth" are the options for very long context.
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,                      # rank-stabilized LoRA is available but not used
    loftq_config = None,                     # LoftQ is available but not used
)

Unsloth 2025.2.4 patched 40 layers with 40 QKV layers, 40 O layers and 40 MLP layers.


In [5]:
from unsloth.chat_templates import get_chat_template

# Phi-4 alternative (kept for reference, not executed):
#
#   tokenizer = get_chat_template(
#       tokenizer,
#       chat_template = "phi-4",
#   )

# Mistral: use the ChatML template. The mapping declares the ShareGPT-style
# field names and speaker labels (from/value, human/gpt).
sharegpt_mapping = {
    "role": "from",
    "content": "value",
    "user": "human",
    "assistant": "gpt",
}

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "chatml",
    mapping = sharegpt_mapping,
    map_eos_token = True,   # the cell log reports <|im_end|> being mapped to EOS
)

def formatting_prompts_func(examples):
    """Render every conversation of a batch into one chat-template string.

    Args:
        examples: a batch exposing a "conversations" entry that holds one
            message list per example.

    Returns:
        A dict with a single "text" key: the rendered strings, in the same
        order as the input conversations. Rendering uses the notebook-level
        `tokenizer`, without tokenizing and without a generation prompt.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize = False,
                add_generation_prompt = False,
            )
        )
    return {"text": rendered}

Unsloth: Will map <|im_end|> to EOS = </s>.
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.


Note that all these format conversions are somewhat pointless: I'm only doing them to stay faithful to the original Unsloth notebooks and to avoid regenerating the
synthetic data. It would be possible to have the LLM output the right JSON format directly, or to use get_chat_template on the format I already have.

In [6]:
def convert_conversations_json(input_file, output_file, raise_on_error=False):
    """
    Convert a nested role/content conversation file to ShareGPT-style from/value.

    Input format: {"conversations": [[{"conversations": [{"role": "...", "content": "..."}]}]]}
    Output format: {'conversations': [[{'from': '...', 'value': '...'}]]}

    Roles are renamed to their ShareGPT aliases ('assistant' -> 'gpt',
    'user' -> 'human'); any other role (e.g. 'system', or an already
    converted 'human'/'gpt') is copied through unchanged. Only the first
    element of each conversation group is read.

    Args:
        input_file: path of the JSON file to read (UTF-8).
        output_file: path of the JSON file to write (UTF-8, indent=2,
            non-ASCII characters kept as-is).
        raise_on_error: when True, re-raise any failure instead of reporting
            it in the returned string. Defaults to False.

    Returns:
        A status message: "Successfully converted ..." on success, or
        "Error converting JSON: ..." on failure when raise_on_error is False.

    Raises:
        Whatever exception the read, conversion or write step raised, but
        only when raise_on_error is True.
    """
    # ShareGPT speaker labels; roles not listed here pass through untouched.
    role_aliases = {'assistant': 'gpt', 'user': 'human'}

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # One output message list per conversation group; the messages live
        # under the 'conversations' key of the group's first element.
        result = [
            [
                {
                    'from': role_aliases.get(msg['role'], msg['role']),
                    'value': msg['content'],
                }
                for msg in conv_group[0]['conversations']
            ]
            for conv_group in data['conversations']
        ]

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump({'conversations': result}, f, indent=2, ensure_ascii=False)

        return f"Successfully converted {input_file} to {output_file}"

    except Exception as e:
        if raise_on_error:
            raise
        # Default contract: report the failure as a string instead of raising.
        return f"Error converting JSON: {str(e)}"

In [7]:
# Convert the combined ground-truth file. The converter reports failures only
# through its returned message, so stop here on anything but success; otherwise
# the next cell could load a stale or missing dataset_clean.json.
conversion_status = convert_conversations_json('Dataset/Processed/Ground Truth/dataset_combined_2.json',
                                               'Dataset/Processed/Ground Truth/dataset_clean.json')
if not conversion_status.startswith("Successfully converted"):
    raise RuntimeError(conversion_status)
conversion_status

'Successfully converted Dataset/Processed/Ground Truth/dataset_combined_2.json to Dataset/Processed/Ground Truth/dataset_clean.json'

In [8]:
from datasets import load_dataset

# File written by the conversion step above.
clean_dataset_path = "Dataset/Processed/Ground Truth/dataset_clean.json"

# Load it with the "json" builder and take the train split.
dataset = load_dataset(
    "json",
    data_files = clean_dataset_path,
    split = "train",
)

Generating train split: 3482 examples [00:00, 46531.36 examples/s]


In [9]:
# Spot-check: show the conversation stored at index 5 of the freshly loaded
# dataset (messages are still in from/value form at this point).
dataset[5]['conversations']

[{'from': 'human',
  'value': 'How did the overall performance of your equity investments compare to the Dow-Jones Industrial Average during the same period?'},
 {'from': 'gpt',
  'value': "Well, it's pretty clear we came out on top. While the Dow-Jones Industrial Average actually declined from 852 to 805, our overall unrealized and realized pre-tax gains in equities for the three-year period came to approximately $112 million. I'd say that was a marvelous period for us value-oriented equity buyers."}]

In [10]:
from unsloth.chat_templates import standardize_sharegpt

# Convert the ShareGPT-style messages to role/content form; the cell outputs
# before and after show from/value becoming role/content and human/gpt becoming
# user/assistant. `dataset` is rebound to the standardized version.
dataset = standardize_sharegpt(dataset)

Standardizing format: 100%|██████████| 3482/3482 [00:00<00:00, 38712.92 examples/s]


In [11]:
# Spot-check the same example (index 5) after standardization.
dataset[5]['conversations']

[{'content': 'How did the overall performance of your equity investments compare to the Dow-Jones Industrial Average during the same period?',
  'role': 'user'},
 {'content': "Well, it's pretty clear we came out on top. While the Dow-Jones Industrial Average actually declined from 852 to 805, our overall unrealized and realized pre-tax gains in equities for the three-year period came to approximately $112 million. I'd say that was a marvelous period for us value-oriented equity buyers.",
  'role': 'assistant'}]

In [12]:
# Add a "text" column holding each conversation rendered through the chat
# template. batched=True makes map() hand formatting_prompts_func a batch of
# rows per call, which is what that function expects.
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)

Map: 100%|██████████| 3482/3482 [00:00<00:00, 31500.21 examples/s]


In [13]:
# Spot-check the rendered training text of example 5.
dataset[5]['text']

"<|im_start|>user\nHow did the overall performance of your equity investments compare to the Dow-Jones Industrial Average during the same period?<|im_end|>\n<|im_start|>assistant\nWell, it's pretty clear we came out on top. While the Dow-Jones Industrial Average actually declined from 852 to 805, our overall unrealized and realized pre-tax gains in equities for the three-year period came to approximately $112 million. I'd say that was a marvelous period for us value-oriented equity buyers.<|im_end|>\n"

In [14]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Mixed-precision mode: bf16 when the GPU supports it, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Optimisation settings. The effective batch size is
# per_device_train_batch_size * gradient_accumulation_steps = 8.
training_args = TrainingArguments(
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    warmup_steps = 5,
    num_train_epochs = 2,
    learning_rate = 2e-4,
    fp16 = not use_bf16,
    bf16 = use_bf16,
    logging_steps = 1,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",
    report_to = "none",
)

# Supervised fine-tuning on the rendered "text" column, one example per
# sequence (no packing).
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = training_args,
)

Map (num_proc=2): 100%|██████████| 3482/3482 [00:02<00:00, 1630.05 examples/s]


In [15]:
from unsloth.chat_templates import train_on_responses_only

# Phi-4 alternative (kept for reference, not executed):
#
#   trainer = train_on_responses_only(
#       trainer,
#       instruction_part="<|im_start|>user<|im_sep|>",
#       response_part="<|im_start|>assistant<|im_sep|>",
#   )

# Mistral with the ChatML template: these are the turn headers that open a
# user turn and an assistant turn in the rendered text.
chatml_user_header = "<|im_start|>user\n"
chatml_assistant_header = "<|im_start|>assistant\n"

# Rebind the trainer so that only the assistant responses are trained on.
trainer = train_on_responses_only(
    trainer,
    instruction_part = chatml_user_header,
    response_part = chatml_assistant_header,
)


Map: 100%|██████████| 3482/3482 [00:00<00:00, 10336.49 examples/s]


In [16]:
# Decode the token ids of training example 5 back to text to verify what the
# trainer will actually see as input.
tokenizer.decode(trainer.train_dataset[5]["input_ids"])

"<|im_start|>user\nHow did the overall performance of your equity investments compare to the Dow-Jones Industrial Average during the same period?<|im_end|>\n<|im_start|>assistant\nWell, it's pretty clear we came out on top. While the Dow-Jones Industrial Average actually declined from 852 to 805, our overall unrealized and realized pre-tax gains in equities for the three-year period came to approximately $112 million. I'd say that was a marvelous period for us value-oriented equity buyers.<|im_end|>\n"

In [17]:
# Decode the labels of training example 5. Entries equal to -100 are replaced
# by the id of a space token first, so those positions show up as blanks in the
# decoded string.
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
example_labels = trainer.train_dataset[5]["labels"]
printable_ids = [space if label == -100 else label for label in example_labels]
tokenizer.decode(printable_ids)

"                                       \nWell, it's pretty clear we came out on top. While the Dow-Jones Industrial Average actually declined from 852 to 805, our overall unrealized and realized pre-tax gains in equities for the three-year period came to approximately $112 million. I'd say that was a marvelous period for us value-oriented equity buyers.<|im_end|>\n"

In [18]:
#@title Show current memory stats
# Bytes per GiB; dividing by it once gives the same value as three divisions by 1024.
BYTES_PER_GB = 1024 ** 3

gpu_stats = torch.cuda.get_device_properties(0)
# Total memory of device 0 and the peak memory reserved so far, both in GiB
# rounded to three decimals. start_gpu_memory is the pre-training baseline.
max_memory = round(gpu_stats.total_memory / BYTES_PER_GB, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / BYTES_PER_GB, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA H100 PCIe. Max memory = 79.097 GB.
44.617 GB of memory reserved.


In [19]:
# Run the fine-tuning loop; whatever train() returns is kept in trainer_stats.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 3,482 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 870
 "-____-"     Number of trainable parameters = 184,811,520


Step,Training Loss
1,2.2139
2,2.1315
3,1.9575
4,1.54
5,1.68
6,1.4701
7,1.4461
8,1.6171
9,1.4936
10,1.4964


In [20]:
from huggingface_hub import delete_repo

# DESTRUCTIVE: removes the existing Hub repository so the push below starts
# from a clean slate. missing_ok=True keeps a fresh run (where the repository
# does not exist yet) from failing on this cell.
delete_repo(
    repo_id=repo,
    token=hf_key,
    missing_ok=True,
)

In [21]:
# Merge the LoRA adapters into the model and upload the result as a private
# repository. save_method may be "merged_4bit" or "merged_16bit".
model.push_to_hub_merged(
    repo,
    tokenizer,
    save_method = "merged_16bit",
    commit_message = "Initial Commit",
    commit_description = "Initial Commit after Lora Fine Tuning",
    token = hf_key,
    private = True,
)

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 140.61 out of 221.18 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


 32%|███▎      | 13/40 [00:00<00:00, 41.29it/s]
We will save to Disk and not RAM now.
100%|██████████| 40/40 [02:25<00:00,  3.65s/it]


Unsloth: Saving to organization with address brokenlander/AlphaBuffet
 Done.h: Saving tokenizer...
Unsloth: Saving to organization with address brokenlander/AlphaBuffet
Unsloth: Uploading all files... Please wait...


100%|██████████| 11/11 [02:15<00:00, 12.30s/it]


Done.
Saved merged model to https://huggingface.co/None/AlphaBuffet


In [42]:
from unsloth.chat_templates import get_chat_template

# Put the model into Unsloth's inference mode before generating
# (NOTE(review): presumably required after training - confirm against the Unsloth docs).
FastLanguageModel.for_inference(model)

# Chat in role/content form: a system prompt that fixes the persona, the
# required answer structure and the style rules, followed by one user question.
# The system prompt text is sent to the model verbatim, so edit it with care.
messages = [
    {
        "role": "system",
        "content": """You are Warren Buffett, Chairman of Berkshire Hathaway. You MUST provide detailed, comprehensive responses that thoroughly explain your reasoning. Each response should be at least 500 words and cover multiple analytical angles.

EXAMPLE RESPONSE STRUCTURE (you must follow this format):
1. Opening Position Statement (2-3 paragraphs)
   - Clear statement of position
   - Initial context and background
   - Why this matters to investors

2. Historical Context & Lessons (2-3 paragraphs)
   - Relevant historical examples
   - Past similar situations
   - Lessons learned from history

3. Detailed Analysis (4-5 paragraphs)
   - Circle of competence considerations
   - Competitive analysis
   - Economic characteristics
   - Risks and challenges
   - Management quality

4. Investment Principles Application (2-3 paragraphs)
   - Margin of safety discussion
   - Moat analysis
   - Intrinsic value considerations
   - Long-term perspective

5. Personal Experience & Analogies (2-3 paragraphs)
   - Relevant personal stories
   - Nebraska/Omaha perspective
   - Folksy analogies
   - Baseball or farm analogies

6. Conclusion & Investment Wisdom (2-3 paragraphs)
   - Summary of key points
   - Broader investment lessons
   - Action items or recommendations

KEY REQUIREMENTS:
- MUST use specific numbers and calculations
- MUST include at least 3 analogies or metaphors
- MUST reference at least 2 historical examples
- MUST discuss both bull and bear cases
- MUST tie back to fundamental investment principles
- MUST include personal anecdotes
- MUST provide actionable insights

STYLE NOTES:
- Write as if preparing a detailed section for the annual shareholder letter
- Use plain language but show deep analytical thinking
- Include Graham-style value investing principles
- Reference key maxims: circle of competence, margin of safety, moats
- Maintain folksy but intelligent tone

End every response with a summary of 3-5 key takeaway principles."""
    },
    {
        "role": "user",
        "content": "Should I invest in AI?"
    }
]

# Render the chat through the tokenizer's chat template, append the generation
# prompt so the model answers as the assistant, and move the token ids to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

# do_sample=True is what makes temperature and min_p take effect. Without it
# generate() decodes greedily and ignores both settings (unless the model's
# own generation config already enables sampling). Sampling makes the output
# vary from run to run.
outputs = model.generate(
    input_ids = inputs,
    max_new_tokens = 1000,
    use_cache = True,
    do_sample = True,
    temperature = 0.3,
    min_p = 0.1,
)

# Decode the full sequences (prompt followed by the generated continuation).
tokenizer.batch_decode(outputs)

["<|im_start|>system\nYou are Warren Buffett, Chairman of Berkshire Hathaway. You MUST provide detailed, comprehensive responses that thoroughly explain your reasoning. Each response should be at least 500 words and cover multiple analytical angles.\n\nEXAMPLE RESPONSE STRUCTURE (you must follow this format):\n1. Opening Position Statement (2-3 paragraphs)\n   - Clear statement of position\n   - Initial context and background\n   - Why this matters to investors\n\n2. Historical Context & Lessons (2-3 paragraphs)\n   - Relevant historical examples\n   - Past similar situations\n   - Lessons learned from history\n\n3. Detailed Analysis (4-5 paragraphs)\n   - Circle of competence considerations\n   - Competitive analysis\n   - Economic characteristics\n   - Risks and challenges\n   - Management quality\n\n4. Investment Principles Application (2-3 paragraphs)\n   - Margin of safety discussion\n   - Moat analysis\n   - Intrinsic value considerations\n   - Long-term perspective\n\n5. Persona