In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
[2025-03-19 15:47:42,842] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
# Identifier of the base checkpoint handed to FastLanguageModel.from_pretrained.
model_name = "microsoft/Phi-3.5-mini-instruct"
# Dataset file name; in the visible cells it is only used to derive the run directory.
dataset_name = "ai-abstract-dataset.jsonl.xz"
# Run directory: last path component of the model id immediately followed by the
# part of the dataset file name before its first dot (no separator in between).
# The adapter is later loaded from this exact path, so the spelling must not change.
output_dir = f"runs/{model_name.rsplit('/', 1)[-1]}{dataset_name.partition('.')[0]}"

In [3]:
# Load the base model together with its tokenizer through Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name,
    max_seq_length=2048,
    dtype=None,  # autodetect
    load_in_4bit=False,  # no 4-bit loading
)

==((====))==  Unsloth 2025.1.8: Fast Llama patching. Transformers: 4.48.2.
   \\   /|    GPU: NVIDIA GeForce RTX 4090. Max memory: 23.524 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = True]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
from peft import PeftModel

# Wrap the base model with the PEFT weights stored under `output_dir`.
# NOTE(review): `model` is rebound here, so running this cell a second time
# would wrap the already-wrapped model again.
model = PeftModel.from_pretrained(model=model, model_id=output_dir)

In [5]:
# Merge the loaded PEFT weights into the base model and keep only the merged
# model (the repr echoed by a later cell shows a plain LlamaForCausalLM).
# NOTE(review): `model` is rebound in place, so re-running this cell on the
# already-merged model will presumably fail — restart the kernel instead.
model = model.merge_and_unload()

In [6]:
# Put the merged model into Unsloth's inference mode. Being the last expression
# of the cell, the returned model is echoed as the cell output.
FastLanguageModel.for_inference(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (v_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
      )
 

In [7]:
# Display the Jinja chat template the tokenizer applies to a list of messages
# (it shows which role markers and end tokens surround each turn).
tokenizer.chat_template

"{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}"

In [8]:
# Conversation to render: a system persona followed by the user's request.
# The system text is a triple-quoted literal, so its line break and the
# indentation of its second line are part of the prompt.
messages = [
    {
        "role": "system",
        "content": """You are an educated researcher and always answer in correct scientific terms.
                             You are very deep into AI and its methodologies. You are very creative.""",
    },
    {
        "role": "user",
        "content": "Write an abstract with the title 'New Training Methods for LLMs'",
    },
]

In [16]:
# Render `messages` with the tokenizer's chat template into a PyTorch tensor of
# token ids. add_generation_prompt=True makes the template end with the
# assistant header, so generation starts a new assistant turn.
# The ids are moved to the device the model lives on rather than a hard-coded
# "cuda", so they cannot end up on a different device than the weights.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

In [17]:
# Decode the prompt ids back into text to check what the model will actually see.
print(tokenizer.decode(inputs[0]))

<|system|> You are an educated researcher and always answer in correct scientific terms.
                             You are very deep into AI and its methodologies. You are very creative.<|end|><|user|> Write an abstract with the title 'Frontiers in Increasing Random Forest Classification'<|end|><|assistant|>


In [18]:
# Sample a continuation of the prompt, generating at most 512 new tokens.
# Sampling is on (do_sample=True) and no seed is set in the visible cells, so
# every run of this cell can return a different text.
outputs = model.generate(
    inputs,
    max_new_tokens=512,
    use_cache=True,
    do_sample=True,
    temperature=0.7,
    top_k=25,
    top_p=0.8,
)

In [19]:
# Decode the first (only) generated sequence; special tokens are not stripped,
# and the text starts with the prompt followed by the model's reply.
print(tokenizer.decode(outputs[0]))

<|system|> You are an educated researcher and always answer in correct scientific terms.
                             You are very deep into AI and its methodologies. You are very creative.<|end|><|user|> Write an abstract with the title 'Frontiers in Increasing Random Forest Classification'<|end|><|assistant|> This paper presents a comprehensive exploration of the frontiers in increasing Random Forest classification. The Random Forest algorithm, a popular ensemble learning method, has demonstrated remarkable performance in various machine learning tasks. However, the quest for improved accuracy, efficiency, and generalization continues to drive research in this area. This paper provides an in-depth analysis of recent advancements in Random Forest classification, including novel techniques for feature selection, ensemble learning strategies, and integration with deep learning architectures. We discuss the challenges and opportunities in optimizing Random Forest algorithms for different

In [13]:
# Sample a continuation of the prompt, generating at most 512 new tokens.
# Sampling is on (do_sample=True) and no seed is set in the visible cells, so
# every run of this cell can return a different text.
outputs = model.generate(
    inputs,
    max_new_tokens=512,
    use_cache=True,
    do_sample=True,
    temperature=0.7,
    top_k=25,
    top_p=0.8,
)

In [14]:
# Decode the first (only) generated sequence; special tokens are not stripped,
# and the text starts with the prompt followed by the model's reply.
print(tokenizer.decode(outputs[0]))

<|system|> You are an educated researcher and always answer in correct scientific terms.
                             You are very deep into AI and its methodologies. You are very creative.<|end|><|user|> Write an abstract with the title 'New Training Methods for LLMs'<|end|><|assistant|> The training of Large Language Models (LLMs) is a complex and time-consuming process that involves optimizing a large number of parameters using gradient descent. In this paper, we propose new training methods that can improve the efficiency and effectiveness of LLM training. Our methods include techniques such as knowledge distillation, parameter-efficient training, and data augmentation. We demonstrate the effectiveness of our methods through experiments on LLMs such as GPT-3 and BERT. Our results show that our methods can improve the performance of LLMs while reducing the training time and computational resources required.<|end|><|endoftext|>


In [15]:
# Second conversation: same system persona, different abstract title.
# The system text is a triple-quoted literal, so its line break and the
# indentation of its second line are part of the prompt.
# NOTE(review): the outputs recorded for the tokenize/generate cells further up
# already show this title, so this cell was apparently executed before them.
# On a top-to-bottom run those cells would use the earlier `messages` instead.
messages = [
    {
        "role": "system",
        "content": """You are an educated researcher and always answer in correct scientific terms.
                             You are very deep into AI and its methodologies. You are very creative.""",
    },
    {
        "role": "user",
        "content": "Write an abstract with the title 'Frontiers in Increasing Random Forest Classification'",
    },
]