In [None]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Base checkpoint on the Hugging Face hub and the dataset file name.
model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
dataset_name = "ai-abstract-dataset.jsonl.xz"

# Run directory: "runs/" + the last path segment of the model id + the dataset
# name up to its first dot.
# NOTE(review): the two parts are joined without a separator; presumably this
# has to match the directory written by the training run -- confirm before changing.
output_dir = f"runs/{model_name.rsplit('/', 1)[-1]}{dataset_name.partition('.')[0]}"

In [None]:
# Options for loading the base checkpoint named by `model_name`.
load_kwargs = dict(
    revision="main",
    trust_remote_code=False,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    # The k/v cache is off at load time; a later cell sets
    # model.config.use_cache = True before saving.
    use_cache=False,
    device_map="cuda",
    low_cpu_mem_usage=True,
)
model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

In [None]:
# Attach the adapter stored in `output_dir` to the base model.
# (`PeftModel` is imported in the notebook's import cell, so a fresh kernel
# fails on a missing dependency at the top instead of midway through.)
# This rebinds `model`: re-running the cell without reloading the base model
# would pass the already-wrapped model back into from_pretrained.
model = PeftModel.from_pretrained(model, output_dir)

In [None]:
# Replace the PEFT-wrapped model with the result of merge_and_unload()
# (per the PEFT docs: adapter weights merged into the base model, wrapper removed).
# `model` is rebound, so this cell is meant to run once after the adapter is attached.
model = model.merge_and_unload()

In [None]:
# Tokenizer is loaded from the base checkpoint (`model_name`), not from `output_dir`.
# NOTE(review): if fine-tuning changed the tokenizer or its chat template,
# this should load from the run directory instead -- confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Turn the k/v cache back on for fast inference (the model was loaded with
# use_cache=False), then write both the model and the tokenizer to "smol-ai".
model.config.use_cache = True
for artifact in (model, tokenizer):
    artifact.save_pretrained("smol-ai")

In [None]:
# Put the model in evaluation mode before generating.
# As the cell's last expression, the call's return value is shown as the cell output.
model.eval()

In [None]:
# Display the tokenizer's chat template, i.e. the format that
# apply_chat_template uses to render `messages` below.
tokenizer.chat_template

In [None]:
# Chat prompt: a system persona followed by the user's request.
# The system text is a single triple-quoted literal, so its line break and the
# leading spaces of the continuation line are part of the prompt string.
messages = [
    {
        "role": "system",
        "content": """You are an educated researcher and always answer in correct scientific terms.
                             You are very deep into AI and its methodologies. You are very creative.""",
    },
    {
        "role": "user",
        "content": "Write an abstract with the title 'New Training Methods for LLMs'",
    },
]

In [None]:
# Render `messages` with the chat template and tokenize to a PyTorch tensor.
# - add_generation_prompt=True appends the assistant-turn prefix so the model
#   continues with its answer.
# - return_dict=False keeps `inputs` a bare tensor of token ids: later cells pass
#   it directly to tokenizer.batch_decode(...) and model.generate(...).
#   NOTE(review): stated explicitly in case the library default differs between
#   transformers releases -- confirm against the installed version.
# - .to(model.device) follows wherever the model was actually placed instead of
#   repeating the hard-coded "cuda" from the loading cell.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=False,
).to(model.device)

In [None]:
# Show the fully rendered prompt (first and only sequence) exactly as the model will see it.
print(tokenizer.decode(inputs[0]))

In [None]:
# Sample a completion of up to 512 new tokens for the prompt in `inputs`.
sampling_kwargs = dict(do_sample=True, temperature=0.7, top_k=25, top_p=0.8)
outputs = model.generate(inputs, max_new_tokens=512, use_cache=True, **sampling_kwargs)

In [None]:
# Print the first generated sequence as text (prompt tokens included, special tokens kept).
print(tokenizer.decode(outputs[0]))

In [None]:
# Generate again with the same prompt and settings; `outputs` is overwritten.
# Sampling is enabled and no seed is set in the visible cells, so the text can
# differ from the previous generation.
outputs = model.generate(
    inputs,
    max_new_tokens=512,
    use_cache=True,
    do_sample=True,
    temperature=0.7,
    top_k=25,
    top_p=0.8,
)

In [None]:
# Decode every returned sequence, then print the first one.
decoded_outputs = tokenizer.batch_decode(outputs)
print(decoded_outputs[0])

In [None]:
# Second chat prompt: same system persona, different abstract title.
# The system text is a single triple-quoted literal, so its line break and the
# leading spaces of the continuation line are part of the prompt string.
messages = [
    {
        "role": "system",
        "content": """You are an educated researcher and always answer in correct scientific terms.
                             You are very deep into AI and its methodologies. You are very creative.""",
    },
    {
        "role": "user",
        "content": "Write an abstract with the title 'Frontiers in Increasing Random Forest Classification'",
    },
]