In [None]:
%%capture
# Install the fine-tuning stack. The first and last commands pass --no-deps, so pip
# installs only the named packages and leaves their dependencies alone
# (presumably to keep the runtime's preinstalled torch build untouched — TODO confirm).
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
# These packages are installed with normal dependency resolution; datasets and
# huggingface_hub carry explicit version ranges.
!pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
!pip install --no-deps unsloth

In [None]:
%%capture
# Regular install of unsloth (no --no-deps here, so its dependencies are resolved as well).
!pip install unsloth
# Also get the latest nightly Unsloth!
# The released package is removed and replaced, without touching dependencies, by the
# `nightly` branch of unsloth and the default branch of unsloth-zoo from GitHub.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git@nightly git+https://github.com/unslothai/unsloth-zoo.git

In [None]:
import torch
from unsloth.chat_templates import get_chat_template, train_on_responses_only
from trl import SFTTrainer, SFTConfig
from unsloth import FastLanguageModel
from datasets import Dataset

import pandas as pd

In [None]:
import ast

In [None]:
# Base checkpoint (already 4-bit quantized, per its name) and the context length
# shared by model loading and training.
model_id = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
max_seq_length = 1024

# Load the model together with its tokenizer.
# dtype=None: presumably lets Unsloth pick the precision for the GPU — TODO confirm.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
    device_map="auto",
)

==((====))==  Unsloth 2025.10.11: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

In [None]:
# Projection layers that receive LoRA adapters: the four attention projections
# followed by the three MLP projections.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with LoRA adapters. `model` is rebound to the wrapped model,
# so re-running this cell operates on an already-wrapped model (the recorded output
# shows Unsloth skipping the step in that case).
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=lora_target_modules,
    use_gradient_checkpointing='unsloth',
    use_rslora=False,
    loftq_config=None,
    random_state=3407,
)

Unsloth: Already have LoRA adapters! We shall skip this step.


In [None]:
# Alternative data source kept for reference (disabled):
# from datasets import load_dataset
# dataset = load_dataset('subinc/youtube-comment-summary')
# Read the training examples from a CSV at an absolute path under /content.
# NOTE(review): the path is hard-coded; the file must exist there before this cell runs.
df = pd.read_csv('/content/news_input_output.csv')

In [None]:
# Keep only the three columns used to build training conversations, then preview.
kept_columns = ['topic', 'input', 'output']
df = df.loc[:, kept_columns]
df.head()

Unnamed: 0,topic,input,output
0,Elections and voting behavior,"[""News number : 1\n Score : 1696 \n title : “T...",The recent No Kings protests against Trump's l...
1,Political corruption scandals,"[""News number : 1\n Score : 24 \n title : How ...",In recent discussions surrounding political in...
2,Government policy reforms,['News number : 1\n Score : 170 \n title : Opi...,Recent public discourse reveals a growing mome...
3,International diplomacy,['News number : 1\n Score : 581 \n title : Int...,The recent discussions surrounding internation...
4,Refugee and immigration policies,['News number : 1\n Score : 335 \n title : Una...,Recent discussions highlight critical issues s...


In [None]:
# Preview the first two rows (a shorter view of the frame already shown in the previous cell).
df.head(2)

Unnamed: 0,topic,input,output
0,Elections and voting behavior,"[""News number : 1\n Score : 1696 \n title : “T...",The recent No Kings protests against Trump's l...
1,Political corruption scandals,"[""News number : 1\n Score : 24 \n title : How ...",In recent discussions surrounding political in...


In [None]:
def get_sys_prompt(topic):
  """Build the system prompt for a summarization request about `topic`.

  Args:
    topic: value interpolated into the first sentence of the prompt.

  Returns:
    The prompt string. The line breaks and leading spaces inside the template
    literal are part of the returned text.
  """
  return f'''You are given topic {topic} and top 3 relevant news headlines along with selected user comments
                  from multiple blog posts. Your task is to create a single, well-structured paragraph
                  that summarizes all key information in an informative, concise, and cohesive way.'''


def make_conversation(row):
  """Turn one dataframe row into a (system, user, assistant) message triple.

  Args:
    row: mapping-like row providing the 'topic', 'input' and 'output' fields.

  Returns:
    A 3-tuple of {'role', 'content'} dicts: the system prompt for the row's
    topic, the row's input as the user turn, and the row's output as the
    assistant turn.
  """
  system_msg = {'role': 'system', 'content': get_sys_prompt(row['topic'])}
  user_msg = {'role': 'user', 'content': row['input']}
  assistant_msg = {'role': 'assistant', 'content': row['output']}
  return system_msg, user_msg, assistant_msg

# Build one conversation per row (axis=1 passes each row to make_conversation)
# and store it in a new 'conversation' column.
df['conversation'] = df.apply(make_conversation,axis=1)

In [None]:
# Rebind the tokenizer to the one returned by get_chat_template for the 'llama-3.2' template,
# which is what later apply_chat_template calls render with.
tokenizer = get_chat_template(tokenizer,chat_template='llama-3.2')

In [None]:
def apply_chat_template_custom(chat):
  """Render one conversation into a single training string.

  Args:
    chat: sequence of {'role', 'content'} message dicts for one example.

  Returns:
    The chat-template text (no generation prompt appended) with a leading
    BOS marker removed when the tokenizer defines one and the text starts
    with it.
  """
  text = tokenizer.apply_chat_template(
      chat,
      tokenize=False,
      add_generation_prompt=False,
  )
  # Strip the tokenizer's own BOS marker instead of the literal '<bos>': that literal
  # is not the BOS token of the Llama tokenizer loaded in this notebook, so replacing
  # it never removed anything. Removing the real leading BOS avoids a duplicate when
  # the text is tokenized later with special tokens enabled (verify against the
  # trainer version in use).
  bos_token = getattr(tokenizer, 'bos_token', None)
  if bos_token and text.startswith(bos_token):
    text = text[len(bos_token):]
  return text

# Render every conversation to its chat-template string; 'text' is the column the
# trainer reads (dataset_text_field="text" in the SFTConfig below).
df['text'] = df['conversation'].apply(apply_chat_template_custom)

In [None]:
# Convert the dataframe into a datasets.Dataset for the trainer; the bare name on the
# last line displays its features and row count.
train_dataset = Dataset.from_pandas(df)
train_dataset

Dataset({
    features: ['topic', 'input', 'output', 'conversation', 'text'],
    num_rows: 100
})

In [None]:
max_seq_length = 1024

# Hyperparameters for the supervised fine-tuning run: 2 epochs, batch size 8 with no
# gradient accumulation, linear schedule with 5 warmup steps, loss logged every step.
sft_args = SFTConfig(
    dataset_text_field="text",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    num_train_epochs=2,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    optim="adamw_8bit",
    weight_decay=0.01,
    logging_steps=1,
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# Trainer over the rendered 'text' column; no evaluation set is used.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=None,
    max_seq_length=max_seq_length,
    args=sft_args,
)

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
from unsloth.chat_templates import train_on_responses_only

# Header strings that open the user turn and the assistant turn in the rendered text.
user_turn_header = "<|start_header_id|>user<|end_header_id|>\n\n"
assistant_turn_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"

# Re-wrap the trainer so training targets only the assistant responses
# (presumably by masking labels outside the response part — see the Unsloth docs).
trainer = train_on_responses_only(
    trainer,
    instruction_part=user_turn_header,
    response_part=assistant_turn_header,
)

Map (num_proc=6):   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
# Run the fine-tuning loop; the value returned by trainer.train() is kept in train_stats.
train_stats = trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 100 | Num Epochs = 2 | Total steps = 26
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 1 x 1) = 8
 "-____-"     Trainable parameters = 22,544,384 of 1,258,358,784 (1.79% trained)


Step,Training Loss
1,2.3325
2,2.2869
3,2.328
4,2.2328
5,2.2448
6,2.0657
7,2.2643
8,2.0924
9,2.0043
10,1.9544


Unsloth: Will smartly offload gradients to save VRAM!


In [None]:
# Show the system message of the first training conversation.
# Indexes df directly instead of using `x1`, which is only assigned in a later cell
# and therefore does not exist here on a fresh Restart & Run All.
df['conversation'].iloc[0][0]

{'role': 'system',
 'content': 'You are given topic Elections and voting behavior and top 3 relevant news headlines along with selected user comments\n                  from multiple blog posts. Your task is to create a single, well-structured paragraph \n                  that summarizes all key information in an informative, concise, and cohesive way.'}

In [None]:
# Hand-written sample input for a generation check: three news items with scores and comments.
# The literal is not a raw string, so each \n escape becomes a real newline in the text.
news1 = '''[
"News number : 1\nScore : 5463\nTitle : [OC] Political and Social differences between Gen Z Men and Women in the US\nComment : ▲ 2531 | Women are more cohesive politically, while men are evenly split.\nComment : ▲ 538 | The chart could be misleading.\nComment : ▲ 2424 | Many men recognize toxic masculinity as an issue.\nEND",
"News number : 2\nScore : 13883\nTitle : Nearly 40% of Gen Z women identify as atheist, agnostic, or no faith.\nComment : ▲ 2588 | Good for them.\nComment : ▲ 1267 | Surprised it’s not higher.\nComment : ▲ 434 | That number is still low.\nEND",
"News number : 3\nScore : 7176\nTitle : Gen Z gooooood?\nContent : For youths aged 12-20 in the US, the average age of first alcohol use was 13.65 years during 1991-1993.\nEND"
]'''

In [None]:
from transformers import TextStreamer
from pprint import pprint

# Sixth stored training conversation, kept in a notebook-level name.
x1 = df['conversation'][5]

# Prompt = system instruction for the topic + the hand-written news sample.
messages = [
    {"role": "system", "content": get_sys_prompt('gen Z')},
    {"role": "user", "content": news1},
]

# Render with the generation prompt appended so the model continues as the assistant.
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Stream decoded tokens to the cell output as they are produced, without echoing the prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True)

# Nucleus sampling settings for this sample generation.
sampling_kwargs = dict(
    do_sample=True,
    temperature=0.9,
    top_p=0.9,
    max_new_tokens=1250,
)
_ = model.generate(**inputs, streamer=streamer, **sampling_kwargs)

A recent poll highlights a significant political and social divide between Gen Z men and women in the United States. A survey indicates that women are more cohesive politically than men, showing a trend of 2531 women holding strong convictions versus 538 men, with many men recognizing toxic masculinity as a pressing issue. This disparity underscores a growing awareness among men about the importance of gender equality, reflecting a broader societal shift where both genders are increasingly advocating for fairness and accountability. Meanwhile, a significant portion of Gen Z women identify as atheists, agnostics, or non-believers, sparking discussions about religious tolerance and the future of faith in the country. The popularity of this sentiment suggests a growing desire for inclusivity, highlighting a collective effort to move beyond traditional religious affiliations and create a more accepting environment. These contrasting narratives underscore the evolving values and priorities 

In [None]:
# Save the trained model and the tokenizer side by side in the "QuickScope" directory;
# the last call's return value (the written tokenizer file paths) is displayed.
trainer.save_model("QuickScope")
tokenizer.save_pretrained("QuickScope")

('QuickScope/tokenizer_config.json',
 'QuickScope/special_tokens_map.json',
 'QuickScope/chat_template.jinja',
 'QuickScope/tokenizer.json')

In [None]:
# Colab-only: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from unsloth import FastLanguageModel

# Directory of a previously saved model.
# NOTE(review): this is not the "QuickScope" directory written by the save cell above —
# confirm which checkpoint is intended.
saved_model_dir = "./get-updates-model"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=saved_model_dir,
    max_seq_length=1024,      # context length; adjust as needed
    load_in_4bit=True,        # use when the model was saved quantized
    dtype=None,               # or pass float16/float32 explicitly
    local_files_only=True,    # load from local files only
)

# Switch the model to Unsloth's inference mode; as the last expression, the returned
# model is also displayed.
FastLanguageModel.for_inference(model)


==((====))==  Unsloth 2025.10.9: Fast Gemma3 patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.
Unsloth: Gemma3 does not support SDPA - switching to fast eager.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma3ForCausalLM(
      (model): Gemma3TextModel(
        (embed_tokens): Gemma3TextScaledWordEmbedding(262144, 640, padding_idx=0)
        (layers): ModuleList(
          (0-2): 3 x Gemma3DecoderLayer(
            (self_attn): Gemma3Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=640, out_features=1024, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=640, out_features=128, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=128, out_features=1024, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
       

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from transformers import TextStreamer

In [None]:
# Local directory holding the fine-tuned model.
model_path = "./fine-tuned"

# 4-bit NF4 quantization with bfloat16 as the compute precision.
# NOTE(review): this cell needs the names imported in the preceding cell; the recorded
# NameError suggests it was run before that cell.
bnb_4bit = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_path)

# NOTE(review): bfloat16 is requested for the non-quantized weights as well; confirm the
# target GPU handles bfloat16 efficiently.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_4bit,
)
model.eval()

NameError: name 'BitsAndBytesConfig' is not defined

In [None]:
!pip install bitsandbytes==0.43.2

Collecting bitsandbytes==0.43.2
  Downloading bitsandbytes-0.43.2-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.43.2-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.2


In [None]:
# Sanity check: import bitsandbytes and report the installed version.
import bitsandbytes as bnb
print("bitsandbytes version:", bnb.__version__)



ModuleNotFoundError: No module named 'triton.ops'