In [None]:
# ============================================================================
# CELL 1: UNSLOTH INSTALLATION (KAGGLE SPECIFIC)
# ============================================================================

# Uninstall any existing unsloth
!pip uninstall -y unsloth unsloth-zoo

# Install latest stable version for Kaggle
!pip install --upgrade --no-cache-dir "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"

print("‚úÖ Unsloth installed!")

## Overview

This notebook demonstrates two different approaches to fine-tuning a language model for generating professional articles from rough notes using the CNN/DailyMail dataset. Both methods use **Supervised Fine-Tuning (SFT)** with LoRA (Low-Rank Adaptation), but differ in how they monitor and optimize for quality.

### Key Differences

| Aspect | Model 1: Standard SFT | Model 2: RL-Enhanced SFT |
|--------|----------------------|--------------------------|
| **Training Method** | Standard supervised learning | SFT with reward-based monitoring |
| **Loss Function** | Cross-entropy only | Cross-entropy with hallucination detection |
| **Monitoring** | Training loss | Training loss + reward signals |
| **Anti-Hallucination** | Implicit (via data quality) | Explicit (via ground truth comparison) |
| **Complexity** | Simple, straightforward | More complex, research-oriented |

---

In [108]:
import spacy
from datasets import load_dataset, Dataset
from collections import defaultdict
import re
from tqdm import tqdm
# Load spacy
try:
    nlp = spacy.load("en_core_web_sm")
    print("‚úÖ Spacy loaded successfully")
except:
    print("‚ùå Spacy not found. Run: python -m spacy download en_core_web_sm")
    exit(1)

‚úÖ Spacy loaded successfully


In [None]:
dataset

## Loading Model

In [109]:
from unsloth import FastLanguageModel, is_bfloat16_supported
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
import torch


In [110]:
import torch
import re
import json
import matplotlib.pyplot as plt
from difflib import SequenceMatcher
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
import pandas as pd

print("‚úÖ Imports complete!")
print(f"üî• GPU available: {torch.cuda.is_available()}")
print(f"üíª Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")


‚úÖ Imports complete!
üî• GPU available: True
üíª Device: Tesla T4


In [112]:
print("‚öôÔ∏è Configuring LoRA...")

model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3712,
)

‚öôÔ∏è Configuring LoRA...


TypeError: Unsloth: Your model already has LoRA adapters. Your new parameters are different.

In [27]:
print("\nüìö Loading POS-constrained dataset...")
from datasets import load_dataset, Dataset
import json
try:
  
    path = "/kaggle/input/cnn-data/cnn_training_data_fixed.json"
    json_path = "/kaggle/input/cnn-data/cnn_training_data_fixed.json"
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print("data here")
    dataset = Dataset.from_list(data)
    print(f"‚úÖ Loaded {len(dataset)} examples")
except:
    print("‚ùå Dataset not found!")
    print("Run: python generate_pos_dataset.py first")
    exit(1)



üìö Loading POS-constrained dataset...
data here
‚úÖ Loaded 13629 examples


In [28]:
EOS_TOKEN = tokenizer.eos_token

def format_for_training(examples):
    """Format messages for training"""
    texts = []

    for messages in examples['messages']:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        ) + EOS_TOKEN
        texts.append(text)

    return {"text": texts}

dataset = dataset.map(
    format_for_training,
    batched=True,
    remove_columns=[col for col in dataset.column_names if col != 'text'],
)

print(f"‚úÖ Formatted {len(dataset)} training examples")

Map:   0%|          | 0/13629 [00:00<?, ? examples/s]

‚úÖ Formatted 13629 training examples


## Model 1

In [16]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        # Batch size
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,

        # üî• ANTI-HALLUCINATION: Lower learning rate
        learning_rate=5e-5,  # ‚¨áÔ∏è Much lower (was 2e-4)

        # Warmup (stability)
        warmup_steps=200,  # ‚¨ÜÔ∏è More warmup
        warmup_ratio=0.05,

        # Training duration
        num_train_epochs=5,  # ‚¨ÜÔ∏è More epochs on quality data

        # Optimization
        optim="adamw_8bit",
        weight_decay=0.02,  # ‚¨ÜÔ∏è More regularization
        lr_scheduler_type="cosine",
        max_grad_norm=0.3,  # ‚¨áÔ∏è Stricter gradient clipping

        # LoRA settings (add to get_peft_model earlier)
        # r=16,  # ‚¨áÔ∏è Smaller rank (less overfitting)
        # lora_alpha=16,
        # lora_dropout=0.15,  # ‚¨ÜÔ∏è Higher dropout

        # Precision
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),

        # Logging
        logging_steps=10,
        logging_first_step=True,

        # Saving
        save_strategy="steps",
        save_steps=250,
        save_total_limit=3,

        # Output
        output_dir="outputs_anti_hallucination",
        report_to="none",
        seed=3407,
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/4999 [00:00<?, ? examples/s]

In [17]:
print("\n" + "="*70)
print("üìä PRE-TRAINING DIAGNOSTICS")
print("="*70)

print(f"\nüìö Dataset:")
print(f"  Total examples: {len(dataset)}")
print(f"  Effective batch size: 8")
print(f"  Steps per epoch: ~{len(dataset) // 8}")
print(f"  Total training steps: ~{(len(dataset) // 8) * 3}")

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"\nüîß Model:")
print(f"  Trainable params: {trainable:,}")
print(f"  Total params: {total:,}")
print(f"  Trainable: {100 * trainable / total:.2f}%")

print(f"\nüìà Training Config:")
print(f"  Learning rate: 2e-4")
print(f"  Warmup steps: 100")
print(f"  Epochs: 3")
print(f"  Max sequence length: {max_seq_length}")

print("\n" + "="*70)
print("‚úÖ Ready to train!")
print("="*70 + "\n")


üìä PRE-TRAINING DIAGNOSTICS

üìö Dataset:
  Total examples: 4999
  Effective batch size: 8
  Steps per epoch: ~624
  Total training steps: ~1872

üîß Model:
  Trainable params: 22,544,384
  Total params: 796,985,344
  Trainable: 2.83%

üìà Training Config:
  Learning rate: 2e-4
  Warmup steps: 100
  Epochs: 3
  Max sequence length: 2048

‚úÖ Ready to train!



## Start Training for Model 1

In [18]:
trainer_stats = trainer.train()

print("\n" + "="*70)
print("‚úÖ TRAINING COMPLETE!")
print("="*70)
print(f"Final loss: {trainer_stats.training_loss:.4f}")

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,999 | Num Epochs = 5 | Total steps = 3,125
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 22,544,384 of 1,258,358,784 (1.79% trained)


Step,Training Loss
1,2.8811
10,2.8321
20,2.8747
30,2.7407
40,2.6835
50,2.6129
60,2.4771
70,2.3516
80,2.3041
90,2.2289



‚úÖ TRAINING COMPLETE!
Final loss: 1.9877


In [19]:
output_dir = "Llama-3.2-1B-CNN-Article-Writer"
print(f"\nüíæ Saving model to: {output_dir}")

model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print("‚úÖ Model saved!")



üíæ Saving model to: Llama-3.2-1B-CNN-Article-Writer
‚úÖ Model saved!


In [22]:
output_dir = "Llama-3.2-1B-CNN-Article-Writer-Full"
print(f"\nüíæ Saving FULL merged model to: {output_dir}")

# Option 1: Use Unsloth's built-in merger (RECOMMENDED)
model.save_pretrained_merged(
    output_dir,
    tokenizer,
    save_method="merged_16bit",  # or "merged_4bit" for smaller size
)

print("‚úÖ Full merged model saved!")

# Option 2: Manual merge (if Option 1 doesn't work)
"""
from peft import get_peft_model

# Merge LoRA weights into base model
merged_model = model.merge_and_unload()

# Save the merged model
merged_model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print("‚úÖ Full mer")"""


üíæ Saving FULL merged model to: Llama-3.2-1B-CNN-Article-Writer-Full


config.json:   0%|          | 0.00/894 [00:00<?, ?B/s]

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...
Cache check failed: model.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/1 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [01:02<00:00, 62.57s/it]


Note: tokenizer.model not found (this is OK for non-SentencePiece models)


Unsloth: Merging weights into 16bit: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:50<00:00, 50.78s/it]

Unsloth: Merge process complete. Saved to `/content/Llama-3.2-1B-CNN-Article-Writer-Full`
‚úÖ Full merged model saved!





'\nfrom peft import get_peft_model\n\n# Merge LoRA weights into base model\nmerged_model = model.merge_and_unload()\n\n# Save the merged model\nmerged_model.save_pretrained(output_dir)\ntokenizer.save_pretrained(output_dir)\n\nprint("‚úÖ Full mer")'

## Model 2 

In [39]:
class GroundTruthHallucinationDetector:
    """Detect hallucinations by comparing to ground truth + rough notes"""
    
    def extract_entities(self, text):
        """Extract named entities from text"""
        if not text:
            return {'names': set(), 'numbers': set(), 'organizations': set(), 'quotes': []}
            
        return {
            'names': set(re.findall(r'(?:Dr\.|Mr\.|Ms\.|Mrs\.|Rep\.|Sen\.|Prof\.)\s+\w+(?:\s+\w+)?', text)),
            'numbers': set(re.findall(r'\b\d+(?:\.\d+)?(?:\s*(?:percent|%|billion|million|thousand))?\b', text)),
            'organizations': set(re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,3}\s+(?:Institute|University|Company|Corporation|Association|Committee|Department)', text)),
            'quotes': re.findall(r'"([^"]+)"', text),
        }
    
    def detect_hallucinations(self, rough_notes, ground_truth, model_output):
        """Find what model invented that's NOT in rough notes OR ground truth"""
        
        rough_ent = self.extract_entities(rough_notes)
        truth_ent = self.extract_entities(ground_truth)
        output_ent = self.extract_entities(model_output)
        
        hallucinations = {}
        penalty = 0.0
        
        # Check names
        allowed_names = rough_ent['names'] | truth_ent['names']
        invented_names = output_ent['names'] - allowed_names
        hallucinations['names'] = list(invented_names)
        penalty += len(invented_names) * 0.30  # Heavy penalty
        
        # Check numbers
        allowed_nums = rough_ent['numbers'] | truth_ent['numbers']
        invented_nums = output_ent['numbers'] - allowed_nums
        hallucinations['numbers'] = list(invented_nums)
        penalty += len(invented_nums) * 0.20
        
        # Check organizations
        allowed_orgs = rough_ent['organizations'] | truth_ent['organizations']
        invented_orgs = output_ent['organizations'] - allowed_orgs
        hallucinations['orgs'] = list(invented_orgs)
        penalty += len(invented_orgs) * 0.25
        
        # Check quotes
        allowed_quotes = set(rough_ent['quotes'] + truth_ent['quotes'])
        for quote in output_ent['quotes']:
            if allowed_quotes:
                matches = [SequenceMatcher(None, quote.lower(), aq.lower()).ratio() 
                          for aq in allowed_quotes]
                if not matches or max(matches) < 0.8:
                    penalty += 0.20
            else:
                penalty += 0.20  # Quote when none expected
        
        score = min(penalty, 1.0)
        return hallucinations, score
    
    def get_reward(self, rough_notes, ground_truth, model_output):
        """Convert to reward: -1.0 (bad) to +1.0 (good)"""
        halluc, score = self.detect_hallucinations(rough_notes, ground_truth, model_output)
        reward = 1.0 - (2.0 * score)  # Maps [0,1] to [1,-1]
        return reward, halluc, score

detector = GroundTruthHallucinationDetector()
print("‚úÖ Hallucination detector ready!")


‚úÖ Hallucination detector ready!


In [113]:
max_seq_length = 2048
dtype = None  # Auto-detect
load_in_4bit = True  # Use 4bit for Kaggle GPU memory

print("üîÑ Loading base model with Unsloth...")

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-Instruct",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0.15,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)



model.print_trainable_parameters()
print("‚úÖ LoRA adapters added!")

üîÑ Loading base model with Unsloth...
==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.563 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.34. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039
‚úÖ LoRA adapters added!


In [114]:
def patch_unsloth_loss():
    """Fix the 'int' has no attribute 'mean' error"""
    from trl import SFTTrainer
    import torch
    
    original_compute_loss = SFTTrainer.compute_loss
    
    def fixed_compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Ensure loss is always a proper tensor"""
        
        # Get loss from original method
        if return_outputs:
            loss, outputs = original_compute_loss(self, model, inputs, return_outputs=True, num_items_in_batch=num_items_in_batch)
        else:
            loss = original_compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=num_items_in_batch)
        
        # Fix: ensure loss is a tensor
        if not isinstance(loss, torch.Tensor):
            loss = torch.tensor(loss, device=model.device, dtype=torch.float32)
        
        # Fix: ensure it's scalar
        if loss.dim() > 0:
            loss = loss.mean()
        
        if return_outputs:
            return loss, outputs
        return loss
    
    SFTTrainer.compute_loss = fixed_compute_loss
    print("‚úÖ Unsloth training step patched!")

patch_unsloth_loss()

‚úÖ Unsloth training step patched!


In [115]:
# ============================================================================
# CELL 6: RL-STYLE TRAINING WITH REWARD WEIGHTING
# ============================================================================

# Custom callback to track hallucinations
from transformers import TrainerCallback

class HallucinationMetricsCallback(TrainerCallback):
    """Track hallucination metrics during training"""
    
    def __init__(self, model, tokenizer, val_examples):
        self.model = model
        self.tokenizer = tokenizer
        self.val_examples = val_examples  # List of dicts with rough_notes, polished_article
        self.detector = GroundTruthHallucinationDetector()
        
        self.metrics = {
            'epoch': [],
            'train_loss': [],
            'val_hallucination': [],
            'val_reward': [],
            'val_invented_names': [],
        }
    
    def on_epoch_end(self, args, state, control, **kwargs):
        """Evaluate hallucinations at end of each epoch"""
        
        epoch = int(state.epoch)
        print(f"\n{'='*70}")
        print(f"üîç EVALUATING HALLUCINATIONS - EPOCH {epoch}")
        print(f"{'='*70}")
        
        FastLanguageModel.for_inference(self.model)
        
        total_halluc = 0
        total_reward = 0
        total_names = 0
        
        num_samples = min(50, len(self.val_examples))
        
        for i in range(num_samples):
            ex = self.val_examples[i]
            rough = ex["rough_notes"]
            truth = ex["polished_article"]
            
            # Generate
            messages = [{
                "role": "user",
                "content": f"Write article from:\n\n{rough}"
            }]
            
            inputs = self.tokenizer.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
                return_tensors="pt"
            ).to("cuda")
            
            outputs = self.model.generate(
                input_ids=inputs,
                max_new_tokens=350,
                temperature=0.3,
                top_p=0.85,
                do_sample=True,
            )
            
            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            # Score
            reward, halluc, score = self.detector.get_reward(rough, truth, result)
            
            total_halluc += score
            total_reward += reward
            total_names += len(halluc['names'])
        
        # Averages
        avg_halluc = total_halluc / num_samples
        avg_reward = total_reward / num_samples
        avg_names = total_names / num_samples
        
        # Get current loss from log history
        if hasattr(state, 'log_history') and len(state.log_history) > 0:
            recent_logs = [log for log in state.log_history if 'loss' in log]
            current_loss = recent_logs[-1]['loss'] if recent_logs else 0.0
        else:
            current_loss = 0.0
        
        # Store
        self.metrics['epoch'].append(epoch)
        self.metrics['train_loss'].append(current_loss)
        self.metrics['val_hallucination'].append(avg_halluc)
        self.metrics['val_reward'].append(avg_reward)
        self.metrics['val_invented_names'].append(avg_names)
        
        print(f"\nüìä METRICS:")
        print(f"  Train Loss: {current_loss:.4f}")
        print(f"  Val Hallucination: {avg_halluc:.3f}")
        print(f"  Val Reward: {avg_reward:+.3f}")
        print(f"  Val Invented Names: {avg_names:.2f}")
        print(f"{'='*70}\n")
        
        # Put model back in training mode
        self.model.train()

# Prepare validation data for callback
val_raw_for_callback = [
    {
        "rough_notes": data[i]["rough_notes"],
        "polished_article": data[i]["polished_article"]
    }
    for i in range(4500, min(5000, len(data)))
]

# Create callback
metrics_callback = HallucinationMetricsCallback(
    model, tokenizer, val_raw_for_callback
)

print(f"‚úÖ Metrics callback ready with {len(val_raw_for_callback)} validation examples!")

‚úÖ Metrics callback ready with 500 validation examples!


In [125]:
training_args = TrainingArguments(
    # Output
    output_dir="./rl_anti_hallucination",
    
    # Epochs and batch size
    num_train_epochs=3,
    per_device_train_batch_size=2,  # Reduced from 4
    gradient_accumulation_steps=4,
    
    # Learning rate
    learning_rate=2e-4,  # Standard for Unsloth
    warmup_steps=5,
    
    # Optimization
    optim="adamw_8bit",
    weight_decay=0.01,
    max_grad_norm=0.3,
    
    # Scheduler
    lr_scheduler_type="linear",  # Changed from cosine
    
    # Logging
    logging_steps=1,  # Important: Must be 1 or more
    
    # Saving
    save_strategy="epoch",
    save_steps=0.25,  # Save 4 times per epoch
    save_total_limit=2,
    
    # Precision
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    
    # Important for Unsloth
    group_by_length=True,  # Groups similar length sequences
    
    # Other
    report_to="none",
    seed=3407,
)

print("‚úÖ Training configuration ready!")
print(f"   Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
print(f"   Total epochs: {training_args.num_train_epochs}")
print(f"   Learning rate: {training_args.learning_rate}")

‚úÖ Training configuration ready!
   Effective batch size: 8
   Total epochs: 3
   Learning rate: 0.0002


In [117]:
# ============================================================================
# CELL 5: LOAD AND FORMAT DATASET (CORRECTED - BATCHED)
# ============================================================================

print("üîÑ Loading your custom dataset...")

json_path = "/kaggle/input/cnn-data/cnn_training_data_fixed.json"
with open(json_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

print(f"üìä Total examples: {len(data)}")

# Create dataset
from datasets import Dataset
dataset = Dataset.from_list(data)

# EOS token
EOS_TOKEN = tokenizer.eos_token

def format_for_training(examples):
    """
    Format batched examples for training
    CRITICAL: This function receives BATCHES, not single examples!
    """
    texts = []
    
    # Loop through batch
    for i in range(len(examples['rough_notes'])):
        
        rough = examples['rough_notes'][i]
        if examples.get("polished_article",-1)==-1:
            truth = rough
        else:
            truth = examples['polished_article'][i]
        
        # Create messages
        messages = [
            {"role": "system", "content": "You are a professional journalist. Write articles from rough notes using ONLY provided information."},
            {"role": "user", "content": f"Write article from:\n\n{rough}"},
            {"role": "assistant", "content": truth}
        ]
        
        # Apply chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        ) + EOS_TOKEN
        
        texts.append(text)
    
    return {"text": texts}

# Apply formatting with BATCHED processing
print("Processing dataset...")
dataset = dataset.map(
    format_for_training,
    batched=True,  # ‚Üê THIS IS CRITICAL!
    batch_size=1000,  # Process 1000 at a time
    remove_columns=[col for col in dataset.column_names if col != 'text'],
)

print(f"‚úÖ Formatted {len(dataset)} examples")

# Split dataset
train_dataset = dataset.select(range(len(dataset)-500))
val_dataset = dataset.select(range(len(dataset)-500,  len(dataset)))

print(f"‚úÖ Train: {len(train_dataset)} examples")
print(f"‚úÖ Val: {len(val_dataset)} examples")


# Verify
print("\nüìù Sample (first 200 chars):")
print(train_dataset[0]['text'][:200] + "...")

üîÑ Loading your custom dataset...
üìä Total examples: 13629
Processing dataset...


Map:   0%|          | 0/13629 [00:00<?, ? examples/s]

‚úÖ Formatted 13629 examples
‚úÖ Train: 13129 examples
‚úÖ Val: 500 examples

üìù Sample (first 200 chars):
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 31 Jan 2026

You are a professional journalist. Write articles from rough notes using ONL...


In [126]:
# ============================================================================
# FIXED VERSION: COMPREHENSIVE FIX FOR UNSLOTH
# ============================================================================

def patch_unsloth_completely():
    """
    Complete fix for Unsloth gradient issues
    Fixes both 'int has no mean' and 'does not require grad' errors
    """
    from trl import SFTTrainer
    import torch
    
    # Save original methods
    original_training_step = SFTTrainer.training_step
    
    def fixed_training_step(self, model, inputs, num_items_in_batch=None):
        """
        Fixed training step that properly handles gradients
        
        Args:
            self: The trainer instance
            model: The model being trained
            inputs: Input batch
            num_items_in_batch: Number of items in batch (optional, for compatibility)
        """
        model.train()
        inputs = self._prepare_inputs(inputs)
        
        # Forward pass with gradient
        with self.compute_loss_context_manager():
            loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
        
        # Ensure loss is proper tensor
        if not isinstance(loss, torch.Tensor):
            loss = torch.tensor(loss, device=model.device, dtype=torch.float32)
        
        # Ensure it's a scalar
        if loss.dim() > 0:
            loss = loss.mean()
        
        # CRITICAL: Ensure loss requires grad
        if not loss.requires_grad:
            # This means loss wasn't computed properly
            # Force re-computation with gradients enabled
            model.train()
            torch.set_grad_enabled(True)
            
            outputs = model(**inputs)
            
            if isinstance(outputs, dict):
                loss = outputs.get("loss")
            else:
                loss = outputs.loss if hasattr(outputs, "loss") else outputs[0]
            
            # Ensure it's a tensor
            if not isinstance(loss, torch.Tensor):
                loss = torch.tensor(loss, device=model.device, dtype=torch.float32, requires_grad=True)
        
        # Scale loss for gradient accumulation
        if self.args.n_gpu > 1:
            loss = loss.mean()
        
        if self.args.gradient_accumulation_steps > 1:
            loss = loss / self.args.gradient_accumulation_steps
        
        # Backward pass
        self.accelerator.backward(loss)
        
        return loss.detach()
    
    SFTTrainer.training_step = fixed_training_step
    print("‚úÖ Unsloth completely patched for gradient computation!")

# Call the patch
patch_unsloth_completely()

‚úÖ Unsloth completely patched for gradient computation!


In [None]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    #eval_dataset=val_dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
    callbacks=[metrics_callback],
)

# NOW IT SHOULD WORK!
trainer.train()

Unsloth: Tokenizing ["text"] (num_proc=8):   0%|          | 0/13629 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 13,629 | Num Epochs = 3 | Total steps = 2,556
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 11,272,192 of 1,247,086,592 (0.90% trained)


Step,Training Loss
1,0.5454
2,0.5511
3,0.5806
4,0.5715
5,0.5538
6,0.5656
7,0.5749
8,0.5479
9,0.5247
10,0.5462


## Saving Trained Model to Hugging Face

In [25]:
!pip install -q huggingface_hub

In [26]:
from huggingface_hub import login

print("="*70)
print("STEP 1: LOGIN TO HUGGINGFACE")
print("="*70)
print("\n1. Go to: https://huggingface.co/settings/tokens")
print("2. Click 'New token'")
print("3. Name: 'upload-models'")
print("4. Type: Write")
print("5. Copy the token")
print("6. Paste below when prompted\n")

# Login
login()

print("‚úÖ Logged in!")

STEP 1: LOGIN TO HUGGINGFACE

1. Go to: https://huggingface.co/settings/tokens
2. Click 'New token'
3. Name: 'upload-models'
4. Type: Write
5. Copy the token
6. Paste below when prompted



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv‚Ä¶

‚úÖ Logged in!


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [28]:
from huggingface_hub import create_repo

# Set your username and model name
HF_USERNAME = "aryan14072001"  # ‚Üê CHANGE THIS to your HF username
MODEL_NAME = "article-writer-rag"

repo_id = f"{HF_USERNAME}/{MODEL_NAME}"

print(f"\nüîÑ Creating repository: {repo_id}")

try:
    create_repo(
        repo_id=repo_id,
        repo_type="model",
        exist_ok=True,  # Don't error if already exists
        private=False,  # Set to True if you want it private
    )
    print(f"‚úÖ Repository created!")
except Exception as e:
    print(f"‚ÑπÔ∏è  Repository already exists or error: {e}")



üîÑ Creating repository: aryan14072001/article-writer-rag
‚úÖ Repository created!


In [29]:
print("\n" + "="*70)
print("STEP 4: UPLOADING MODEL TO HUGGINGFACE")
print("="*70)

from huggingface_hub import HfApi

api = HfApi()

print(f"\nüîÑ Uploading model to: https://huggingface.co/{repo_id}")
print("   This will take 5-15 minutes (~2.5GB upload)...")

api.upload_folder(
    folder_path=output_dir,
    repo_id=repo_id,
    repo_type="model",
)

print("\n‚úÖ MODEL UPLOADED SUCCESSFULLY!")
print("="*70)


STEP 4: UPLOADING MODEL TO HUGGINGFACE

üîÑ Uploading model to: https://huggingface.co/aryan14072001/article-writer-rag
   This will take 5-15 minutes (~2.5GB upload)...


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...riter-Full/tokenizer.json: 100%|##########| 17.2MB / 17.2MB            

  ...er-Full/model.safetensors:   1%|1         | 33.5MB / 2.47GB            


‚úÖ MODEL UPLOADED SUCCESSFULLY!


In [30]:
print("\n" + "="*70)
print("STEP 5: CREATING MODEL CARD")
print("="*70)

model_card = f"""---
license: apache-2.0
base_model: unsloth/Llama-3.2-1B-Instruct
tags:
- text-generation
- llama
- fine-tuned
- news-writing
- article-generation
language:
- en
datasets:
- cnn_dailymail
---

# Article Writer - Fine-tuned Llama 3.2 1B

This model is fine-tuned on CNN/DailyMail articles to expand rough bullet-point notes into professional news articles.

## Model Details

- **Base Model**: unsloth/Llama-3.2-1B-Instruct
- **Training**: LoRA fine-tuning on CNN articles
- **Task**: Expand rough notes ‚Üí professional articles
- **Parameters**: 1B
- **Size**: ~2.5GB (16-bit merged)

## Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load model
model = AutoModelForCausalLM.from_pretrained(
    "{repo_id}",
    torch_dtype=torch.float16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("{repo_id}")

# Generate article
messages = [
    {{"role": "system", "content": "You are a professional journalist."}},
    {{"role": "user", "content": "Write article from:\\n\\n‚Ä¢ Fact 1\\n‚Ä¢ Fact 2"}}
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.3)
article = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(article)
```

## Training Details

- **Dataset**: CNN/DailyMail articles
- **Method**: LoRA (Low-Rank Adaptation)
- **Epochs**: 5
- **Learning Rate**: 5e-5
- **Anti-hallucination**: Lower temperature, higher regularization

## Limitations

- Designed for news article generation
- May not work well for other writing styles
- Requires factual input notes

## License

Apache 2.0
"""

# Save model card
with open(f"{output_dir}/README.md", "w") as f:
    f.write(model_card)

# Upload model card
api.upload_file(
    path_or_fileobj=f"{output_dir}/README.md",
    path_in_repo="README.md",
    repo_id=repo_id,
    repo_type="model",
)

print("‚úÖ Model card uploaded!")

# ============================================================================
# SUMMARY
# ============================================================================

print("\n" + "="*70)
print("‚úÖ ALL DONE!")
print("="*70)

print(f"""
Your model is now on HuggingFace! üéâ

üìç Model URL: https://huggingface.co/{repo_id}

Next steps:
1. Visit your model page to verify it uploaded
2. Test loading it:

   from transformers import AutoModelForCausalLM
   model = AutoModelForCausalLM.from_pretrained("{repo_id}")

3. Now you can use it in HuggingFace Spaces!

   In your Space's app.py, set:
   MODEL_NAME = "{repo_id}"

4. Deploy to Spaces (see DEPLOYMENT_GUIDE.py)

Model info:
- Name: {repo_id}
- Size: ~2.5GB
- Format: Full merged model (ready to use)
- Visibility: Public (change in repo settings if needed)
""")

print("="*70)



STEP 5: CREATING MODEL CARD
‚úÖ Model card uploaded!

‚úÖ ALL DONE!

Your model is now on HuggingFace! üéâ

üìç Model URL: https://huggingface.co/aryan14072001/article-writer-rag

Next steps:
1. Visit your model page to verify it uploaded
2. Test loading it:
   
   from transformers import AutoModelForCausalLM
   model = AutoModelForCausalLM.from_pretrained("aryan14072001/article-writer-rag")
   
3. Now you can use it in HuggingFace Spaces!
   
   In your Space's app.py, set:
   MODEL_NAME = "aryan14072001/article-writer-rag"

4. Deploy to Spaces (see DEPLOYMENT_GUIDE.py)

Model info:
- Name: aryan14072001/article-writer-rag
- Size: ~2.5GB
- Format: Full merged model (ready to use)
- Visibility: Public (change in repo settings if needed)



##  Testing Model


In [40]:
print("\n" + "="*70)
print("üß™ TESTING MODEL")
print("="*70)

FastLanguageModel.for_inference(model)

# Test cases
test_cases = [
    """‚Ä¢ Scientists testified before House Subcommittee on Domestic Policy about cell phone radiation and brain cancer
‚Ä¢ Dr. Ronald Herberman (University of Pittsburgh Cancer Institute) said he can't say cell phones are definitely safe or dangerous
‚Ä¢ Dr. David Carpenter (University of Albany) cited a study that found people who use cell phones have double the chance of developing malignant brain tumors
‚Ä¢ The study also found people under 20 have 5x more chance of developing brain cancer
‚Ä¢ Dr. Lennart Hardell's study was recently presented, but hasn't been peer-reviewed yet
‚Ä¢ Dr. Robert Hoover (National Cancer Institute) says evidence is inconclusive and more research is needed""",

    """‚Ä¢ Tech company announces new AI product
‚Ä¢ CEO says it will revolutionize industry
‚Ä¢ Product launches next month
‚Ä¢ Priced at $999
‚Ä¢ Available in 50 countries"""
]

for i, test_notes in enumerate(test_cases, 1):
    print(f"\n{'='*70}")
    print(f"TEST CASE {i}:")
    print("="*70)
    print("\nüîµ INPUT (Rough Notes):")
    print(test_notes)

    messages = [
    {
        "role": "system",
        "content": "You are a professional journalist. Expand rough notes into complete, well-written news articles. Use all facts from the notes. Add professional structure but do not invent details not present in the source material."
    },
    {
        "role": "user",
        "content": f"Expand these rough notes into a professional news article:\n\n{test_notes}"
    }
]


    prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate with BALANCED settings
    outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    temperature=0.3,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.05,        # KEY: Lower penalty
    no_repeat_ngram_size=3,         # KEY: Prevents phrase repetition
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    article = result.split("assistant")[-1].strip()

    print("\nüü¢ OUTPUT (Generated Article):")
    print(article)
    print("="*70)

print("\n" + "="*70)
print("‚úÖ ALL DONE!")
print("="*70)
print(f"\nYour model is saved in: {output_dir}")
print("\nTo use it later:")
print(f'  model, tokenizer = FastLanguageModel.from_pretrained("{output_dir}")')


üß™ TESTING MODEL

TEST CASE 1:

üîµ INPUT (Rough Notes):
‚Ä¢ Scientists testified before House Subcommittee on Domestic Policy about cell phone radiation and brain cancer
‚Ä¢ Dr. Ronald Herberman (University of Pittsburgh Cancer Institute) said he can't say cell phones are definitely safe or dangerous
‚Ä¢ Dr. David Carpenter (University of Albany) cited a study that found people who use cell phones have double the chance of developing malignant brain tumors
‚Ä¢ The study also found people under 20 have 5x more chance of developing brain cancer
‚Ä¢ Dr. Lennart Hardell's study was recently presented, but hasn't been peer-reviewed yet
‚Ä¢ Dr. Robert Hoover (National Cancer Institute) says evidence is inconclusive and more research is needed

üü¢ OUTPUT (Generated Article):

TEST CASE 2:

üîµ INPUT (Rough Notes):
‚Ä¢ Tech company announces new AI product
‚Ä¢ CEO says it will revolutionize industry
‚Ä¢ Product launches next month
‚Ä¢ Priced at $999
‚Ä¢ Available in 50 countries

üü¢ 

In [24]:
!zip -r /content/CNN_model_Full.zip /content/Llama-3.2-1B-CNN-Article-Writer-Full



  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/ (stored 0%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/tokenizer_config.json (deflated 94%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/model.safetensors (deflated 21%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/special_tokens_map.json (deflated 71%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/config.json (deflated 56%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/chat_template.jinja (deflated 71%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/.cache/ (stored 0%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/.cache/huggingface/ (stored 0%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/.cache/huggingface/.gitignore (stored 0%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/.cache/huggingface/download/ (stored 0%)
  adding: content/Llama-3.2-1B-CNN-Article-Writer-Full/.cache/huggingface/download/model.safetensors.lock (stored 0%)
  adding: con

In [42]:
pip install langchain_groq

Collecting langchain_groq
  Downloading langchain_groq-1.1.1-py3-none-any.whl.metadata (2.4 kB)
Collecting groq<1.0.0,>=0.30.0 (from langchain_groq)
  Downloading groq-0.37.1-py3-none-any.whl.metadata (16 kB)
Downloading langchain_groq-1.1.1-py3-none-any.whl (19 kB)
Downloading groq-0.37.1-py3-none-any.whl (137 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m137.5/137.5 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain_groq
Successfully installed groq-0.37.1 langchain_groq-1.1.1


## Enriching Input Using Groq

In [44]:
pip install langchain_community langchain_groq langchain_core

Collecting langchain_community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain_community)
  Downloading langchain_classic-1.0.1-py3-none-any.whl.metadata (4.2 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain_community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain_community)
  Downloading marshmallow-3.26.2-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting langchain-text-splitters<2.0.0,>=1.1.0 (from langchain-classic<2.0.0,>=1.0.0->langchain_community)
  Downloading langchain_text_splitters-1.1.0

In [49]:
pip install -U ddgs

Collecting ddgs
  Downloading ddgs-9.10.0-py3-none-any.whl.metadata (12 kB)
Collecting primp>=0.15.0 (from ddgs)
  Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting fake-useragent>=2.2.0 (from ddgs)
  Downloading fake_useragent-2.2.0-py3-none-any.whl.metadata (17 kB)
Collecting socksio==1.* (from httpx[brotli,http2,socks]>=0.28.1->ddgs)
  Downloading socksio-1.0.0-py3-none-any.whl.metadata (6.1 kB)
Downloading ddgs-9.10.0-py3-none-any.whl (40 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m40.3/40.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fake_useragent-2.2.0-py3-none-any.whl (161 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m161.7/161.7 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading socksio

In [45]:
from langchain_groq import ChatGroq
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.messages import HumanMessage, SystemMessage
import os

In [None]:
GROQ_API_KEY = ""
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [50]:
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.1,
)

# Search tool
search = DuckDuckGoSearchRun()

print("‚úÖ Setup complete!")

‚úÖ Setup complete!


In [62]:
SYSTEM_PROMPT = """You are a professional journalist writing from source material.

STRICT REQUIREMENTS:
- Include all facts from the rough notes
- Use verified background for additional context
- Do NOT invent names, quotes, numbers, or organizations
- When information is missing, use generic terms ("a scientist", "researchers")
- Preserve uncertainty ("may", "could") when present

Write a clear, professional article using ONLY the information provided."""

In [51]:
def enrich_notes(rough_notes):
    """
    Step 1: Ask Groq to identify what to search
    Step 2: Search for each entity
    Step 3: Ask Groq to compile the results
    """

    print("\n" + "="*70)
    print("üî¨ ENRICHING NOTES")
    print("="*70)

    # STEP 1: Identify entities to research
    print("\nüìã Step 1: Identifying entities...")

    identify_prompt = f"""Analyze these rough article notes and identify 3-5 key entities that need background research.

NOTES:
{rough_notes}

List entities in this format:
1. [Person/Org/Topic]
2. [Person/Org/Topic]
...

Only list names, keep it simple."""

    messages = [
        SystemMessage(content="You are a research assistant."),
        HumanMessage(content=identify_prompt)
    ]

    response = llm.invoke(messages)
    entities_text = response.content
    print(f"Entities identified:\n{entities_text}")

    # STEP 2: Search for each entity
    print("\nüîé Step 2: Searching web...")

    # Extract entity names (simple parsing)
    import re
    entities = re.findall(r'\d+\.\s*(.+)', entities_text)
    entities = [e.strip() for e in entities[:5]]  # Max 5 searches

    all_search_results = []

    for i, entity in enumerate(entities, 1):
        if not entity:
            continue

        print(f"  [{i}/{len(entities)}] Searching: {entity}")

        try:
            results = search.run(entity)
            all_search_results.append(f"ENTITY: {entity}\nRESULTS: {results}\n")
        except Exception as e:
            print(f"    ‚ö†Ô∏è Search failed: {e}")

    if not all_search_results:
        return "No background information found."

    # STEP 3: Compile enriched context
    print("\nüìù Step 3: Compiling verified facts...")

    combined_results = "\n".join(all_search_results)

    compile_prompt = f"""You are a fact-checking journalist. Extract ONLY verified, factual information from these search results.

ORIGINAL NOTES:
{rough_notes}

SEARCH RESULTS:
{combined_results}

Create a concise summary with:

VERIFIED BACKGROUND:
‚Ä¢ [Verified fact 1 with source context]
‚Ä¢ [Verified fact 2 with source context]
...

Max 10 facts. Be concise and factual."""

    messages = [
        SystemMessage(content="You are a fact-checking journalist."),
        HumanMessage(content=compile_prompt)
    ]

    response = llm.invoke(messages)
    enriched_context = response.content

    print("\n‚úÖ Enrichment complete!")
    print(f"\n{enriched_context}")

    return enriched_context


In [60]:
def write_article(rough_notes, enriched_context):

  messages = [
    {
        "role": "system",
        "content": SYSTEM_PROMPT  # The comprehensive prompt
    },
    {
        "role": "user",
        "content": f"""ROUGH NOTES:
{rough_notes}

VERIFIED BACKGROUND:
{enriched_context}

Write the article:"""
    }
]
  prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate with BALANCED settings
  outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    temperature=0.3,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.05,        # KEY: Lower penalty
    no_repeat_ngram_size=3,         # KEY: Prevents phrase repetition
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
  article = result.split("assistant")[-1].strip()
  return article


In [52]:
def generate_article_with_rag(rough_notes, use_rag=True):
    """Complete RAG pipeline"""

    print("\n" + "="*70)
    print("üöÄ RAG PIPELINE")
    print("="*70)
    print(f"\nüì• INPUT:\n{rough_notes}\n")

    # Step 1: Enrich
    if use_rag:
        enriched_context = enrich_notes(rough_notes)
    else:
        enriched_context = "RAG disabled"
        print("\n‚ö†Ô∏è  RAG disabled")

    # Step 2: Write
    article = write_article(rough_notes, enriched_context)

    print("\n" + "="*70)
    print("üìÑ FINAL ARTICLE:")
    print("="*70)
    print(article)
    print("="*70)

    return article, enriched_context


In [64]:
test_notes = """‚Ä¢ Apple announces new iPhone 16
‚Ä¢ Features improved camera with 48MP sensor
‚Ä¢ Battery life increased by 20%
‚Ä¢ Priced at $899 for base model
‚Ä¢ Available October 15th
‚Ä¢ Comes in 4 colors: black, white, blue, pink"""

    # Test with RAG
article, context = generate_article_with_rag(test_notes, use_rag=True)


üöÄ RAG PIPELINE

üì• INPUT:
‚Ä¢ Apple announces new iPhone 16
‚Ä¢ Features improved camera with 48MP sensor
‚Ä¢ Battery life increased by 20%
‚Ä¢ Priced at $899 for base model
‚Ä¢ Available October 15th
‚Ä¢ Comes in 4 colors: black, white, blue, pink


üî¨ ENRICHING NOTES

üìã Step 1: Identifying entities...
Entities identified:
1. Apple
2. iPhone 16 
3. iPhone

üîé Step 2: Searching web...
  [1/3] Searching: Apple
  [2/3] Searching: iPhone 16
  [3/3] Searching: iPhone

üìù Step 3: Compiling verified facts...

‚úÖ Enrichment complete!

VERIFIED BACKGROUND:
‚Ä¢ Apple is an American multinational technology company that designs, manufactures, and markets consumer electronics, personal computers, software, and online services. (Source: Apple entity search results)
‚Ä¢ The iPhone is a line of smartphones developed and marketed by Apple that run iOS, the company's own mobile operating system. (Source: iPhone entity search results)
‚Ä¢ The first-generation iPhone was introduced by St