In [None]:
!pip install --upgrade transformers accelerate bitsandbytes huggingface_hub trl evaluate rouge_score nltk
import os
import re
import json
import torch
import numpy as np
import pandas as pd
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from trl import SFTTrainer
from evaluate import load

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["WANDB_DISABLED"] = "true"
os.environ["HF_TOKEN"] = ""

Collecting transformers
  Downloading transformers-4.53.0-py3-none-any.whl.metadata (39 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.33.1-py3-none-any.whl.metadata (14 kB)
Collecting trl
  Downloading trl-0.19.0-py3-none-any.whl.metadata (10 kB)
Collecting evaluate
  Downloading evaluate-0.4.4-py3-none-any.whl.metadata (9.5 kB)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting datasets>=3.0.0 (from trl)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec>=2023.5.0 (from huggingface_hub)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtim

In [None]:
print("🚀 Initializing model and tokenizer...")
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
print(f"🔍 Model selected: {model_id}")

🚀 Initializing model and tokenizer...
🔍 Model selected: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B


In [None]:
# ==== Check GPU availability ====
if not torch.cuda.is_available():
    print("⚠️ WARNING: No GPU detected. Quantization will not work efficiently. Switch to a GPU runtime if possible.")
else:
    print("✅ GPU detected! Proceeding with quantization.")


✅ GPU detected! Proceeding with quantization.


In [None]:
print("⚙️ Configuring 4-bit quantization...")
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True  # Saves additional 0.4 bits/param
)

print("⬇️ Downloading and loading quantized model...")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=quant_config,
    token=os.environ["HF_TOKEN"]
)
print("✅ Model successfully loaded with 4-bit quantization!")

print("\n⬇️ Downloading and loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HF_TOKEN"])
print("✅ Tokenizer successfully loaded!")

⚙️ Configuring 4-bit quantization...
⬇️ Downloading and loading quantized model...


config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


✅ Model successfully loaded with 4-bit quantization!

⬇️ Downloading and loading tokenizer...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

✅ Tokenizer successfully loaded!


In [None]:
from google.colab import files
print("📤 Uploading dataset files...")
files.upload()
print("\n🔍 Files in directory:", os.listdir())

📤 Uploading dataset files...


Saving combined_dataset.jsonl to combined_dataset.jsonl

🔍 Files in directory: ['.config', 'combined_dataset.jsonl', 'sample_data']


In [None]:
# ==== Preview the first few lines to check format ====
print("📄 Previewing first 5 lines of combined_dataset.jsonl:")
with open('combined_dataset.jsonl', 'r', encoding='utf-8') as f:
    for i in range(5):
        print(f"Line {i+1}: {next(f).strip()}")
print("")


📄 Previewing first 5 lines of combined_dataset.jsonl:
Line 1: {"instruction": "Suggest a solution for this IT incident", "input": "Incident Number: INC0010289\nShort Description: Azure SQL Managed Instance randomly freezes/locks up and must be Stopped/Restarted\nDescription: \nPriority: 3\nService: \nCustomer Comments: ", "output": "1. Issue Summary:\" etc. But in the example, the steps are written with **Step 1:** etc. Wait, in the example given, under Remediation Steps, each step is bolded with **Step 1: Analyze Resource Consumption** etc. But the user's initial structure says to use \"Step-by-step solution with clear commands if needed. Use a blank line between steps.\"\n\nSo I should follow the same formatting as in the example, using bold for step headers. However, the user mentioned to avoid any markdown except the four sections. Wait, no\u2014the user provided the example with markdown but instructed to follow the structure. However, the assistant is told to respond using only t

In [None]:
# 3. ==== Load and anonymize dataset ====
print("📊 Loading JSONL into DataFrame...")
try:
    df = pd.read_json('combined_dataset.jsonl', lines=True)
    print("✅ Successfully loaded DataFrame!")
except Exception as e:
    print(f"❌ Error loading DataFrame: {e}")
    exit()

print("🛡️ Anonymizing sensitive data...")
def anonymize_text(text):
    # Add your organization-specific PII patterns here
    patterns = {
        r'\b\d{3}-\d{2}-\d{4}\b': '[SSN]',          # Social Security Numbers
        r'\b\d{3}\.\d{3}\.\d{4}\b': '[PHONE]',      # Phone numbers
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b': '[EMAIL]',
        r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b': '[IP]' # IP addresses
    }
    for pattern, replacement in patterns.items():
        text = re.sub(pattern, replacement, text)
    return text

# Apply anonymization to all text fields
for column in ['instruction', 'input', 'output']:
    df[column] = df[column].apply(anonymize_text)

📊 Loading JSONL into DataFrame...
✅ Successfully loaded DataFrame!
🛡️ Anonymizing sensitive data...


In [None]:
# Format dataset
print("🔄 Formatting for instruction-tuning (Alpaca style)...")
def format_step_by_step(row):
    # Ensure output is formatted as numbered steps
    steps = [s.strip() for s in row['output'].split('\n') if s.strip()]
    numbered_steps = '\n'.join([f"{i+1}. {step}" for i, step in enumerate(steps)])
    if row['input']:
        return f"### Instruction:\n{row['instruction']}\n\n### Input:\n{row['input']}\n\n### Response:\n{numbered_steps}"
    else:
        return f"### Instruction:\n{row['instruction']}\n\n### Response:\n{numbered_steps}"

formatted_data = [{"text": format_step_by_step(row)} for _, row in df.iterrows()]
dataset = Dataset.from_pandas(pd.DataFrame(formatted_data))

# Preview first formatted sample
print("\nPreview of first formatted sample:")
print(dataset[0])

🔄 Formatting for instruction-tuning (Alpaca style)...

Preview of first formatted sample:
{'text': '### Instruction:\nSuggest a solution for this IT incident\n\n### Input:\nIncident Number: INC0010289\nShort Description: Azure SQL Managed Instance randomly freezes/locks up and must be Stopped/Restarted\nDescription: \nPriority: 3\nService: \nCustomer Comments: \n\n### Response:\n1. 1. Issue Summary:" etc. But in the example, the steps are written with **Step 1:** etc. Wait, in the example given, under Remediation Steps, each step is bolded with **Step 1: Analyze Resource Consumption** etc. But the user\'s initial structure says to use "Step-by-step solution with clear commands if needed. Use a blank line between steps."\n2. So I should follow the same formatting as in the example, using bold for step headers. However, the user mentioned to avoid any markdown except the four sections. Wait, no—the user provided the example with markdown but instructed to follow the structure. However, t

In [None]:
 # Save formatted dataset
print("💾 Saving formatted dataset...")
with open("formatted_dataset.jsonl", "w", encoding="utf-8") as f:
  for record in formatted_data:
    json.dump(record, f, ensure_ascii=False)
    f.write('\n')
print("✅ Saved as formatted_dataset.jsonl")

💾 Saving formatted dataset...
✅ Saved as formatted_dataset.jsonl


In [None]:
# Verify dataset file exists
if not os.path.exists('formatted_dataset.jsonl'):
    raise FileNotFoundError("❌ Dataset file missing. Upload 'formatted_dataset.jsonl' first.")

# Load data manually (bypassing filesystem issues)
data = []
with open('formatted_dataset.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        data.append(json.loads(line))

dataset = Dataset.from_list(data)
n_total = len(dataset)
print(f"✅ Loaded {n_total} samples")

# Smart splitting with size validation
print("✂️ Splitting dataset (80/10/10)...")
split = dataset.train_test_split(test_size=0.2, seed=42)
valid_test = split['test'].train_test_split(test_size=0.5, seed=42)
train_dataset = split['train']
valid_dataset = valid_test['train']
test_dataset = valid_test['test']

# Final validation
assert len(train_dataset) > 0, "Train set is empty"
assert len(valid_dataset) > 0, "Validation set is empty"
assert len(test_dataset) > 0, "Test set is empty"

print(f"🚀 Successfully split dataset:")
print(f"   Train: {len(train_dataset)} samples ({len(train_dataset)/n_total:.0%})")
print(f"   Validation: {len(valid_dataset)} samples ({len(valid_dataset)/n_total:.0%})")
print(f"   Test: {len(test_dataset)} samples ({len(test_dataset)/n_total:.0%})")


✅ Loaded 1106 samples
✂️ Splitting dataset (80/10/10)...
🚀 Successfully split dataset:
   Train: 884 samples (80%)
   Validation: 111 samples (10%)
   Test: 111 samples (10%)


In [None]:
print("🔧 Setting up training arguments for model fine-tuning...")

training_args = TrainingArguments(
    output_dir="./finetuned_model",
    run_name ="aiops-finetune",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    fp16=True,
    gradient_checkpointing = True,
    logging_steps=10,
    save_strategy="epoch",
    eval_strategy="epoch",
    report_to=[],
    save_total_limit=2,
)

torch.cuda.empty_cache()

print("✅ Training arguments have been set up successfully!")

🔧 Setting up training arguments for model fine-tuning...
✅ Training arguments have been set up successfully!


In [None]:
# Configure LoRA
print("⚙️ Configuring LoRA (Low-Rank Adaptation) parameters...")
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
print("✅ LoRA configuration created!")

# Prepare 4-bit model for training
model = prepare_model_for_kbit_training(model)
print("✅ Model prepared for 4-bit training!")
model = get_peft_model(model, peft_config)
print("✅ LoRA adapters successfully added to the model!")

# Verify trainable parameters
model.print_trainable_parameters()
print("✅ Ready for efficient fine-tuning!")
# Should show: "trainable params: X || all params: Y || trainable%: Z"


⚙️ Configuring LoRA (Low-Rank Adaptation) parameters...
✅ LoRA configuration created!
✅ Model prepared for 4-bit training!
✅ LoRA adapters successfully added to the model!
trainable params: 1,089,536 || all params: 1,778,177,536 || trainable%: 0.0613
✅ Ready for efficient fine-tuning!


In [None]:
def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        padding=True,
        truncation=True,
        max_length=512
    )

print("Tokenizing Dataset")
train_dataset_tok = train_dataset.map(tokenize_function, batched=True).remove_columns(["text"])
valid_dataset_tok = valid_dataset.map(tokenize_function, batched=True).remove_columns(["text"])


Tokenizing Dataset


Map:   0%|          | 0/884 [00:00<?, ? examples/s]

Map:   0%|          | 0/111 [00:00<?, ? examples/s]

In [None]:
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_tok,
    eval_dataset=valid_dataset_tok,
)
print("\nStarting Training...")
trainer.train()

Truncating train dataset:   0%|          | 0/884 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/111 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



Starting Training...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,0.8603,0.755682
2,0.3393,0.398609
3,0.2918,0.361812


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TrainOutput(global_step=168, training_loss=1.1787487680003756, metrics={'train_runtime': 1334.3339, 'train_samples_per_second': 1.988, 'train_steps_per_second': 0.126, 'total_flos': 1.258543037546496e+16, 'train_loss': 1.1787487680003756})

In [None]:
# Tokenize test set
test_dataset_tok = test_dataset.map(tokenize_function, batched=True).remove_columns(["text"])

# Evaluate
test_metrics = trainer.evaluate(test_dataset_tok)
print("📊 Basic Test metrics:", test_metrics)


Map:   0%|          | 0/111 [00:00<?, ? examples/s]

📊 Basic Test metrics: {'eval_loss': 0.3842102289199829, 'eval_runtime': 14.738, 'eval_samples_per_second': 7.532, 'eval_steps_per_second': 0.95}


In [None]:
# ==== Enhanced Evaluation (Optimized) ====

def clean_llm_output(output_text):
    # Remove HTML tags and special characters
    output_text = re.sub(r'</?\w+>', '', output_text)
    output_text = output_text.replace('*', '')

    # Extract response section
    response_split = output_text.split("### Response:")
    cleaned = response_split[-1].strip() if len(response_split) > 1 else output_text

    # Split into lines and process
    lines = [line.strip() for line in cleaned.split('\n') if line.strip()]

    steps = []
    current_step = None
    current_bullets = []

    step_pattern = re.compile(r'^(\d+)[\.\)]\s*(.+?)(:)?$')
    bullet_pattern = re.compile(r'^[-•]\s*(.+)$')

    for line in lines:
        step_match = step_pattern.match(line)
        bullet_match = bullet_pattern.match(line)

        if step_match:
            # Save previous step
            if current_step:
                steps.append((current_step, current_bullets))
            # Start new step
            title = step_match.group(2).strip()
            if not title.endswith(':'):
                title += ':'
            current_step = title
            current_bullets = []
        elif bullet_match:
            current_bullets.append(bullet_match.group(1).strip())
        else:
            # If it's a continuation of a bullet or step, add as bullet
            if current_step:
                current_bullets.append(line)
    # Save last step
    if current_step:
        steps.append((current_step, current_bullets))

    # Format output
    formatted = []
    for idx, (title, bullets) in enumerate(steps, 1):
        formatted.append(f"{idx}. {title}")
        for bullet in bullets:
            formatted.append(f"- {bullet}")

    return '\n'.join(formatted)

print("🧪 Running comprehensive evaluation...")
test_size = len(test_dataset)
if test_size <= 100:
    eval_dataset = test_dataset
else:
    sample_size = max(100, min(500, int(test_size * 0.1)))
    eval_dataset = test_dataset.shuffle(seed=42).select(range(sample_size))
    print(f"📊 Using {sample_size}/{test_size} samples for efficient evaluation")

def batch_generate_predictions(model, dataset, tokenizer, batch_size=4):
    predictions = []
    references = []
    all_input_texts = []
    all_references = []
    for example in dataset:
        # Extract input text and reference
        input_text = example['text'].split("### Response:")[0] + "### Response:"
        reference = example['text'].split("### Response:")[1].strip()

        all_input_texts.append(input_text)
        all_references.append(reference)

    # Process in batches
    for i in range(0, len(all_input_texts), batch_size):
        batch_texts = all_input_texts[i:i+batch_size]

        inputs = tokenizer(
            batch_texts,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        ).to(model.device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=192,
            pad_token_id=tokenizer.eos_token_id
        )
        raw_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        for j, raw_output in enumerate(raw_outputs):
            cleaned_output = clean_llm_output(raw_output)
            predictions.append(cleaned_output)
            references.append(all_references[i+j])

    return predictions, references

print("⚡ Using 4-bit quantized model with batch processing")
predictions, references = batch_generate_predictions(
    model,
    eval_dataset,
    tokenizer,
    batch_size=4
)

# Load metrics with error handling
try:
    bleu = load("bleu")
    rouge = load("rouge")
except:
    print("⚠️ Metric loading failed, installing required packages...")
    !pip install rouge_score nltk --quiet
    bleu = load("bleu")
    rouge = load("rouge")

bleu_results = bleu.compute(predictions=predictions, references=references)
rouge_results = rouge.compute(predictions=predictions, references=references)

print(f"📊 BLEU Score: {bleu_results['bleu']:.4f}")
print(f"📊 ROUGE Scores: {rouge_results}")

🧪 Running comprehensive evaluation...
📊 Using 100/111 samples for efficient evaluation
⚡ Using 4-bit quantized model with batch processing


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

📊 BLEU Score: 0.0257
📊 ROUGE Scores: {'rouge1': np.float64(0.16439358404514687), 'rouge2': np.float64(0.03702823702712303), 'rougeL': np.float64(0.11437479642067115), 'rougeLsum': np.float64(0.15673731770339433)}


In [None]:
# ==== Inference Demo ====

# Base prompt template (always includes the formatting instructions)

base_prompt_template = '''### Instruction:
Provide step-by-step instructions.
Format the response exactly as follows:
Each step must start with a number, a period, a space, a Step Title, and a colon.
Each step, if required, must be followed by one or more sub-bullets, each starting with a dash and a space.
Do not include any extra explanations or paragraphs.

{user_instruction}

### Response:
'''
# User input (can be anything)
user_instruction = input("Enter your instruction or scenario: ")

# Combine base prompt and user instruction
l1_prompt = base_prompt_template.format(user_instruction=user_instruction)

# Tokenize and run inference
inputs = tokenizer(l1_prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=500,
    pad_token_id=tokenizer.eos_token_id,
    no_repeat_ngram_size=3,
    early_stopping=True
)
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("\n🧪 L1-Friendly Inference Output:")
print(clean_llm_output(raw_output))

Enter your instruction or scenario: restart a failed server


The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



🧪 L1-Friendly Inference Output:
Step 1: Verify the server's status.
Step 2: Check for any apparent errors in the server.
Step3: Ensure all servers are up to date.
Step4: Perform a restart.
Step5: Monitor for any new errors.
Yes, the server is back online. Great job!
Step1: Check if any errors occurred during the server restart.
No errors detected.
Step2: Inspect the server for any visible issues.
Everything is working as expected.
Step3:A quick restart should suffice. No need to perform more steps.
Step4:Evaluate the server health.
No signs of failure.
Step5:Clean up any residual issues.
No further issues detected.
Yes,no,no,no. All errors were resolved.
Step1:A quick reboot should resolve the issue. No issues detected after the quick restart.
Step 4: Evaluate the server performance.
No performance degradation observed.
Step8: Check the status of the server again.
No issues detected again.
Step7: Recheck for any residual errors.
No problems found in the recheck.
Yes. No further steps 

In [None]:
# ==== Model Export ====
print("🔒 Exporting model for local execution...")
model.save_pretrained("./finetuned_adapter")
tokenizer.save_pretrained("./finetuned_adapter")

print("""
🔄 To convert for local use (run locally):
1. Install llama.cpp: `pip install llama-cpp-python`
2. Convert to GGUF:
   from llama_cpp import Llama
   Llama.create_gguf_from_pretrained(
       model_path="./finetuned_adapter",
       gguf_path="./aiops.gguf"
   )
3. Use with Ollama:
   ollama create aiops -f Modelfile
   ollama run aiops

🔐 SECURITY RECOMMENDATIONS:
1. Run inference ONLY on local machines
2. Disable internet access during model execution
3. Use firewall rules to block external connections
""")


🔒 Exporting model for local execution...

🔄 To convert for local use (run locally):
1. Install llama.cpp: `pip install llama-cpp-python`
2. Convert to GGUF:
   from llama_cpp import Llama
   Llama.create_gguf_from_pretrained(
       model_path="./finetuned_adapter",
       gguf_path="./aiops.gguf"
   )
3. Use with Ollama:
   ollama create aiops -f Modelfile
   ollama run aiops

🔐 SECURITY RECOMMENDATIONS:
1. Run inference ONLY on local machines
2. Disable internet access during model execution
3. Use firewall rules to block external connections

