## Llama 3 8b unsloth finetuning on 4G anomalie solutions 

You will find the data set here!  [Data link](https://www.kaggle.com/datasets/2a1502add11898403c87b6137d2f0b2720c8802d93509c43f62f3f6e79da4081)

In [1]:
%%capture
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth

* We support Llama, Mistral, CodeLlama, TinyLlama, Vicuna, Open Hermes etc
* And Yi, Qwen ([llamafied](https://huggingface.co/models?sort=trending&search=qwen+llama)), Deepseek, all Llama, Mistral derived archs.
* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.
* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.
* [**NEW**] With [PR 26037](https://github.com/huggingface/transformers/pull/26037), we support downloading 4bit models **4x faster**! [Our repo](https://huggingface.co/unsloth) has Llama, Mistral 4bit models.

In [3]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [4]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
import pandas as pd
import string
from trl import SFTTrainer
from transformers import TrainingArguments
from evaluate import load
import numpy as np

# Configuration
max_seq_length = 2048
model_name = "unsloth/llama-3-8b-bnb-4bit"

In [37]:
# Load and preprocess data
def clean_text(text):
    return text.replace('\n', ' ').translate(str.maketrans('', '', string.punctuation))

def preprocess_function(examples):
    instructions = []
    inputs = []
    outputs = []
    
    for i in range(len(examples["ID"])):
        # Create input from all features except ID/Date/output
        input_parts = []
        for key in examples:
            if key not in ["ID", "Date", "network_labels", "improvement_solutions"]:
                value = examples[key][i]
                if isinstance(value, float) and np.isnan(value):
                    value = "N/A"
                input_parts.append(f"{key}: {value}")
        
        instructions.append("Analyze network metrics and provide optimization solutions")
        inputs.append(clean_text(", ".join(input_parts)))
        
        # Handle missing improvement_solutions in test set
        if "improvement_solutions" in examples:
            outputs.append(clean_text(str(examples["improvement_solutions"][i])))
        else:
            outputs.append("")  # Empty placeholder for test data
    
    return {"instruction": instructions, "input": inputs, "output": outputs}

# Load datasets
train_dataset = load_dataset("csv", data_files="/kaggle/input/indabax-tunisia-2025-anomaly-solver-challenge-2/Train.csv", split="train")
test_dataset = load_dataset("csv", data_files="/kaggle/input/indabax-tunisia-2025-anomaly-solver-challenge-2/Test.csv", split="train")

In [38]:
# Preprocess datasets
train_dataset = train_dataset.map(preprocess_function, batched=True)
test_dataset = test_dataset.map(preprocess_function, batched=True)


In [39]:
# Formatting function
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = "</s>"

In [40]:
def formatting_prompts_func(examples):
    texts = []
    for instr, inp, outp in zip(examples["instruction"], examples["input"], examples["output"]):
        text = alpaca_prompt.format(instr, inp, outp) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
test_dataset = test_dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [41]:
# Load model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    load_in_4bit=True,
)

==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.1.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [42]:
# Prepare model for training
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
)

In [44]:
# Training setup
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

# Train
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,3.0539
2,3.0283
3,2.9924
4,2.9166
5,2.7506
6,2.5058
7,2.2513
8,1.978
9,1.7803
10,1.6086


TrainOutput(global_step=60, training_loss=1.1461307148138682, metrics={'train_runtime': 3141.8312, 'train_samples_per_second': 0.306, 'train_steps_per_second': 0.019, 'total_flos': 3.884493561249792e+16, 'train_loss': 1.1461307148138682})

In [45]:
# Validation and CSV Generation
def generate_predictions(dataset):
    model.eval()
    predictions = []
    for example in dataset:
        prompt = alpaca_prompt.format(
            example["instruction"],
            example["input"],
            ""
        )
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to("cuda")
        outputs = model.generate(**inputs, max_new_tokens=200)
        pred_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        pred_text = pred_text.split("### Response:")[-1].strip()
        predictions.append(pred_text)
    return predictions

# Generate predictions
test_predictions = generate_predictions(test_dataset)

In [46]:
# Create submission CSV
submission_df = pd.DataFrame({
    "ID": test_dataset["ID"],
    "improvement_solutions": test_predictions,
})

# Save to CSV
submission_df.to_csv("network_optimization_solutions.csv", index=False)

print("Submission CSV created successfully!")

Submission CSV created successfully!
