In [1]:
model_name = "davnas/Italian_Cousine_1.3"
dataset_name = "/content/test_set_new.jsonl"


In [2]:
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
#!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

!pip install -q bitsandbytes
!pip install -q evaluate jsonlines rouge_score bert-score
!pip install transformers peft accelerate bitsandbytes jsonlines
!pip install evaluate tqdm
import evaluate
import evaluate
import jsonlines
from unsloth import FastLanguageModel
import torch
from tqdm import tqdm  # Import tqdm for progress tracking


In [3]:
# prompt: please download the test set from "https://github.com/davidenascivera/LAB2_Scalable/blob/main/Test_set/test_set.jsonl"

import requests
from io import StringIO
import json

url = "https://raw.githubusercontent.com/davidenascivera/LAB2_Scalable/main/Test_set/test_set.jsonl"

try:
    response = requests.get(url, stream=True)
    response.raise_for_status()  # Raise an exception for bad status codes

    with open("test_set.jsonl", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

    print("Download successful. File saved as test_set.jsonl")

except requests.exceptions.RequestException as e:
    print(f"Error downloading the file: {e}")

Download successful. File saved as test_set.jsonl


In [4]:
# prompt: please download the test set from "https://github.com/davidenascivera/LAB2_Scalable/blob/main/Test_set/test_set.jsonl"

import requests
from io import StringIO
import json

url = "https://raw.githubusercontent.com/davidenascivera/LAB2_Scalable/main/Test_set/test_set_new.jsonl"

try:
    response = requests.get(url, stream=True)
    response.raise_for_status()  # Raise an exception for bad status codes

    with open("test_set_new.jsonl", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

    print("Download successful. File saved as test_set.jsonl")

except requests.exceptions.RequestException as e:
    print(f"Error downloading the file: {e}")

Download successful. File saved as test_set.jsonl


## 1.SimpleInference

In [5]:
"""
from transformers import AutoModel, AutoTokenizer

max_seq_length = 2048  # Choose any! We auto support ROPE scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, bFloat16 for Ampere+

model_name_or_path = "davnas/Italian_Cousine"

from unsloth import FastLanguageModel
import torch

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name_or_path,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
    # token = "hf_...", #se il nostro modello non è public
    # Use one if using gated models like meta-llama/Llama-2-7b-hf
)
"""

'\nfrom transformers import AutoModel, AutoTokenizer\n\nmax_seq_length = 2048  # Choose any! We auto support ROPE scaling internally!\ndtype = None  # None for auto detection. Float16 for Tesla T4, V100, bFloat16 for Ampere+\n\nmodel_name_or_path = "davnas/Italian_Cousine"\n\nfrom unsloth import FastLanguageModel\nimport torch\n\nmodel, tokenizer = FastLanguageModel.from_pretrained(\n    model_name=model_name_or_path,\n    max_seq_length=max_seq_length,\n    dtype=dtype,\n    load_in_4bit=True,\n    # token = "hf_...", #se il nostro modello non è public\n    # Use one if using gated models like meta-llama/Llama-2-7b-hf\n)\n'

In [6]:
"""
from unsloth import FastLanguageModel

FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

messages = [
    {"role": "user", "content": "How can I cook an smoothie?"},
]

inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,  # Must add for generation
    return_tensors="pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

model.generate(input_ids=inputs, streamer=text_streamer, max_new_tokens=128,
               use_cache=True, temperature=1.5, min_p=0.1)
"""

'\nfrom unsloth import FastLanguageModel\n\nFastLanguageModel.for_inference(model)  # Enable native 2x faster inference\n\nmessages = [\n    {"role": "user", "content": "How can I cook an smoothie?"},\n]\n\ninputs = tokenizer.apply_chat_template(\n    messages,\n    tokenize=True,\n    add_generation_prompt=True,  # Must add for generation\n    return_tensors="pt",\n).to("cuda")\n\nfrom transformers import TextStreamer\ntext_streamer = TextStreamer(tokenizer, skip_prompt=True)\n\nmodel.generate(input_ids=inputs, streamer=text_streamer, max_new_tokens=128,\n               use_cache=True, temperature=1.5, min_p=0.1)\n'

## 2.Evaluation

In [7]:
def load_test_data(file_path):
    """Load test data from JSONL file."""
    prompts = []
    completions = []
    with jsonlines.open(file_path) as reader:
        for item in reader:
            prompts.append(item['prompt'])
            completions.append(item['completion'])
    return prompts, completions



In [8]:
# prompt: clean the memory vram

import gc
import torch

# Clear GPU memory
torch.cuda.empty_cache()

# Run garbage collection
gc.collect()


60

In [9]:

#------------------------------------------

def generate_predictions(model, tokenizer, prompts):
    """Generate model predictions for given prompts with progress tracking."""
    predictions = []

    # Use tqdm to wrap the prompts list for progress tracking
    for i, prompt in enumerate(tqdm(prompts, desc="Generating Predictions")):
        print(f"Processing prompt {i + 1}/{len(prompts)}: {prompt}")

        messages = [{"role": "user", "content": prompt}]
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt"
        )
        # Ensure input_ids are Long type and on cuda
        inputs = inputs.to("cuda").to(torch.long)  # Changed this line

        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=512,
            use_cache=True,
            temperature=1.5,
            min_p=0.1
        )

        pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predictions.append(pred)

    return predictions

# Initialize model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,  # Use the actual model name
    max_seq_length=2048,
    load_in_4bit=True
)
FastLanguageModel.for_inference(model)

# Load test data
prompts, references = load_test_data(dataset_name)

# Generate predictions with progress tracking
predictions = generate_predictions(model, tokenizer, prompts)


==((====))==  Unsloth 2024.12.3: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


adapter_model.safetensors:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

Generating Predictions:   0%|          | 0/40 [00:00<?, ?it/s]

Processing prompt 1/40: How can I make a hearty vegetable stir-fry?


Generating Predictions:   2%|▎         | 1/40 [00:13<08:42, 13.40s/it]

Processing prompt 2/40: What’s the recipe for creamy mushroom soup?


Generating Predictions:   5%|▌         | 2/40 [00:19<05:52,  9.27s/it]

Processing prompt 3/40: How do I prepare a classic Caesar salad?


Generating Predictions:   8%|▊         | 3/40 [00:24<04:33,  7.40s/it]

Processing prompt 4/40: What’s the method to cook lemon garlic salmon?


Generating Predictions:  10%|█         | 4/40 [00:32<04:24,  7.35s/it]

Processing prompt 5/40: How can I make a delicious chicken Alfredo pasta?


Generating Predictions:  12%|█▎        | 5/40 [00:39<04:14,  7.28s/it]

Processing prompt 6/40: What’s the best way to prepare beef tacos?


Generating Predictions:  15%|█▌        | 6/40 [00:44<03:46,  6.65s/it]

Processing prompt 7/40: How do I make a fluffy omelette?


Generating Predictions:  18%|█▊        | 7/40 [00:50<03:33,  6.46s/it]

Processing prompt 8/40: What’s the recipe for homemade guacamole?


Generating Predictions:  20%|██        | 8/40 [01:02<04:16,  8.02s/it]

Processing prompt 9/40: How can I bake a chocolate chip cookie?


Generating Predictions:  22%|██▎       | 9/40 [01:09<04:00,  7.77s/it]

Processing prompt 10/40: What’s the best way to make beef stew?


Generating Predictions:  25%|██▌       | 10/40 [01:15<03:35,  7.18s/it]

Processing prompt 11/40: How do I prepare a spinach and feta quiche?


Generating Predictions:  28%|██▊       | 11/40 [01:24<03:44,  7.76s/it]

Processing prompt 12/40: What’s the method to cook shrimp scampi?


Generating Predictions:  30%|███       | 12/40 [01:31<03:29,  7.46s/it]

Processing prompt 13/40: How can I bake banana bread?


Generating Predictions:  32%|███▎      | 13/40 [01:36<03:04,  6.82s/it]

Processing prompt 14/40: What’s the recipe for homemade pizza Margherita?


Generating Predictions:  35%|███▌      | 14/40 [01:39<02:28,  5.71s/it]

Processing prompt 15/40: How do I make a refreshing fruit smoothie?


Generating Predictions:  38%|███▊      | 15/40 [01:45<02:27,  5.88s/it]

Processing prompt 16/40: What’s the best way to cook chicken tikka masala?


Generating Predictions:  40%|████      | 16/40 [02:00<03:21,  8.39s/it]

Processing prompt 17/40: How can I make a creamy pumpkin soup?


Generating Predictions:  42%|████▎     | 17/40 [02:05<02:51,  7.45s/it]

Processing prompt 18/40: What’s the recipe for garlic butter shrimp?


Generating Predictions:  45%|████▌     | 18/40 [02:12<02:41,  7.34s/it]

Processing prompt 19/40: How can I prepare a vegetarian lasagna?


Generating Predictions:  48%|████▊     | 19/40 [02:20<02:35,  7.42s/it]

Processing prompt 20/40: What’s the best way to make blueberry muffins?


Generating Predictions:  50%|█████     | 20/40 [02:28<02:33,  7.67s/it]

Processing prompt 21/40: How do I make a creamy chicken Alfredo sauce?


Generating Predictions:  52%|█████▎    | 21/40 [02:34<02:15,  7.15s/it]

Processing prompt 22/40: What’s the recipe for baked salmon with herbs?


Generating Predictions:  55%|█████▌    | 22/40 [02:41<02:06,  7.02s/it]

Processing prompt 23/40: How can I make a vegetarian chili?


Generating Predictions:  57%|█████▊    | 23/40 [02:46<01:50,  6.49s/it]

Processing prompt 24/40: What’s the method to make a vanilla panna cotta?


Generating Predictions:  60%|██████    | 24/40 [02:53<01:48,  6.81s/it]

Processing prompt 25/40: How do I make a Greek salad?


Generating Predictions:  62%|██████▎   | 25/40 [02:59<01:38,  6.56s/it]

Processing prompt 26/40: What’s the best way to prepare a tofu stir-fry?


Generating Predictions:  65%|██████▌   | 26/40 [03:07<01:35,  6.84s/it]

Processing prompt 27/40: How can I make a blueberry lemon tart?


Generating Predictions:  68%|██████▊   | 27/40 [03:13<01:24,  6.53s/it]

Processing prompt 28/40: What’s the best way to make ratatouille?


Generating Predictions:  70%|███████   | 28/40 [03:19<01:18,  6.50s/it]

Processing prompt 29/40: How do I make a chocolate mousse?


Generating Predictions:  72%|███████▎  | 29/40 [03:27<01:17,  7.01s/it]

Processing prompt 30/40: What’s the recipe for beef and broccoli stir-fry?


Generating Predictions:  75%|███████▌  | 30/40 [03:34<01:09,  6.99s/it]

Processing prompt 31/40: How can I make a creamy potato gratin?


Generating Predictions:  78%|███████▊  | 31/40 [03:39<00:57,  6.44s/it]

Processing prompt 32/40: What’s the best way to make a caprese salad?


Generating Predictions:  80%|████████  | 32/40 [03:46<00:51,  6.46s/it]

Processing prompt 33/40: How do I make a vegan lentil curry?


Generating Predictions:  82%|████████▎ | 33/40 [03:51<00:43,  6.19s/it]

Processing prompt 34/40: What’s the recipe for banana pancakes?


Generating Predictions:  85%|████████▌ | 34/40 [03:54<00:30,  5.01s/it]

Processing prompt 35/40: How do I make a refreshing cucumber salad?


Generating Predictions:  88%|████████▊ | 35/40 [04:01<00:27,  5.58s/it]

Processing prompt 36/40: What’s the best way to cook stuffed bell peppers?


Generating Predictions:  90%|█████████ | 36/40 [04:07<00:22,  5.74s/it]

Processing prompt 37/40: How can I make a classic French omelette?


Generating Predictions:  92%|█████████▎| 37/40 [04:14<00:18,  6.28s/it]

Processing prompt 38/40: What’s the recipe for creamy garlic mashed potatoes?


Generating Predictions:  95%|█████████▌| 38/40 [04:20<00:12,  6.07s/it]

Processing prompt 39/40: How do I make a simple bruschetta appetizer?


Generating Predictions:  98%|█████████▊| 39/40 [04:26<00:06,  6.00s/it]

Processing prompt 40/40: What’s the best way to make a shrimp and avocado salad?


Generating Predictions: 100%|██████████| 40/40 [04:31<00:00,  6.79s/it]


In [10]:
# Evaluate using ROUGE
rouge = evaluate.load('rouge')
rouge_results = rouge.compute(
    predictions=predictions,
    references=references,
    use_stemmer=True
)

# Evaluate using BERTScore
bertscore = evaluate.load('bertscore')
bert_results = bertscore.compute(
    predictions=predictions,
    references=references,
    lang="en",
    model_type="microsoft/deberta-xlarge-mnli"
)

print(f"Dataset: {dataset_name}")
print(f"Model: {model_name}")

# Print results
print("\nROUGE Scores:")
for metric, score in rouge_results.items():
    print(f"{metric}: {score:.4f}")

print("\nBERTScore:")
print(f"Precision: {sum(bert_results['precision']) / len(bert_results['precision']):.4f}")
print(f"Recall: {sum(bert_results['recall']) / len(bert_results['recall']):.4f}")
print(f"F1: {sum(bert_results['f1']) / len(bert_results['f1']):.4f}")

Dataset: /content/test_set_new.jsonl
Model: davnas/Italian_Cousine_1.3

ROUGE Scores:
rouge1: 0.4347
rouge2: 0.1312
rougeL: 0.2832
rougeLsum: 0.4289

BERTScore:
Precision: 0.6936
Recall: 0.6919
F1: 0.6920
