In [None]:
import nltk
# Fetch the 'punkt' tokenizer data (unpacked under tokenizers/ per the cell
# output); the later cells call nltk.word_tokenize, which needs tokenizer data.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# 'punkt' was already fetched by the previous cell; NLTK reports it as
# up-to-date and does not download it again.
nltk.download('punkt')
# NOTE(review): nothing in the visible cells uses the 'wordnet' or 'omw-1.4'
# corpora — confirm they are still needed before keeping these downloads.
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [None]:
# Fetch the 'punkt_tab' tokenizer data (unpacked under tokenizers/ per the
# cell output). Presumably required by the installed NLTK version's
# word_tokenize — TODO confirm against the NLTK release in use.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
import re
import nltk
from collections import Counter

# Download NLTK resources for tokenization.
# Redundant with the earlier download cells when the notebook is run top to
# bottom, but harmless: NLTK reports the package as already up-to-date.
nltk.download('punkt')

# Helper functions
def nltk_tokenize(text):
    """Lower-case ``text`` and split it into tokens with ``nltk.word_tokenize``.

    Args:
        text: The string to tokenize.

    Returns:
        Whatever ``nltk.word_tokenize`` returns for the lower-cased text.
    """
    lowered = text.lower()
    tokens = nltk.word_tokenize(lowered)
    return tokens

def calculate_completeness(ground_truth_tokens, model_tokens):
    """Calculates the Completeness metric.

    Completeness is the fraction of ground-truth tokens that also occur in the
    model output. Matching is count-aware: a token that appears ``k`` times in
    the ground truth is credited at most ``k`` times, and at most as often as
    it appears in the model output.

    Args:
        ground_truth_tokens: Sequence of reference tokens.
        model_tokens: Sequence of tokens produced by the model.

    Returns:
        A float in ``[0, 1]``. Returns ``0.0`` when ``ground_truth_tokens`` is
        empty, since there is nothing to cover (previously this raised
        ``ZeroDivisionError``).
    """
    total = len(ground_truth_tokens)
    if total == 0:
        return 0.0
    # Counter intersection keeps, per token, the minimum of the two counts.
    shared = Counter(ground_truth_tokens) & Counter(model_tokens)
    return sum(shared.values()) / total

def calculate_hallucination(ground_truth_tokens, model_tokens):
    """Calculates the Hallucination metric.

    Hallucination is the fraction of model tokens that never occur in the
    ground truth. Membership is checked against the set of distinct
    ground-truth tokens, so repeat counts in the ground truth do not matter.

    Args:
        ground_truth_tokens: Sequence of reference tokens.
        model_tokens: Sequence of tokens produced by the model.

    Returns:
        A float in ``[0, 1]``. Returns ``0.0`` when ``model_tokens`` is empty,
        since an empty output contains no unsupported tokens (previously this
        raised ``ZeroDivisionError``).
    """
    total = len(model_tokens)
    if total == 0:
        return 0.0
    reference_vocab = set(ground_truth_tokens)
    # Count in a single pass instead of materialising a throwaway list.
    unsupported = sum(1 for token in model_tokens if token not in reference_vocab)
    return unsupported / total

def calculate_irrelevance(ground_truth_tokens, model_tokens):
    """Calculates the Irrelevance metric.

    Defined as ``1 - completeness - hallucination`` for the same token lists.

    Note: completeness is normalised by the ground-truth length while
    hallucination is normalised by the model-output length, so the two terms
    do not share a denominator and the result can be negative.

    Args:
        ground_truth_tokens: Sequence of reference tokens.
        model_tokens: Sequence of tokens produced by the model.

    Returns:
        The remainder after subtracting both metrics from 1.
    """
    completeness = calculate_completeness(ground_truth_tokens, model_tokens)
    hallucination = calculate_hallucination(ground_truth_tokens, model_tokens)
    return 1 - completeness - hallucination

# Load the file content
file_path = "Generated.txt"  # Update with the correct path to your file
# Decode explicitly as UTF-8 instead of relying on the platform's default
# locale encoding, so the same file is read identically on every machine.
with open(file_path, 'r', encoding='utf-8') as file:
    text_content = file.read()

# Split the content into sections
lines = text_content.split("\n")
model_outputs = {}
ground_truth = ""

# A header line switches the active section; each following line is appended
# to that section until the next header. Header lines themselves are dropped,
# as is any text that appears before the first header.
current_section = None
for line in lines:
    line = line.strip()
    if line.startswith("LLama 3.2 3B"):
        current_section = "3B"
        model_outputs[current_section] = ""  # a repeated header restarts the section
    elif line.startswith("Model: llama3.2:1b"):
        current_section = "1B"
        model_outputs[current_section] = ""
    elif line.startswith("Ground Truth"):
        current_section = "ground_truth"
    elif current_section == "ground_truth":
        ground_truth += line + "\n"
    elif current_section in model_outputs:
        model_outputs[current_section] += line + "\n"

# Fail with a readable message if an expected model section never appeared,
# rather than with a bare KeyError from the lookups below.
missing_sections = [key for key in ("3B", "1B") if key not in model_outputs]
if missing_sections:
    raise KeyError(f"Section(s) {missing_sections} not found in {file_path!r}")

# Tokenize the texts using NLTK tokenizer
ground_truth_tokens = nltk_tokenize(ground_truth)
model_tokens_3B = nltk_tokenize(model_outputs["3B"])
model_tokens_1B = nltk_tokenize(model_outputs["1B"])

# Calculate metrics for both models
metrics_3B = {
    "Completeness": calculate_completeness(ground_truth_tokens, model_tokens_3B),
    "Hallucination": calculate_hallucination(ground_truth_tokens, model_tokens_3B),
    "Irrelevance": calculate_irrelevance(ground_truth_tokens, model_tokens_3B)
}

metrics_1B = {
    "Completeness": calculate_completeness(ground_truth_tokens, model_tokens_1B),
    "Hallucination": calculate_hallucination(ground_truth_tokens, model_tokens_1B),
    "Irrelevance": calculate_irrelevance(ground_truth_tokens, model_tokens_1B)
}

# Display results
print("3B Model Metrics:", metrics_3B)
print("1B Model Metrics:", metrics_1B)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


3B Model Metrics: {'Completeness': 0.6275303643724697, 'Hallucination': 0.27848101265822783, 'Irrelevance': 0.0939886229693025}
1B Model Metrics: {'Completeness': 0.659919028340081, 'Hallucination': 0.3262411347517731, 'Irrelevance': 0.013839836908145942}


In [None]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Load the pre-trained Sentence Transformer model.
# On first use the model files are fetched from the Hugging Face Hub (see the
# download progress in the cell output); no HF token is required for this.
model = SentenceTransformer('all-MiniLM-L6-v2')  # You can choose a different model if needed

# Helper: embed two texts and compare the embeddings with cosine similarity
def calculate_semantic_similarity(text1, text2, model):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        text1: First text to embed.
        text2: Second text to embed.
        model: Object exposing ``encode(text, convert_to_tensor=True)``; the
            notebook passes a ``SentenceTransformer`` instance.

    Returns:
        The similarity as a plain Python scalar, taken from ``util.cos_sim``
        via ``.item()``.

    NOTE(review): sentence-transformer models typically truncate inputs longer
    than their maximum sequence length — confirm long texts are not silently
    cut before trusting the score.
    """
    first_vec, second_vec = (
        model.encode(text, convert_to_tensor=True) for text in (text1, text2)
    )
    return util.cos_sim(first_vec, second_vec).item()

# Inputs come from the section-splitting cell above: the reference text and
# the two model outputs keyed by model size.
ground_truth_text = ground_truth
model_3B_text, model_1B_text = model_outputs["3B"], model_outputs["1B"]

# Score each model output against the same reference text
similarity_3B, similarity_1B = (
    calculate_semantic_similarity(ground_truth_text, candidate_text, model)
    for candidate_text in (model_3B_text, model_1B_text)
)

# Report both scores to four decimal places
print(f"Semantic Similarity (3B Model vs Ground Truth): {similarity_3B:.4f}")
print(f"Semantic Similarity (1B Model vs Ground Truth): {similarity_1B:.4f}")

  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Semantic Similarity (3B Model vs Ground Truth): 0.8551
Semantic Similarity (1B Model vs Ground Truth): 0.8449


In [None]:
import re
import nltk
from collections import Counter

# Download NLTK resources for tokenization.
# Redundant with the earlier download cells when the notebook is run top to
# bottom, but harmless: NLTK reports the package as already up-to-date.
nltk.download('punkt')

# Helper functions
def nltk_tokenize(text):
    """Lower-case ``text`` and split it into tokens with ``nltk.word_tokenize``.

    Args:
        text: The string to tokenize.

    Returns:
        Whatever ``nltk.word_tokenize`` returns for the lower-cased text.
    """
    lowered = text.lower()
    tokens = nltk.word_tokenize(lowered)
    return tokens

def calculate_completeness(ground_truth_tokens, model_tokens):
    """Calculates the Completeness metric.

    Completeness is the fraction of ground-truth tokens that also occur in the
    model output. Matching is count-aware: a token that appears ``k`` times in
    the ground truth is credited at most ``k`` times, and at most as often as
    it appears in the model output.

    Args:
        ground_truth_tokens: Sequence of reference tokens.
        model_tokens: Sequence of tokens produced by the model.

    Returns:
        A float in ``[0, 1]``. Returns ``0.0`` when ``ground_truth_tokens`` is
        empty, since there is nothing to cover (previously this raised
        ``ZeroDivisionError``).
    """
    total = len(ground_truth_tokens)
    if total == 0:
        return 0.0
    # Counter intersection keeps, per token, the minimum of the two counts.
    shared = Counter(ground_truth_tokens) & Counter(model_tokens)
    return sum(shared.values()) / total

def calculate_hallucination(ground_truth_tokens, model_tokens):
    """Calculates the Hallucination metric.

    Hallucination is the fraction of model tokens that never occur in the
    ground truth. Membership is checked against the set of distinct
    ground-truth tokens, so repeat counts in the ground truth do not matter.

    Args:
        ground_truth_tokens: Sequence of reference tokens.
        model_tokens: Sequence of tokens produced by the model.

    Returns:
        A float in ``[0, 1]``. Returns ``0.0`` when ``model_tokens`` is empty,
        since an empty output contains no unsupported tokens (previously this
        raised ``ZeroDivisionError``).
    """
    total = len(model_tokens)
    if total == 0:
        return 0.0
    reference_vocab = set(ground_truth_tokens)
    # Count in a single pass instead of materialising a throwaway list.
    unsupported = sum(1 for token in model_tokens if token not in reference_vocab)
    return unsupported / total

def calculate_irrelevance(ground_truth_tokens, model_tokens):
    """Calculates the Irrelevance metric.

    Defined as ``1 - completeness - hallucination`` for the same token lists.

    Note: completeness is normalised by the ground-truth length while
    hallucination is normalised by the model-output length, so the two terms
    do not share a denominator and the result can be negative.

    Args:
        ground_truth_tokens: Sequence of reference tokens.
        model_tokens: Sequence of tokens produced by the model.

    Returns:
        The remainder after subtracting both metrics from 1.
    """
    completeness = calculate_completeness(ground_truth_tokens, model_tokens)
    hallucination = calculate_hallucination(ground_truth_tokens, model_tokens)
    return 1 - completeness - hallucination

# Load the content from the uploaded file
file_path = "after_fine_tuning_measurments.txt"  # Ensure this is the correct file path
# Decode explicitly as UTF-8 instead of relying on the platform's default
# locale encoding, so the same file is read identically on every machine.
with open(file_path, 'r', encoding='utf-8') as file:
    text_content = file.read()

# Split the content into sections
lines = text_content.split("\n")
model_outputs = {}
ground_truth = ""

# A header line switches the active section; each following line is appended
# to that section until the next header. Header lines themselves are dropped,
# as is any text that appears before the first header.
current_section = None
for line in lines:
    line = line.strip()
    if line.startswith("LLama 3.2 3BF"):
        current_section = "3BF"
        model_outputs[current_section] = ""  # a repeated header restarts the section
    elif line.startswith("LLama 3.2 3BP"):
        current_section = "3BP"
        model_outputs[current_section] = ""
    elif line.startswith("Ground Truth"):
        current_section = "ground_truth"
    elif current_section == "ground_truth":
        ground_truth += line + "\n"
    elif current_section in model_outputs:
        model_outputs[current_section] += line + "\n"

# Fail with a readable message if an expected model section never appeared,
# rather than with a bare KeyError from the lookups below.
missing_sections = [key for key in ("3BF", "3BP") if key not in model_outputs]
if missing_sections:
    raise KeyError(f"Section(s) {missing_sections} not found in {file_path!r}")

# Tokenize the texts using NLTK tokenizer
ground_truth_tokens = nltk_tokenize(ground_truth)
model_tokens_3BF = nltk_tokenize(model_outputs["3BF"])
model_tokens_3BP = nltk_tokenize(model_outputs["3BP"])

# Calculate metrics for both models
metrics_3BF = {
    "Completeness": calculate_completeness(ground_truth_tokens, model_tokens_3BF),
    "Hallucination": calculate_hallucination(ground_truth_tokens, model_tokens_3BF),
    "Irrelevance": calculate_irrelevance(ground_truth_tokens, model_tokens_3BF)
}

metrics_3BP = {
    "Completeness": calculate_completeness(ground_truth_tokens, model_tokens_3BP),
    "Hallucination": calculate_hallucination(ground_truth_tokens, model_tokens_3BP),
    "Irrelevance": calculate_irrelevance(ground_truth_tokens, model_tokens_3BP)
}

# Display results
print("3BF Model Metrics:", metrics_3BF)
print("3BP Model Metrics:", metrics_3BP)

3BF Model Metrics: {'Completeness': 0.2145748987854251, 'Hallucination': 0.35714285714285715, 'Irrelevance': 0.4282822440717177}
3BP Model Metrics: {'Completeness': 0.4048582995951417, 'Hallucination': 0.390625, 'Irrelevance': 0.20451670040485825}


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Load the pre-trained Sentence Transformer model.
# NOTE(review): an identical `model` is already created by an earlier cell;
# this reload only matters when this cell is run in a fresh kernel.
model = SentenceTransformer('all-MiniLM-L6-v2')  # You can choose a different model if needed

# Helper: embed two texts and compare the embeddings with cosine similarity
def calculate_semantic_similarity(text1, text2, model):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        text1: First text to embed.
        text2: Second text to embed.
        model: Object exposing ``encode(text, convert_to_tensor=True)``; the
            notebook passes a ``SentenceTransformer`` instance.

    Returns:
        The similarity as a plain Python scalar, taken from ``util.cos_sim``
        via ``.item()``.

    NOTE(review): sentence-transformer models typically truncate inputs longer
    than their maximum sequence length — confirm long texts are not silently
    cut before trusting the score.
    """
    first_vec, second_vec = (
        model.encode(text, convert_to_tensor=True) for text in (text1, text2)
    )
    return util.cos_sim(first_vec, second_vec).item()

# Load the content from the uploaded file
file_path = "after_fine_tuning_measurments.txt"  # Ensure this is the correct file path
# Decode explicitly as UTF-8 instead of relying on the platform's default
# locale encoding, so the same file is read identically on every machine.
with open(file_path, 'r', encoding='utf-8') as file:
    text_content = file.read()

# Split the content into sections
lines = text_content.split("\n")
model_outputs = {}
ground_truth_text = ""

# A header line switches the active section; each following line is appended
# to that section until the next header. Header lines themselves are dropped,
# as is any text that appears before the first header.
current_section = None
for line in lines:
    line = line.strip()
    if line.startswith("LLama 3.2 3BF"):
        current_section = "3BF"
        model_outputs[current_section] = ""  # a repeated header restarts the section
    elif line.startswith("LLama 3.2 3BP"):
        current_section = "3BP"
        model_outputs[current_section] = ""
    elif line.startswith("Ground Truth"):
        current_section = "ground_truth"
    elif current_section == "ground_truth":
        ground_truth_text += line + "\n"
    elif current_section in model_outputs:
        model_outputs[current_section] += line + "\n"

# Fail with a readable message if an expected model section never appeared,
# rather than with a bare KeyError from the lookups below.
missing_sections = [key for key in ("3BF", "3BP") if key not in model_outputs]
if missing_sections:
    raise KeyError(f"Section(s) {missing_sections} not found in {file_path!r}")

# Prepare the inputs
model_3BF_text = model_outputs["3BF"]
model_3BP_text = model_outputs["3BP"]

# Compute semantic similarity
similarity_3BF = calculate_semantic_similarity(ground_truth_text, model_3BF_text, model)
similarity_3BP = calculate_semantic_similarity(ground_truth_text, model_3BP_text, model)

# Display results
print(f"Semantic Similarity (3BF Model vs Ground Truth): {similarity_3BF:.4f}")
print(f"Semantic Similarity (3BP Model vs Ground Truth): {similarity_3BP:.4f}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Semantic Similarity (3BF Model vs Ground Truth): 0.7075
Semantic Similarity (3BP Model vs Ground Truth): 0.6821
