# Which metrics I am using, why, and how to improve them

**Goal of this notebook**: explain which metrics I am using for my experiments, and why.

# 1. Setup and Configuration

In [8]:
import sys
import os
import pandas as pd
from pathlib import Path

# Put the parent of the current working directory on the import path so the
# project packages imported below can be resolved.
# NOTE(review): assumes the notebook is run from a direct subfolder of the repo root — confirm.
project_root = Path.cwd().parent
if sys.path.count(str(project_root)) == 0:
    sys.path.append(str(project_root))

from nudging.models import OllamaClient
from nudging.data_loader import load_data
from experiments.run_memorisation_experiment import run_experiment
import configs.experiment_config as experiment_config

In [2]:
# Select the experiment configuration used by every later cell.
# Alternative (swap in to run the baseline instead):
#   config = experiment_config.EXPERIMENT_BASELINE
config = experiment_config.EXPERIMENT_EXTENDED
# Start logging for this run; judging by the cell output this also logs the
# experiment name and the context percentages that will be evaluated.
config.start_logging()

INFO:configs.experiment_config:Running experiment: memorisation_extended
INFO:configs.experiment_config:Contexted to run: [5, 25, 50, 75, 90]


In [5]:
# Create the model client for the model named in the experiment config.
model_name = config.model_config.name
client = OllamaClient(model=model_name)

In [4]:
# TODO: clean this so i am not writing all this code for loading data.
# Read the loader settings from the experiment config, then load the texts
# from the data folder under the project root.
data_settings = config.data_config
dataset = load_data(
    base_dir=project_root / data_settings.data_folder_name,
    min_words=data_settings.min_word_count,
    max_samples=config.max_samples,
    categories=data_settings.categories,
)
print(f"loaded the data: {len(dataset)} files.")

INFO:nudging.data_loader:Starting data load from: /Users/abditimer/Documents/PhD/experiments/nudging/data
INFO:nudging.data_loader:Scanning directory: /Users/abditimer/Documents/PhD/experiments/nudging/data
INFO:nudging.data_loader:Skipping non-file: /Users/abditimer/Documents/PhD/experiments/nudging/data/songs
INFO:nudging.data_loader:Skipping non-file: /Users/abditimer/Documents/PhD/experiments/nudging/data/podcasts
INFO:nudging.data_loader:Skipping non-file: /Users/abditimer/Documents/PhD/experiments/nudging/data/songs/taylor_swift
INFO:nudging.data_loader:Skipping non-file: /Users/abditimer/Documents/PhD/experiments/nudging/data/podcasts/huberman
INFO:nudging.data_loader:Kept songs::taylor_swift::the_fate_of_ophelia: 432 words
INFO:nudging.data_loader:Kept songs::taylor_swift::shake_it_off: 560 words
INFO:nudging.data_loader:Loaded 2 files
INFO:nudging.data_loader:Load complete.


loaded the data: 2 files.


At this point, we have imported all the modules we need and connected to the locally running model server. Next, we run the experiments with our chosen metrics.

In [9]:
# Run the memorisation experiment over the loaded dataset. Per the logs below,
# each text is evaluated at every configured context percentage and scored with
# exact match, fuzzy match, token overlap and semantic similarity.
experiment_results = run_experiment(
    experiment_config=config,
    model_config=config.model_config,
    client=client,
    dataset=dataset,
)

INFO:experiments.run_memorisation_experiment:iterating over the loaded data....
INFO:experiments.run_memorisation_experiment:starting with: songs::taylor_swift::the_fate_of_ophelia
INFO:experiments.run_memorisation_experiment:=====>5%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 5,
  "context_words": 21,
  "target_words": 411,
  "generated_words": 32,
  "exact_match": 0.004930966469428008,
  "fuzzy_match": 0.14116575591985425,
  "token_overlap": 0.09219858156028368,
  "semantic_similarity": 0.3866390883922577
}
INFO:experiments.run_memorisation_experiment:=====>25%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 25,
  "context_words": 108,
  "target_words": 324,
  "generated_words": 111,
  "exact_match": 0.022085889570552148,
  "fuzzy_match": 0.3731833098921706,
  "token_overlap": 0.3560606060606061,
  "semantic_similarity": 0.6720641255378723
}
INFO:experiments.run_memorisation_experiment:=====>50%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 50,
  "context_words": 216,
  "target_words": 216,
  "generated_words": 119,
  "exact_match": 0.04380242311276794,
  "fuzzy_match": 0.4246491763270287,
  "token_overlap": 0.1885245901639344,
  "semantic_similarity": 0.5456586480140686
}
INFO:experiments.run_memorisation_experiment:=====>75%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 75,
  "context_words": 324,
  "target_words": 108,
  "generated_words": 140,
  "exact_match": 0.11450381679389313,
  "fuzzy_match": 0.45819935691318325,
  "token_overlap": 0.11904761904761904,
  "semantic_similarity": 0.4385761618614197
}
INFO:experiments.run_memorisation_experiment:=====>90%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 90,
  "context_words": 388,
  "target_words": 44,
  "generated_words": 158,
  "exact_match": 0.07627118644067797,
  "fuzzy_match": 0.4549950544015826,
  "token_overlap": 0.4155844155844156,
  "semantic_similarity": 0.617049515247345
}
INFO:experiments.run_memorisation_experiment:starting with: songs::taylor_swift::shake_it_off
INFO:experiments.run_memorisation_experiment:=====>5%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::shake_it_off",
  "percentage": 5,
  "context_words": 28,
  "target_words": 532,
  "generated_words": 49,
  "exact_match": 0.006172839506172839,
  "fuzzy_match": 0.12844653665097516,
  "token_overlap": 0.12666666666666668,
  "semantic_similarity": 0.26034101843833923
}
INFO:experiments.run_memorisation_experiment:=====>25%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::shake_it_off",
  "percentage": 25,
  "context_words": 140,
  "target_words": 420,
  "generated_words": 217,
  "exact_match": 0.046588235294117646,
  "fuzzy_match": 0.5120571598690087,
  "token_overlap": 0.2777777777777778,
  "semantic_similarity": 0.769573450088501
}
INFO:experiments.run_memorisation_experiment:=====>50%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::shake_it_off",
  "percentage": 50,
  "context_words": 280,
  "target_words": 280,
  "generated_words": 126,
  "exact_match": 0.05998536942209217,
  "fuzzy_match": 0.4318969787023279,
  "token_overlap": 0.216,
  "semantic_similarity": 0.7648342847824097
}
INFO:experiments.run_memorisation_experiment:=====>75%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::shake_it_off",
  "percentage": 75,
  "context_words": 420,
  "target_words": 140,
  "generated_words": 218,
  "exact_match": 0.06006006006006006,
  "fuzzy_match": 0.5717526942711287,
  "token_overlap": 0.2625,
  "semantic_similarity": 0.7242745757102966
}
INFO:experiments.run_memorisation_experiment:=====>90%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::shake_it_off",
  "percentage": 90,
  "context_words": 504,
  "target_words": 56,
  "generated_words": 41,
  "exact_match": 0.08849557522123894,
  "fuzzy_match": 0.7577319587628866,
  "token_overlap": 0.6363636363636364,
  "semantic_similarity": 0.9608675837516785
}
