# Experiment using the Nudging module

This notebook uses the nudging module, together with the experiment config and data files, to run a full end-to-end experiment.

In [1]:
import sys
import os
from pathlib import Path

# Locate the project root (the parent of the current working directory) and make
# it importable so the project's packages resolve from this notebook.
project_root = Path.cwd().parent
_root_entry = str(project_root)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

In [2]:
from nudging.models import OllamaClient
from nudging.data_loader import load_data
from nudging.experiment import run_single_experiment
import configs.experiment_config as experiment_config

In [3]:
import pandas as pd

Let's start the model and get all the experiment details.

In [4]:
# Select the experiment to run. The extended variant is active; assign
# experiment_config.EXPERIMENT_BASELINE instead to run the baseline variant.
config = experiment_config.EXPERIMENT_EXTENDED

# Echo the chosen experiment's name and the context percentages it will sweep.
print(
    f"Running experiment: {config.name}",
    f"Context percentages: {config.context_percentages}",
    sep="\n",
)

Running experiment: memorisation_extended
Context percentages: [5, 25, 50, 75, 90]


In [5]:
# Instantiate OllamaClient with the model name taken from the experiment config,
# then echo that name so the run log records which model was used.
client = OllamaClient(model=config.model_config.name)
print(f"Model initialised: {config.model_config.name}")

Model initialised: qwen3:0.6b


Now let's load our data.

In [6]:
# Load the dataset from the data folder under the project root, passing the
# minimum word count and sample cap configured for this experiment.
data_cfg = config.data_config
dataset = load_data(
    base_dir=project_root / data_cfg.data_folder_name,
    min_words=data_cfg.min_word_count,
    max_samples=config.max_samples,
)

# Report how many items were loaded.
print(f"loaded the data: {len(dataset)} files.")

loaded the data: 3 files.


## Let's start experimenting!

In [7]:
from experiments.run_memorisation_experiment import run_experiment

# Run the experiment for the selected config over the loaded dataset, using the
# client created earlier in this notebook.
# NOTE(review): the recorded output of this cell ends in a ReadTimeout
# (read timeout=120) raised while a response was being generated, so
# `experiment_results` was never assigned in that run. Confirm whether the
# client's timeout is configurable before re-running.
experiment_results = run_experiment(
    experiment_config=config,
    model_config=config.model_config,
    client=client,
    dataset=dataset,
)

INFO:experiments.run_memorisation_experiment:iterating over the loaded data....
INFO:experiments.run_memorisation_experiment:starting with: podcasts::huberman::how_to_build_immense_inner_strength_David_Goggins
INFO:experiments.run_memorisation_experiment:=====>5%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "podcasts::huberman::how_to_build_immense_inner_strength_David_Goggins",
  "percentage": 5,
  "context_words": 1372,
  "target_words": 26074,
  "generated_words": 164,
  "exact_match": 0.0004910775541449112,
  "fuzzy_match": 0.013812986789448756,
  "token_overlap": 0.018770402611534277,
  "semantic_similarity": 0.18813730776309967
}
INFO:experiments.run_memorisation_experiment:=====>25%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "podcasts::huberman::how_to_build_immense_inner_strength_David_Goggins",
  "percentage": 25,
  "context_words": 6861,
  "target_words": 20585,
  "generated_words": 321,
  "exact_match": 0.001001001001001001,
  "fuzzy_match": 0.029585003092977624,
  "token_overlap": 0.04664536741214057,
  "semantic_similarity": 0.4411312937736511
}
INFO:experiments.run_memorisation_experiment:=====>50%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "podcasts::huberman::how_to_build_immense_inner_strength_David_Goggins",
  "percentage": 50,
  "context_words": 13723,
  "target_words": 13723,
  "generated_words": 142,
  "exact_match": 0.001032089846137132,
  "fuzzy_match": 0.018699383862028185,
  "token_overlap": 0.025416666666666667,
  "semantic_similarity": 0.3717755079269409
}
INFO:experiments.run_memorisation_experiment:=====>75%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "podcasts::huberman::how_to_build_immense_inner_strength_David_Goggins",
  "percentage": 75,
  "context_words": 20584,
  "target_words": 6862,
  "generated_words": 420,
  "exact_match": 0.004271323035594359,
  "fuzzy_match": 0.11102102862933873,
  "token_overlap": 0.09157509157509157,
  "semantic_similarity": 0.3218907415866852
}
INFO:experiments.run_memorisation_experiment:=====>90%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "podcasts::huberman::how_to_build_immense_inner_strength_David_Goggins",
  "percentage": 90,
  "context_words": 24701,
  "target_words": 2745,
  "generated_words": 274,
  "exact_match": 0.007628990162617948,
  "fuzzy_match": 0.16850929413194027,
  "token_overlap": 0.06586169045005488,
  "semantic_similarity": 0.6978582739830017
}
INFO:experiments.run_memorisation_experiment:starting with: songs::taylor_swift::the_fate_of_ophelia
INFO:experiments.run_memorisation_experiment:=====>5%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 5,
  "context_words": 21,
  "target_words": 411,
  "generated_words": 52,
  "exact_match": 0.011341222879684419,
  "fuzzy_match": 0.20052310374891025,
  "token_overlap": 0.07894736842105263,
  "semantic_similarity": 0.41194984316825867
}
INFO:experiments.run_memorisation_experiment:=====>25%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 25,
  "context_words": 108,
  "target_words": 324,
  "generated_words": 116,
  "exact_match": 0.02147239263803681,
  "fuzzy_match": 0.35454545454545455,
  "token_overlap": 0.16875,
  "semantic_similarity": 0.5711839199066162
}
INFO:experiments.run_memorisation_experiment:=====>50%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 50,
  "context_words": 216,
  "target_words": 216,
  "generated_words": 186,
  "exact_match": 0.07921714818266543,
  "fuzzy_match": 0.5763224181360201,
  "token_overlap": 0.525,
  "semantic_similarity": 0.7457348704338074
}
INFO:experiments.run_memorisation_experiment:=====>75%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 75,
  "context_words": 324,
  "target_words": 108,
  "generated_words": 171,
  "exact_match": 0.08969465648854962,
  "fuzzy_match": 0.4576144036009002,
  "token_overlap": 0.1590909090909091,
  "semantic_similarity": 0.5140250325202942
}
INFO:experiments.run_memorisation_experiment:=====>90%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::the_fate_of_ophelia",
  "percentage": 90,
  "context_words": 388,
  "target_words": 44,
  "generated_words": 85,
  "exact_match": 0.07203389830508475,
  "fuzzy_match": 0.406832298136646,
  "token_overlap": 0.08955223880597014,
  "semantic_similarity": 0.34138622879981995
}
INFO:experiments.run_memorisation_experiment:starting with: songs::taylor_swift::shake_it_off
INFO:experiments.run_memorisation_experiment:=====>5%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.
INFO:nudging.metrics:calculating exact match
INFO:nudging.metrics:calculating fuzzy match
INFO:nudging.metrics:calculating token overlap
INFO:nudging.metrics:calculating semantic similarity


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:experiments.run_memorisation_experiment:Experiment results: {
  "content": "songs::taylor_swift::shake_it_off",
  "percentage": 5,
  "context_words": 28,
  "target_words": 532,
  "generated_words": 50,
  "exact_match": 0.0054466230936819175,
  "fuzzy_match": 0.14086378737541527,
  "token_overlap": 0.08552631578947369,
  "semantic_similarity": 0.39089882373809814
}
INFO:experiments.run_memorisation_experiment:=====>25%
INFO:nudging.experiment:running all experiments
INFO:nudging.experiment:generating a response via model client.
INFO:nudging.experiment:splitting text.


ReadTimeout: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=120)

In [None]:
# Display the raw value returned by run_experiment for a quick sanity check.
experiment_results

In [None]:
# Tabulate the experiment results so they can be inspected and plotted.
df_results = pd.DataFrame(data=experiment_results)

# Render the table as the cell output.
df_results

In [None]:
import matplotlib.pyplot as plt

# Visualise how each similarity metric responds to the context percentage.
#
# df_results can hold several rows per percentage (one per content item).
# Plotting the raw columns as a single line would join the last point of one
# content item to the first point of the next, making the lines double back.
# The rows are therefore sorted by percentage and split per content item
# before anything is drawn.
metrics_to_plot = ['exact_match', 'fuzzy_match', 'token_overlap', 'semantic_similarity']
results_sorted = df_results.sort_values('percentage', kind='stable')

# One (label, rows) pair per content item. A frame without a 'content' column
# is treated as a single unlabelled series.
if 'content' in results_sorted.columns:
    content_series = list(results_sorted.groupby('content', sort=False))
else:
    content_series = [(None, results_sorted)]
has_multiple_content = len(content_series) > 1

# Per-percentage mean across content items. This equals the raw values when
# every percentage occurs exactly once.
mean_by_percentage = results_sorted.groupby('percentage')[metrics_to_plot].mean()

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Plot 1: all metrics vs percentage (averaged over content items)
ax1 = axes[0, 0]
for metric in metrics_to_plot:
    ax1.plot(mean_by_percentage.index, mean_by_percentage[metric], marker='o', label=metric)
ax1.set_xlabel('Context Percentage (%)')
ax1.set_ylabel('Score')
ax1.set_title(
    'All Similarity Metrics vs Context'
    + (' (mean over content items)' if has_multiple_content else '')
)
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plots 2-4: one metric per panel, one line per content item.
# Each entry: (axes, metric column, y-axis label, title, colour for a single series)
ax2, ax3, ax4 = axes[0, 1], axes[1, 0], axes[1, 1]
focus_panels = [
    (ax2, 'fuzzy_match', 'Fuzzy Match Score',
     'Fuzzy Match: When Does Reproduction Begin?', 'orange'),
    (ax3, 'token_overlap', 'Token Overlap Score',
     'Word-Level Overlap', 'green'),
    (ax4, 'semantic_similarity', 'Semantic Similarity Score',
     'Semantic Similarity (Meaning Preservation)', 'purple'),
]
for ax, metric, y_label, title, single_color in focus_panels:
    for content_label, rows in content_series:
        line_style = {'marker': 'o', 'linewidth': 2}
        if has_multiple_content:
            # Let matplotlib cycle colours so the items are distinguishable, and
            # label each line with the last '::'-separated part of its content id.
            line_style['label'] = str(content_label).split('::')[-1]
        else:
            # A single series keeps the panel's fixed colour.
            line_style['color'] = single_color
        ax.plot(rows['percentage'], rows[metric], **line_style)
    ax.set_xlabel('Context Percentage (%)')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.grid(True, alpha=0.3)

# Reference lines on the fuzzy-match panel
ax2.axhline(y=0.8, color='r', linestyle='--', label='Strong threshold (0.8)')
ax2.axhline(y=0.6, color='y', linestyle='--', label='Moderate threshold (0.6)')
ax2.legend()

# The other focus panels only need a legend when several items are drawn.
if has_multiple_content:
    ax3.legend()
    ax4.legend()

plt.tight_layout()
plt.show()
