# Cross Validation

When you train a probe for a new concept or on a new model, it usually makes sense to first run cross-validation to determine which layer to use.

In [None]:
from tuberlens.config import DATA_DIR, RESULTS_DIR
from tuberlens.cross_validation import ChooseLayerConfig, choose_best_layer_via_cv
from tuberlens.interfaces.probes import ProbeSpec, ProbeType


# --- Experiment settings -----------------------------------------------------
model_name = "google/gemma-3n-E4B-it"
max_layer = 35

# Active setup: deception dataset with "deceptive" as the positive class.
dataset_path = DATA_DIR / "deception" / "train_dataset.jsonl"
pos_class_label, neg_class_label = "deceptive", "honest"

# Alternative setup (high-stakes dataset); swap in for the three values above:
# dataset_path = DATA_DIR / "high-stakes" / "combined_deployment_22_04_25.jsonl"
# pos_class_label, neg_class_label = "high-stakes", "low-stakes"

# Every 5th layer index below max_layer; pass layers=None to check all layers.
cv_layers = [*range(0, max_layer, 5)]

# sklearn probe with no hyperparameter overrides.
sklearn_probe = ProbeSpec(name=ProbeType.sklearn, hyperparams={})

# --- Cross-validation run ------------------------------------------------------
config = ChooseLayerConfig(
    # Model and probe (alternative for model_name: LOCAL_MODELS[model_name])
    model_name=model_name,
    probe_spec=sklearn_probe,
    layers=cv_layers,
    # Data
    dataset_path=dataset_path,
    pos_class_label=pos_class_label,
    neg_class_label=neg_class_label,
    max_samples=None,
    start_turn_index=1,
    ending_tokens_to_ignore=5,
    # Evaluation
    cv_folds=4,
    batch_size=4,
    output_dir=RESULTS_DIR / "cross_validation",
)

results = choose_best_layer_via_cv(config)

In [None]:
# Report the selected layer with its accuracy, followed by the mean accuracy
# recorded for each evaluated layer (one line each).
print(
    f"Best layer: {results.best_layer} (accuracy: {results.best_layer_accuracy})",
    f"Layer mean accuracies: {results.results.layer_mean_accuracies}",
    sep="\n",
)