# Steering Vector Evaluation

## Setup & Mount Drive

In [None]:
# IMPORTS
from google.colab import drive, userdata
import sys
import os

In [None]:
# Notebook environment setup: mount Drive, read secrets, and make the project
# importable. Every step is best-effort -- failures are printed rather than
# raised so the remaining steps still run and report their own status.
print("--- Notebook Setup ---")

# 1. Mount Google Drive
try:
    drive.mount('/content/drive')
    print("Google Drive mounted.")
except Exception as e:
    print(f"Error mounting drive: {e}")

# 2. Get Project Path from Colab Secrets (stored under the 'moral_path' key)
try:
    PROJECT_PATH = userdata.get('moral_path')
    if PROJECT_PATH is None:
        raise ValueError("path secret not found.")
    print(f"PROJECT_PATH set to: {PROJECT_PATH}")
except Exception as e:
    print(f"Error getting secret: {e}")

# Set the HF_TOKEN as an environment variable.
# userdata.get() raises when the secret is missing or the notebook has not
# been granted access, so the lookup is guarded like the other steps;
# otherwise the "not found" branch below is never reached and the cell
# aborts before step 3.
try:
    HF_TOKEN = userdata.get('HF_TOKEN')
except Exception as e:
    print(f"Error getting HF_TOKEN secret: {e}")
    HF_TOKEN = None

if HF_TOKEN:
    os.environ['HF_TOKEN'] = HF_TOKEN
    print("HF_TOKEN successfully set as environment variable.")
else:
    print("HF_TOKEN secret not found.")

# 3. Add project to Python path and change directory
# PROJECT_PATH is only bound if step 2 succeeded, hence the name check before
# touching the filesystem.
if 'PROJECT_PATH' in locals() and os.path.exists(PROJECT_PATH):
    if PROJECT_PATH not in sys.path:
        sys.path.append(PROJECT_PATH)
        print(f"Added {PROJECT_PATH} to sys.path")
    # Change into the project directory so relative paths resolve against it.
    os.chdir(PROJECT_PATH)
    print(f"Changed current working directory to: {os.getcwd()}")
else:
    print("PROJECT_PATH not valid. Cannot set up environment.")

## Eval Setup

In [None]:

# Import and Configure
# NOTE: module paths must not include the ".py" suffix -- Python would look
# for a submodule named "py" inside src.steering_benchmarks and fail with
# ModuleNotFoundError.
from src.steering_benchmarks import benchmark_steering_layers
from src.config import MODEL_LIST, STEERING_PROMPT_DIR

# Configuration
# Select Model
MODEL_TO_USE = MODEL_LIST[0]  # Llama 3.1 8B

# Select the vector file created in vector_generation.ipynb.
# The name is built from the last path component of the model id.
# TODO: add as a variable in the notebook preamble
VECTOR_FILENAME = f"{MODEL_TO_USE.split('/')[-1]}_layers_16-30_vectors.pt"

# Define the range of layers to check (16 through 30 inclusive, matching the
# layer range named in VECTOR_FILENAME).
LAYERS_TO_CHECK = list(range(16, 31))

# Sample size - keep small for initial benchmarking
SAMPLE_SIZE = 50

## Run Benchmark

In [None]:
# Echo the run configuration so the cell output records what was benchmarked.
print("Running Benchmark on Activation Addition")
print(f"Model: {MODEL_TO_USE}")
print(f"Vectors: {VECTOR_FILENAME}")
print(f"Testing Layers: {LAYERS_TO_CHECK}")

# Run the benchmark with the model, vector file, layer list, and sample size
# configured in the Eval Setup cell.
benchmark_steering_layers(
    model_id=MODEL_TO_USE,
    vector_filename=VECTOR_FILENAME,
    layers_to_test=LAYERS_TO_CHECK,
    sample_size=SAMPLE_SIZE,
)