# Build the results dataframe

In [None]:
import os
import re
import pandas as pd


# Directory holding the "-2" run outputs.  Each sub-directory is scanned for
# files named results_<type>_-2_<layer>.json.
base_dir = "/home/ec2-user/rolevectors/target_direction/pipeline/runs/DeepSeek-R1-Distill-Llama-8B/mathematician/math/1.0/select_direction/-2"

# Group 1 is the intervention type (e.g. "addition"), group 2 is the layer
# number (e.g. "16").  Compiled once instead of on every loop iteration.
result_file_re = re.compile(r"results_(.+)_-2_(\d+)\.json")

records = []

# os.listdir() returns names in arbitrary order; sorting both listings keeps
# the row order of the dataframe reproducible from run to run.
for layer in sorted(os.listdir(base_dir)):
    layer_path = os.path.join(base_dir, layer)
    if not os.path.isdir(layer_path):
        continue

    for filename in sorted(os.listdir(layer_path)):
        if not filename.endswith(".json"):
            continue
        match = result_file_re.match(filename)
        if match is None:
            continue

        file_type, file_layer = match.groups()

        # Build the record for this results file.
        records.append({
            "layer": int(file_layer),
            "type": file_type,
            "path": os.path.join(layer_path, filename),
        })

# The baseline results file sits next to the "-2" directory.  Deriving its
# path from base_dir keeps the two in sync when base_dir is edited.
# Note: this makes the 'layer' column hold both ints and the string "baseline".
records.append({
    "layer": "baseline",
    "type": "baseline",
    "path": os.path.join(os.path.dirname(base_dir), "results_baseline.json"),
})

df = pd.DataFrame(records)

print(df)


In [None]:
import numpy as np


# One row per (results file, question): every record is repeated five times
# back to back, and the copies are numbered 1..5 in the new 'question' column.
question_numbers = np.tile(np.arange(1, 6), len(df))

df_expanded = df.loc[df.index.repeat(5)].copy()
df_expanded['question'] = question_numbers

print(df_expanded)

# From here on `df` refers to the per-question table.
df = df_expanded


In [None]:
import json
from transformers import AutoTokenizer


# Tokenizer loaded from the Hugging Face id below; get_token_count() uses it
# to turn each saved response text into token ids.
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")


# Parsed JSON content keyed by file path, so each results file is read from
# disk only once even though several rows point at the same file.
file_cache = {}

def get_token_count(row):
    """Return how many token ids the tokenizer yields for one saved response.

    `row` must provide 'path' (a JSON results file) and 'question' (the
    1-based position of the entry inside that file).  Parsed files are kept
    in the module-level `file_cache`.  If the file has fewer entries than the
    requested question, the empty string is tokenized instead.

    NOTE(review): the tokenizer is called with its default settings, so any
    special tokens it adds by default are part of the count -- confirm that
    this is the intended definition of "tokens generated".
    """
    results_path = row['path']
    position = row['question'] - 1  # 'question' is 1-based, the list is 0-based

    # Read and parse the file only the first time it is requested.
    if results_path not in file_cache:
        with open(results_path, 'r', encoding='utf-8') as handle:
            file_cache[results_path] = json.load(handle)
    entries = file_cache[results_path]

    response_text = (
        entries[position]["completion"]["response"]
        if position < len(entries)
        else ""
    )

    return len(tokenizer(response_text)["input_ids"])


# axis=1 hands each row to get_token_count; the result is stored per row.
df['token_generated'] = df.apply(get_token_count, axis=1)


print(df)


In [None]:
import json
import re
import numpy as np

# Parsed JSON content keyed by file path, so each results file is read once.
file_cache = {}

# Patterns tried, in this order, on the raw response text when the results
# file carries no usable "selected_answer".  Each captures one letter A-D.
_ANSWER_PATTERNS = (
    re.compile(r"\\boxed\{([A-D])\}"),
    re.compile(r"\*\*Answer:\s*([A-D])\*\*"),
    # The trailing \b stops the pattern from grabbing the first letter of an
    # ordinary word, e.g. "Answer: Because ..." must not be read as "B".
    re.compile(r"Answer:\s*([A-D])\b"),
)


def _extract_choice(response):
    """Return the letter captured by the first matching pattern, else None."""
    for pattern in _ANSWER_PATTERNS:
        found = pattern.search(response)
        if found:
            return found.group(1)
    return None


def get_correct_value(row):
    """Score one (results file, question) row.

    Parameters
    ----------
    row : mapping with keys 'path' (JSON results file) and 'question'
        (1-based position of the entry inside that file).

    Returns
    -------
    1 if the answer equals the entry's "target_answer", 0 if it differs, and
    np.nan when the file has no such entry or no answer can be determined.

    The answer is the entry's "selected_answer" unless that is empty or
    "N/A"; in that case it is extracted from the response text.
    """
    path = row['path']
    q_index = row['question'] - 1  # 'question' is 1-based, the list is 0-based

    # Read and parse the file only the first time it is requested.
    if path not in file_cache:
        with open(path, "r", encoding="utf-8") as f:
            file_cache[path] = json.load(f)
    data = file_cache[path]

    if q_index >= len(data):
        return np.nan

    item = data[q_index]

    # `or ""` also covers keys that are present but hold JSON null.
    target = (item.get("target_answer") or "").strip()
    selected = (item.get("selected_answer") or "").strip()

    if selected not in ("", "N/A"):
        extracted = selected
    else:
        response = (item.get("completion") or {}).get("response") or ""
        extracted = _extract_choice(response)

    if extracted is None:
        return np.nan

    return 1 if extracted == target else 0

# Score every row: 1 = matches the target, 0 = differs, NaN = entry missing
# from the file or no answer could be determined.
df['correct'] = df.apply(get_correct_value, axis=1)


# Rows left unscored (NaN).  This is a bare expression, so it is only
# displayed when it is the last statement of a notebook cell.
df[df['correct'].isna()]


In [None]:
# Tally of the 'correct' column; dropna=False keeps the NaN (unscored) bucket.
print(df['correct'].value_counts(dropna=False))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D


# Scatter plot of tokens generated per question, one column per layer, for
# the "addition" runs plus the baseline.
df_plot = df[(df['type'] == 'addition') | (df['layer'] == 'baseline')].copy()

# X-axis order: "baseline" first (when present), then layers in numeric order.
unique_layers = df_plot['layer'].unique()
numeric_layers = sorted(
    (name for name in unique_layers if name != "baseline"),
    key=int,
)
ordered_layers = (["baseline"] if "baseline" in unique_layers else []) + numeric_layers

# Integer x position for every layer label.
layer_to_x = {name: position for position, name in enumerate(ordered_layers)}
df_plot['x_pos'] = df_plot['layer'].apply(lambda name: layer_to_x[name])

# Small random horizontal offset so points in the same column do not overlap.
jitter = np.random.uniform(-0.1, 0.1, len(df_plot))
df_plot['x_jittered'] = df_plot['x_pos'] + jitter

# Colour encodes the question number.
question_colors = {
    1: 'red',
    2: 'blue',
    3: 'green',
    4: 'orange',
    5: 'purple'
}

# Marker encodes correctness: 1 -> circle, 0 -> cross (NaN uses a square below).
marker_map = {1: 'o', 0: 'x'}

plt.figure(figsize=(10, 6))

for q in sorted(df_plot['question'].unique()):
    per_question = df_plot[df_plot['question'] == q]

    # Drawn in the order correct, wrong, unknown; empty groups are skipped.
    groups = (
        (per_question[per_question['correct'] == 1], marker_map[1]),
        (per_question[per_question['correct'] == 0], marker_map[0]),
        (per_question[per_question['correct'].isna()], 's'),
    )
    for points, marker in groups:
        if points.empty:
            continue
        plt.scatter(points['x_jittered'], points['token_generated'],
                    color=question_colors[q], marker=marker, alpha=0.7)

# Dashed vertical separators halfway between neighbouring layer columns.
n_groups = len(ordered_layers)
for left_column in range(n_groups - 1):
    plt.axvline(x=left_column + 0.5, color='gray', linestyle='--', alpha=0.5)

plt.xticks(ticks=range(n_groups), labels=ordered_layers)
plt.xlabel("Layer (baseline seguito dai layers numerici)")
plt.ylabel("Numero di token generati")
plt.title("Scatterplot dei token generati per question (tipo addition) per ogni layer")
plt.ylim(0, 17000)

# Legend for answer correctness (marker shape).
legend_elements_correct = [
    Line2D([0], [0], marker=shape, color='black', linestyle='None', markersize=8, label=text)
    for shape, text in (('o', 'Correct'), ('x', 'Wrong'), ('s', 'Unknown'))
]
leg1 = plt.legend(handles=legend_elements_correct, title="Response Correctness", loc='upper right')
# A second plt.legend() call would replace the first legend, so pin it as an artist.
plt.gca().add_artist(leg1)

# Legend for the question colours.
legend_elements_questions = [
    Line2D([0], [0], marker='o', color=question_colors[q], linestyle='None', markersize=8, label=f'Question {q}')
    for q in sorted(question_colors.keys())
]
plt.legend(handles=legend_elements_questions, title="Question Colors", loc='upper left')

plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats as stats

import matplotlib.style as style
print(style.available)

# Font sizes, then the whitegrid style sheet.
plt.rcParams.update({
    'axes.labelsize': 12,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 11,
    'figure.titlesize': 14,
})

plt.style.use('seaborn-v0_8-whitegrid')

# Keep the "ablation" rows plus the baseline rows.
keep_rows = (df['type'] == 'ablation') | (df['layer'] == 'baseline')
df_filtered = df[keep_rows].copy()

# Unscored answers (NaN) are counted as wrong.
df_filtered['correct_filled'] = df_filtered['correct'].fillna(0)

# Per-layer totals: tokens generated and number of correct answers.
metrics = (
    df_filtered
    .groupby('layer')[['token_generated', 'correct_filled']]
    .sum()
    .rename(columns={'token_generated': 'total_tokens',
                     'correct_filled': 'total_correct'})
)

# Row order: "baseline" first (when present), then layers in numeric order.
unique_layers = metrics.index.tolist()
numeric_layers = sorted((name for name in unique_layers if name != 'baseline'), key=int)
ordered_layers = (['baseline'] if 'baseline' in unique_layers else []) + numeric_layers
metrics = metrics.reindex(ordered_layers)

# Express every layer relative to the baseline totals.
baseline_tokens = metrics.loc['baseline', 'total_tokens']
baseline_correct = metrics.loc['baseline', 'total_correct']

metrics['correct_ratio'] = None  # placeholder so column order matches after assignment below
del metrics['correct_ratio']
metrics['tokens_ratio'] = metrics['total_tokens'] / baseline_tokens
metrics['correct_ratio'] = metrics['total_correct'] / baseline_correct

def quadrant(row):
    """Name the quadrant of a layer relative to the baseline point (1, 1).

    Reads row['correct_ratio'] and row['tokens_ratio'].  Ratios equal to 1
    fall into the first matching label below; any combination that matches
    none of the three tests gets "Fewer correct, more tokens".
    """
    correct = row['correct_ratio']
    tokens = row['tokens_ratio']

    at_least_as_correct = correct >= 1
    at_most_as_many_tokens = tokens <= 1

    if at_least_as_correct and at_most_as_many_tokens:
        return "More correct, fewer tokens"
    if at_least_as_correct and tokens >= 1:
        return "More correct, more tokens"
    if correct <= 1 and at_most_as_many_tokens:
        return "Fewer correct, fewer tokens"
    return "Fewer correct, more tokens"

# Label every layer with its quadrant relative to the baseline.
metrics['quadrant'] = metrics.apply(quadrant, axis=1)

# One colour per quadrant label.
quad_colors = {
    "More correct, fewer tokens": "green",
    "More correct, more tokens": "gold",
    "Fewer correct, fewer tokens": "cornflowerblue",
    "Fewer correct, more tokens": "tomato"
}

fig, ax = plt.subplots(figsize=(9, 7))

# One scatter call per quadrant so each gets its own legend entry.
for quad_label, color in quad_colors.items():
    in_quadrant = metrics[metrics['quadrant'] == quad_label]
    ax.scatter(in_quadrant['correct_ratio'], in_quadrant['tokens_ratio'],
               s=120, alpha=0.8, color=color, label=quad_label)

# The baseline point is left out of the regression.
metrics_no_baseline = metrics.drop('baseline', errors='ignore')

# Regression line only (no points, no confidence band).
sns.regplot(
    data=metrics_no_baseline,
    x='correct_ratio',
    y='tokens_ratio',
    scatter=False,
    ci=None,
    color='gray',
    ax=ax
)

# Same fit computed with scipy to obtain the numbers shown in the text box.
x = metrics_no_baseline['correct_ratio']
y = metrics_no_baseline['tokens_ratio']
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

fit_summary = f"y = {slope:.2f}x + {intercept:.2f}\n$R^2$ = {r_value**2:.2f}\np = {p_value:.3f}"
summary_box = dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.7)
ax.text(0.05, 0.95, fit_summary,
        transform=ax.transAxes,  # axes-relative coordinates: top-left corner
        fontsize=10,
        verticalalignment='top',
        bbox=summary_box)

# Baseline drawn on top of everything as a black star.
baseline_row = metrics.loc['baseline']
ax.scatter(baseline_row['correct_ratio'], baseline_row['tokens_ratio'],
           s=200, alpha=1.0, color='black', marker='*',
           label='baseline', zorder=10)

# Only layer 30 gets a text label; the baseline and all other layers do not.
for layer_name, row in metrics.iterrows():
    if layer_name == 30:
        ax.annotate(
            layer_name,
            (row['correct_ratio'], row['tokens_ratio']),
            textcoords="offset points",
            xytext=(6, 6),
            ha='left',
            fontsize=9
        )

# Reference lines through ratio = 1 on both axes.
ax.axhline(y=1, color='gray', linestyle='--')
ax.axvline(x=1, color='gray', linestyle='--')

ax.set_xlabel("Correct Answers Ratio (Layer / Baseline)")
ax.set_ylabel("Tokens Ratio (Layer / Baseline)")
ax.set_title("Layer Comparison Relative to Baseline (Ablation)")

ax.legend(title="Quadrant / Baseline", loc='best')

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import matplotlib.style as style
print(style.available)

# Font sizes, then a predefined style sheet for a cleaner look.
plt.rcParams.update({
    'axes.labelsize': 12,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 11,
    'figure.titlesize': 14,
})

plt.style.use('seaborn-v0_8-whitegrid')

# Keep the rows with type "ablation" plus the baseline rows.
# NOTE(review): an earlier comment here said "addition"; the code filters
# on 'ablation' -- confirm which intervention type this plot is meant to show.
keep_rows = (df['type'] == 'ablation') | (df['layer'] == 'baseline')
df_filtered = df[keep_rows].copy()

# Unscored answers (NaN) are counted as wrong.
df_filtered['correct_filled'] = df_filtered['correct'].fillna(0)

# Per-layer totals: tokens generated and number of correct answers.
metrics = (
    df_filtered
    .groupby('layer')[['token_generated', 'correct_filled']]
    .sum()
    .rename(columns={'token_generated': 'total_tokens',
                     'correct_filled': 'total_correct'})
)

# Row order: "baseline" first (when present), then layers in numeric order.
unique_layers = metrics.index.tolist()
numeric_layers = sorted((name for name in unique_layers if name != 'baseline'), key=int)
ordered_layers = (['baseline'] if 'baseline' in unique_layers else []) + numeric_layers
metrics = metrics.reindex(ordered_layers)

# Baseline totals used as the denominators below.
baseline_tokens = metrics.loc['baseline', 'total_tokens']
baseline_correct = metrics.loc['baseline', 'total_correct']

# Ratios relative to the baseline.
metrics['tokens_ratio'] = metrics['total_tokens'] / baseline_tokens
metrics['correct_ratio'] = metrics['total_correct'] / baseline_correct
# Classify each layer by quadrant relative to baseline
def quadrant(row):
    """Classify a layer by where it sits relative to the baseline point (1, 1).

    Reads row['correct_ratio'] and row['tokens_ratio'].  The three tests are
    checked in order and the first one that holds decides the label; when
    none holds the result is "Fewer correct, more tokens".
    """
    c_ratio, t_ratio = row['correct_ratio'], row['tokens_ratio']

    ordered_tests = (
        (c_ratio >= 1 and t_ratio <= 1, "More correct, fewer tokens"),
        (c_ratio >= 1 and t_ratio >= 1, "More correct, more tokens"),
        (c_ratio <= 1 and t_ratio <= 1, "Fewer correct, fewer tokens"),
    )
    for holds, label in ordered_tests:
        if holds:
            return label
    return "Fewer correct, more tokens"

# Label every layer with its quadrant relative to the baseline.
metrics['quadrant'] = metrics.apply(quadrant, axis=1)

# One colour per quadrant label.
quad_colors = {
    "More correct, fewer tokens": "green",
    "More correct, more tokens": "gold",
    "Fewer correct, fewer tokens": "cornflowerblue",
    "Fewer correct, more tokens": "tomato"
}

# Scatter plot of the per-layer ratios.
fig, ax = plt.subplots(figsize=(9, 7))

# One scatter call per quadrant so each gets its own colour and legend entry.
for quad_label, color in quad_colors.items():
    in_quadrant = metrics[metrics['quadrant'] == quad_label]
    ax.scatter(in_quadrant['correct_ratio'], in_quadrant['tokens_ratio'],
               s=120, alpha=0.8, color=color, label=quad_label)

# Baseline drawn on top of everything as a black star.
baseline_row = metrics.loc['baseline']
ax.scatter(baseline_row['correct_ratio'], baseline_row['tokens_ratio'],
           s=200, alpha=1.0, color='black', marker='*',
           label='baseline', zorder=10)

# Text label next to every layer; the baseline already has its own marker.
for layer_name, row in metrics.iterrows():
    if layer_name != 'baseline':
        ax.annotate(
            layer_name,
            (row['correct_ratio'], row['tokens_ratio']),
            textcoords="offset points",
            xytext=(6, 6),
            ha='left',
            fontsize=9
        )

# Reference lines through ratio = 1 on both axes.
ax.axhline(y=1, color='gray', linestyle='--')
ax.axvline(x=1, color='gray', linestyle='--')

ax.set_xlabel("Correct Answers Ratio (Layer / Baseline)")
ax.set_ylabel("Tokens Ratio (Layer / Baseline)")
ax.set_title("Layer Comparison Relative to Baseline")

ax.legend(title="Quadrant / Baseline", loc='best')

plt.tight_layout()
plt.show()
