In [1]:
# 1. Imports and Setup
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [3]:
# 2. Parameters and Paths
LAYERS       = list(range(0, 33))
DATA_DIR     = '../results'
INSIGHT_FILE = 'probe_layer_insight_v2.csv'
DISPLAY_LABELS = ['past', 'present', 'future']

# Ensure output directories exist for saving confusion matrices
os.makedirs('cm_overall', exist_ok=True)
os.makedirs('cm_per_language', exist_ok=True)

# Remove existing insights CSV if present (optional)
if os.path.exists(INSIGHT_FILE):
    os.remove(INSIGHT_FILE)

In [None]:
# 3. Loop Over Layers
for layer in LAYERS:
    # File paths for this layer
    train_path = f"{DATA_DIR}/llama_train_layer{layer}_features.csv"
    test_path  = f"{DATA_DIR}/llama_test_layer{layer}_features.csv"
    
    # 4. Load Data
    train_df = pd.read_csv(train_path)
    test_df  = pd.read_csv(test_path)
    
    feature_cols = [c for c in train_df.columns if c.startswith('hidden_')]
    X_train = train_df[feature_cols].values
    y_train = train_df['label'].values
    X_test  = test_df[feature_cols].values
    y_test  = test_df['label'].values
    
    # 5. Train Linear Probe
    probe = LogisticRegression(
        penalty='l1',
        solver='saga',
        C=1e-2,
        max_iter=100,
        n_jobs=6
    )
    probe.fit(X_train, y_train)
    y_pred = probe.predict(X_test)
    
    # 6. Overall Evaluation
    overall_acc = accuracy_score(y_test, y_pred)
    overall_report_dict = classification_report(
        y_test, y_pred,
        target_names=DISPLAY_LABELS,
        output_dict=True,
        zero_division=0
    )
    print(f"Layer {layer} - Overall Accuracy: {overall_acc:.4f}")
    print(classification_report(y_test, y_pred, target_names=DISPLAY_LABELS))
    
    # Save overall confusion matrix figure
    cm = confusion_matrix(y_test, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=DISPLAY_LABELS)
    fig, ax = plt.subplots(figsize=(4,4))
    disp.plot(cmap=plt.cm.Blues, ax=ax)
    ax.set_title(f"Layer {layer} - Overall CM")
    fig.savefig(f"cm_overall/layer{layer}_overall_cm.png")
    plt.close(fig)
    
    # 7. Language-Specific Evaluation
    lang_results = {}
    if 'language' in test_df.columns:
        for lang in test_df['language'].unique():
            mask = (test_df['language'] == lang)
            y_true_lang = y_test[mask]
            y_pred_lang = probe.predict(X_test[mask])
            
            acc_lang = accuracy_score(y_true_lang, y_pred_lang)
            report_dict = classification_report(
                y_true_lang, y_pred_lang,
                target_names=DISPLAY_LABELS,
                output_dict=True,
                zero_division=0
            )
            lang_results[lang] = {
                'accuracy': acc_lang,
                'report': report_dict
            }
            
            print(f"Layer {layer}, Language: {lang}, Accuracy: {acc_lang:.4f}")
            print(classification_report(y_true_lang, y_pred_lang, target_names=DISPLAY_LABELS))
            
            # Save per-language confusion matrix figure
            cm_lang = confusion_matrix(y_true_lang, y_pred_lang)
            disp_lang = ConfusionMatrixDisplay(confusion_matrix=cm_lang, display_labels=DISPLAY_LABELS)
            fig_lang, ax_lang = plt.subplots(figsize=(4,4))
            disp_lang.plot(cmap=plt.cm.Blues, ax=ax_lang)
            ax_lang.set_title(f"Layer {layer} - {lang} CM")
            fig_lang.savefig(f"cm_per_language/layer{layer}_{lang}_cm.png")
            plt.close(fig_lang)
    else:
        print(f"Layer {layer} - No 'language' column; skipping per-language evaluation.")
    
    # 8. Log Insights to CSV
    data_entry = {
        'layer': layer,
        'overall_acc': overall_acc,
        'overall_f1': overall_report_dict['macro avg']['f1-score']
    }
    # Add per-language accuracy and F1
    for lang, info in lang_results.items():
        data_entry[f'acc_{lang}'] = info['accuracy']
        data_entry[f'f1_{lang}']  = info['report']['macro avg']['f1-score']
        # Also store per-tense F1 for each language
        for tense in DISPLAY_LABELS:
            data_entry[f'f1_{lang}_{tense}'] = info['report'][tense]['f1-score']
    
    # Convert to DataFrame and append
    df_entry = pd.DataFrame([data_entry])
    header = not os.path.exists(INSIGHT_FILE)
    df_entry.to_csv(INSIGHT_FILE, mode='a', header=header, index=False)

    print(f"Layer {layer} insights appended to {INSIGHT_FILE}.\n")

Layer 0 - Overall Accuracy: 0.3332
              precision    recall  f1-score   support

        past       0.00      0.00      0.00      1549
     present       0.00      0.00      0.00      1549
      future       0.33      1.00      0.50      1548

    accuracy                           0.33      4646
   macro avg       0.11      0.33      0.17      4646
weighted avg       0.11      0.33      0.17      4646



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 0, Language: en, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.00      0.00      0.00       200
      future       0.33      1.00      0.50       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Layer 0, Language: de, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.00      0.00      0.00       200
      future       0.33      1.00      0.50       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 0, Language: fr, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.00      0.00      0.00       200
      future       0.33      1.00      0.50       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Layer 0, Language: it, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.00      0.00      0.00       200
      future       0.33      1.00      0.50       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 0, Language: pt, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.00      0.00      0.00       200
      future       0.33      1.00      0.50       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Layer 0, Language: es, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.00      0.00      0.00       200
      future       0.33      1.00      0.50       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 0, Language: hi, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.00      0.00      0.00       200
      future       0.33      1.00      0.50       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Layer 0, Language: th, Accuracy: 0.3318
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       149
     present       0.00      0.00      0.00       149
      future       0.33      1.00      0.50       148

    accuracy                           0.33       446
   macro avg       0.11      0.33      0.17       446
weighted avg       0.11      0.33      0.17       446



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 0 insights appended to probe_layer_insight_v2.csv.

Layer 1 - Overall Accuracy: 0.3334
              precision    recall  f1-score   support

        past       0.00      0.00      0.00      1549
     present       0.33      1.00      0.50      1549
      future       0.00      0.00      0.00      1548

    accuracy                           0.33      4646
   macro avg       0.11      0.33      0.17      4646
weighted avg       0.11      0.33      0.17      4646



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 1, Language: en, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.33      1.00      0.50       200
      future       0.00      0.00      0.00       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Layer 1, Language: de, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.33      1.00      0.50       200
      future       0.00      0.00      0.00       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 1, Language: fr, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.33      1.00      0.50       200
      future       0.00      0.00      0.00       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Layer 1, Language: it, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.33      1.00      0.50       200
      future       0.00      0.00      0.00       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 1, Language: pt, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.33      1.00      0.50       200
      future       0.00      0.00      0.00       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Layer 1, Language: es, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.33      1.00      0.50       200
      future       0.00      0.00      0.00       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 1, Language: hi, Accuracy: 0.3333
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       200
     present       0.33      1.00      0.50       200
      future       0.00      0.00      0.00       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Layer 1, Language: th, Accuracy: 0.3341
              precision    recall  f1-score   support

        past       0.00      0.00      0.00       149
     present       0.33      1.00      0.50       149
      future       0.00      0.00      0.00       148

    accuracy                           0.33       446
   macro avg       0.11      0.33      0.17       446
weighted avg       0.11      0.33      0.17       446



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 1 insights appended to probe_layer_insight_v2.csv.

Layer 2 - Overall Accuracy: 0.4245
              precision    recall  f1-score   support

        past       0.46      0.68      0.55      1549
     present       0.00      0.00      0.00      1549
      future       0.39      0.59      0.47      1548

    accuracy                           0.42      4646
   macro avg       0.28      0.42      0.34      4646
weighted avg       0.28      0.42      0.34      4646



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 2, Language: en, Accuracy: 0.3417
              precision    recall  f1-score   support

        past       0.33      0.56      0.42       200
     present       0.00      0.00      0.00       200
      future       0.36      0.46      0.40       200

    accuracy                           0.34       600
   macro avg       0.23      0.34      0.27       600
weighted avg       0.23      0.34      0.27       600

Layer 2, Language: de, Accuracy: 0.3950
              precision    recall  f1-score   support

        past       0.41      0.54      0.47       200
     present       0.00      0.00      0.00       200
      future       0.38      0.65      0.48       200

    accuracy                           0.40       600
   macro avg       0.26      0.40      0.32       600
weighted avg       0.26      0.40      0.32       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 2, Language: fr, Accuracy: 0.3517
              precision    recall  f1-score   support

        past       0.33      0.35      0.34       200
     present       0.00      0.00      0.00       200
      future       0.36      0.70      0.48       200

    accuracy                           0.35       600
   macro avg       0.23      0.35      0.27       600
weighted avg       0.23      0.35      0.27       600

Layer 2, Language: it, Accuracy: 0.4783
              precision    recall  f1-score   support

        past       0.53      0.71      0.61       200
     present       0.00      0.00      0.00       200
      future       0.43      0.72      0.54       200

    accuracy                           0.48       600
   macro avg       0.32      0.48      0.38       600
weighted avg       0.32      0.48      0.38       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 2, Language: pt, Accuracy: 0.4550
              precision    recall  f1-score   support

        past       0.60      0.62      0.61       200
     present       0.00      0.00      0.00       200
      future       0.38      0.74      0.50       200

    accuracy                           0.46       600
   macro avg       0.33      0.46      0.37       600
weighted avg       0.33      0.46      0.37       600

Layer 2, Language: es, Accuracy: 0.4783
              precision    recall  f1-score   support

        past       0.47      0.82      0.60       200
     present       0.00      0.00      0.00       200
      future       0.49      0.61      0.54       200

    accuracy                           0.48       600
   macro avg       0.32      0.48      0.38       600
weighted avg       0.32      0.48      0.38       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 2, Language: hi, Accuracy: 0.4750
              precision    recall  f1-score   support

        past       0.62      0.99      0.76       200
     present       0.00      0.00      0.00       200
      future       0.31      0.43      0.36       200

    accuracy                           0.47       600
   macro avg       0.31      0.48      0.37       600
weighted avg       0.31      0.47      0.37       600

Layer 2, Language: th, Accuracy: 0.4193
              precision    recall  f1-score   support

        past       0.40      0.91      0.56       149
     present       0.00      0.00      0.00       149
      future       0.47      0.35      0.40       148

    accuracy                           0.42       446
   macro avg       0.29      0.42      0.32       446
weighted avg       0.29      0.42      0.32       446



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Layer 2 insights appended to probe_layer_insight_v2.csv.

Layer 3 - Overall Accuracy: 0.4830
              precision    recall  f1-score   support

        past       0.53      0.75      0.62      1549
     present       0.45      0.34      0.39      1549
      future       0.43      0.36      0.39      1548

    accuracy                           0.48      4646
   macro avg       0.47      0.48      0.47      4646
weighted avg       0.47      0.48      0.47      4646

Layer 3, Language: en, Accuracy: 0.3500
              precision    recall  f1-score   support

        past       0.36      0.57      0.44       200
     present       0.35      0.09      0.14       200
      future       0.34      0.39      0.36       200

    accuracy                           0.35       600
   macro avg       0.35      0.35      0.31       600
weighted avg       0.35      0.35      0.31       600

Layer 3, Language: de, Accuracy: 0.4183
              precision    recall  f1-score   support

        pa

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Configuration
NUM_LAYERS = 32
LAYERS = list(range(1, NUM_LAYERS + 1))
DISPLAY_LABELS = ['past','present','future']

# Folders where confusion‐matrix PNGs were saved
OVERALL_CM_DIR     = "cm_overall"
PER_LANG_CM_DIR    = "cm_per_language"
OUTPUT_DIR         = "cm_snapshots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

def plot_cm_grid(image_paths, title, output_filename):
    """
    Given a list of filepaths to saved CM PNGs (one per layer),
    arrange them in an 8×4 grid (32 total) and save a single figure.
    """
    n_rows, n_cols = 8, 4
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 2, n_rows * 2))
    fig.suptitle(title, fontsize=16, y=0.92)
    
    for idx, layer in enumerate(LAYERS):
        row = idx // n_cols
        col = idx % n_cols
        ax = axes[row][col]
        img_path = image_paths[layer]
        
        # If file is missing, show empty axes
        if not os.path.exists(img_path):
            ax.axis('off')
            continue
        
        img = mpimg.imread(img_path)
        ax.imshow(img)
        ax.axis('off')
        ax.set_title(f"L{layer}", fontsize=8)
    
    # Turn off any unused subplots (if NUM_LAYERS < 32)
    for idx in range(len(LAYERS), n_rows * n_cols):
        row = idx // n_cols
        col = idx % n_cols
        axes[row][col].axis('off')
    
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(os.path.join(OUTPUT_DIR, output_filename), dpi=150)
    plt.close(fig)

# 1. Collect overall‐CM paths
overall_paths = {}
for layer in LAYERS:
    fname = f"layer{layer}_overall_cm.png"
    overall_paths[layer] = os.path.join(OVERALL_CM_DIR, fname)

# Plot all overall‐CMs in one 8×4 grid
plot_cm_grid(
    overall_paths,
    title="Overall Confusion Matrices: Layers 1–32",
    output_filename="all_layers_overall_cm.png"
)

# 2. For each language, collect and plot its per‐layer CMs
for lang in ['en','de','fr','it','pt','es','hi','th']:
    lang_paths = {}
    for layer in LAYERS:
        fname = f"layer{layer}_{lang}_cm.png"
        lang_paths[layer] = os.path.join(PER_LANG_CM_DIR, fname)
    
    plot_cm_grid(
        lang_paths,
        title=f"Confusion Matrices for \"{lang}\": Layers 1–32",
        output_filename=f"all_layers_{lang}_cm.png"
    )
