In [1]:
import os
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# --- Probe configuration ---------------------------------------------------

# Layer indices to probe: 0 through 32 inclusive.
LAYERS = [*range(33)]

# Path templates for the per-layer feature CSVs; `{}` receives the layer index.
TRAIN_TMPL = "../results/llama_train_layer{}_features.csv"
TEST_TMPL = "../results/llama_test_layer{}_features.csv"

# Human-readable class names handed to classification_report as target_names.
# NOTE(review): the order must match the sorted order of the values in the
# `label` column -- confirm against the feature files.
display_labels = ["past", "present", "future"]

# Runs to execute: one (C, output CSV filename) pair per entry.
grid = [
    # (1e-3, "probe_detailed_insight_111.csv"),  # currently disabled
    (3e-3, "probe_detailed_insight_11.csv"),
]

In [None]:
# Seed for the stochastic 'saga' solver so that re-running the notebook
# reproduces the same probes and metrics.
PROBE_SEED = 0


def _tense_report(y_true, y_pred, class_labels):
    """Return classification_report as a dict with the class set pinned.

    Passing `labels` explicitly keeps the mapping between label values and
    `display_labels` fixed even when a subset of the data (e.g. a single
    language) is missing one of the classes; without it the number of
    inferred classes would not match `target_names` and the call would fail.
    A class absent from both `y_true` and `y_pred` gets an F1 of 0
    (`zero_division=0`) and is still included in the macro average.

    Parameters
    ----------
    y_true, y_pred : array-like
        Gold and predicted labels.
    class_labels : array-like
        Label values, in the order that corresponds to `display_labels`.
    """
    return classification_report(
        y_true, y_pred,
        labels=class_labels,
        target_names=display_labels,
        output_dict=True,
        zero_division=0,
    )


for C_val, OUT_CSV in grid:
    # start from a clean file: rows are appended below, one layer at a time,
    # so partial results survive an interrupted run
    if os.path.exists(OUT_CSV):
        os.remove(OUT_CSV)

    # column order of the output file, fixed by the first row written
    out_columns = None

    for idx, layer in enumerate(LAYERS):
        # load features
        df_tr = pd.read_csv(TRAIN_TMPL.format(layer))
        df_te = pd.read_csv(TEST_TMPL.format(layer))

        # prepare train/test arrays (feature columns are those named hidden_*)
        feat_cols = [c for c in df_tr.columns if c.startswith('hidden_')]
        X_tr, y_tr = df_tr[feat_cols].values, df_tr['label'].values
        X_te, y_te = df_te[feat_cols].values, df_te['label'].values

        # L1-regularised probe with C fixed to this grid entry's value
        # (swap in GridSearchCV here to tune C per layer instead).
        # NOTE(review): saga with max_iter=100 may stop before convergence
        # (watch for ConvergenceWarning) -- confirm the cap is intentional.
        probe = LogisticRegression(
            penalty='l1',
            solver='saga',
            C=C_val,
            max_iter=100,
            n_jobs=7,
            random_state=PROBE_SEED,
        )
        probe.fit(X_tr, y_tr)
        y_pred = probe.predict(X_te)

        # sorted label values seen in training; matched positionally
        # to display_labels in every report below
        class_labels = probe.classes_
        if len(class_labels) != len(display_labels):
            raise ValueError(
                f"layer {layer}: expected {len(display_labels)} classes, "
                f"found {len(class_labels)} in training labels: {list(class_labels)}"
            )

        # overall metrics
        rpt = _tense_report(y_te, y_pred, class_labels)
        row = {
            'layer':       layer,
            'C':           C_val,
            # computed directly: the report's 'accuracy' entry is only
            # emitted under conditions that depend on the `labels` argument
            'overall_acc': accuracy_score(y_te, y_pred),
            'overall_f1':  rpt['macro avg']['f1-score'],
        }
        for tense in display_labels:
            row[tense] = rpt[tense]['f1-score']

        # per-language & per-tense metrics
        for lang in df_te['language'].unique():
            # plain NumPy mask: y_te / y_pred are arrays, not Series
            mask = (df_te['language'] == lang).to_numpy()
            y_te_lang, y_pred_lang = y_te[mask], y_pred[mask]
            rpt_l = _tense_report(y_te_lang, y_pred_lang, class_labels)
            row[f'acc_{lang}'] = accuracy_score(y_te_lang, y_pred_lang)
            row[f'f1_{lang}']  = rpt_l['macro avg']['f1-score']
            for tense in display_labels:
                row[f'f1_{lang}_{tense}'] = rpt_l[tense]['f1-score']

        # write single-row result; the header is emitted once, so every later
        # row is aligned to the first row's columns before being appended
        df_row = pd.DataFrame([row])
        if out_columns is None:
            out_columns = list(df_row.columns)
        elif set(df_row.columns) != set(out_columns):
            print(f"Warning: columns for L={layer} differ from the first row; "
                  f"aligning to the header of {OUT_CSV}")
        df_row = df_row.reindex(columns=out_columns)
        df_row.to_csv(OUT_CSV, mode='a', header=(idx == 0), index=False)
        print(f"Processed L={layer}")

    print(f"Completed grid C={C_val}, results saved in {OUT_CSV}")

Processed L=0
Processed L=1
Processed L=2
Processed L=3
Processed L=4
Processed L=5
Processed L=6
Processed L=7
Processed L=8
Processed L=9
Processed L=10
Processed L=11
