In [1]:
%load_ext autoreload
%autoreload 2
import os

import torch
import numpy as np

from unlearning.tool import get_hf_model
from unlearning.feature_activation import get_forget_retain_data, tokenize_dataset, get_feature_activation_sparsity, get_top_features
from unlearning.jump_relu import load_gemma2_2b_sae
from unlearning.intervention import scaling_intervention
from unlearning.metrics import calculate_MCQ_metrics

In [2]:
# Load the `google/gemma-2-2b-it` checkpoint through the project helper.
# Every later cell (tokenization, SAE activations, interventions) reuses `model`.
model = get_hf_model('google/gemma-2-2b-it')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Fetch the forget and retain corpora by name through the project helper.
forget_dataset, retain_dataset = get_forget_retain_data(
    'bio-forget-corpus',
    'wikitext',
)

# Sanity check: number of entries in each corpus and the length of its first entry.
print(f'{len(forget_dataset)} {len(forget_dataset[0])}')
print(f'{len(retain_dataset)} {len(retain_dataset[0])}')

# Tokenize each corpus with the model; forget first, then retain.
forget_tokens = tokenize_dataset(model, forget_dataset)
retain_tokens = tokenize_dataset(model, retain_dataset)

# Shapes of the two token tensors.
print(f'{forget_tokens.shape} {retain_tokens.shape}')

24432 16027
1962 859
torch.Size([153108, 1024]) torch.Size([275, 1024])


In [4]:
# Directory that receives the per-layer feature rankings and activation statistics.
out_dir = '../data/top_features/gemma-2-2b-it-sparsity'
# np.savetxt does not create missing directories; make sure the target exists
# up front instead of failing after several minutes of compute on the first layer.
os.makedirs(out_dir, exist_ok=True)

# Seeded generator so the forget-corpus subsample (and therefore the saved
# rankings) can be reproduced after a kernel restart.
sample_rng = torch.Generator().manual_seed(0)
n_forget_samples = 2048

for layer in range(20, model.config.num_hidden_layers):
    sae = load_gemma2_2b_sae(layer=layer)

    # Draw a fresh random subset of forget sequences for this layer. Indexing with
    # the truncated permutation selects the same rows as shuffling everything and
    # slicing, without copying the whole token tensor first.
    sample_idx = torch.randperm(forget_tokens.shape[0], generator=sample_rng)[:n_forget_samples]
    forget_sample = forget_tokens[sample_idx]

    # Per-feature activation statistics on the forget subsample and on the full retain set.
    mean_feature_activation_forget = get_feature_activation_sparsity(model, sae, forget_sample, batch_size=8)
    mean_feature_activation_retain = get_feature_activation_sparsity(model, sae, retain_tokens, batch_size=8)

    # NOTE(review): the meaning of the 0.01 argument is defined by get_top_features
    # (presumably a threshold on the retain statistic) - confirm against the helper.
    top_features = get_top_features(mean_feature_activation_forget, mean_feature_activation_retain, 0.01)

    # Persist the ranking (integer feature ids) and both raw statistics for this layer.
    np.savetxt(f'{out_dir}/layer{layer}.txt', top_features, fmt='%d')
    np.savetxt(f'{out_dir}/layer{layer}_mean_feature_activation_forget.txt', mean_feature_activation_forget, fmt='%f')
    np.savetxt(f'{out_dir}/layer{layer}_mean_feature_activation_retain.txt', mean_feature_activation_retain, fmt='%f')

Found SAE with l0=71 at path google/gemma-scope-2b-pt-res/layer_20/width_16k/average_l0_71/params.npz


params.npz:   0%|          | 0.00/302M [00:00<?, ?B/s]

100%|██████████| 256/256 [03:57<00:00,  1.08it/s]
100%|██████████| 35/35 [00:32<00:00,  1.09it/s]


[  373   143  4544 12558 14636  8984  6720 15231  5447 10630 14012  5748
 12945  9455  6344  1911  2348  5481 10879  3074]
Found SAE with l0=70 at path google/gemma-scope-2b-pt-res/layer_21/width_16k/average_l0_70/params.npz


params.npz:   0%|          | 0.00/302M [00:00<?, ?B/s]

100%|██████████| 256/256 [03:57<00:00,  1.08it/s]
100%|██████████| 35/35 [00:31<00:00,  1.12it/s]


[13865 13042 12286 15646 13503   809  9161  2372 13352  1733  4019 16332
  3406 12272  1465  9912 16037 11455 14499 12393]
Found SAE with l0=72 at path google/gemma-scope-2b-pt-res/layer_22/width_16k/average_l0_72/params.npz


params.npz:   0%|          | 0.00/302M [00:00<?, ?B/s]

100%|██████████| 256/256 [03:56<00:00,  1.08it/s]
100%|██████████| 35/35 [00:31<00:00,  1.10it/s]


[12592  8492  4923 13290  6637 14878   438  2395 10399  8046  4292 11754
 14071  5398  7155   425 13539  4958  2106  8874]
Found SAE with l0=75 at path google/gemma-scope-2b-pt-res/layer_23/width_16k/average_l0_75/params.npz


params.npz:   0%|          | 0.00/302M [00:00<?, ?B/s]

100%|██████████| 256/256 [03:53<00:00,  1.10it/s]
100%|██████████| 35/35 [00:31<00:00,  1.12it/s]


[15701 13245   744 11135 11419  4012  4813  3064 15221 12788  1107  9923
 10889  3539 13597   571  3632  3676  4120 10072]
Found SAE with l0=73 at path google/gemma-scope-2b-pt-res/layer_24/width_16k/average_l0_73/params.npz


params.npz:   0%|          | 0.00/302M [00:00<?, ?B/s]

100%|██████████| 256/256 [03:51<00:00,  1.11it/s]
100%|██████████| 35/35 [00:31<00:00,  1.12it/s]


[ 1121  2625  5026 16337 12292 13865  4056  3867 10961  8977  1873  8615
  3186  9233 12155  3264  7612  7546  9319 13492]
Found SAE with l0=116 at path google/gemma-scope-2b-pt-res/layer_25/width_16k/average_l0_116/params.npz


params.npz:   0%|          | 0.00/302M [00:00<?, ?B/s]

100%|██████████| 256/256 [03:51<00:00,  1.11it/s]
100%|██████████| 35/35 [00:31<00:00,  1.12it/s]

[14035 15357 15365 13399  9171  6536   733  3709 11878  4914  3082  3429
 11938  7493  6609  7147  5582  8622  9474 16117]





In [5]:
# Run the sparsity analysis for a single layer. `layer` and `sae` set here are
# reused by the feature-selection and intervention cells below.
layer = 3
sae = load_gemma2_2b_sae(layer=layer)

# Seeded random subsample of the forget corpus so this cell gives the same
# statistics on every run. Indexing with the truncated permutation picks the same
# rows as shuffling the whole tensor and slicing, without the full copy.
sample_rng = torch.Generator().manual_seed(0)
sample_idx = torch.randperm(forget_tokens.shape[0], generator=sample_rng)[:2048]
forget_sample = forget_tokens[sample_idx]

# Per-feature activation statistics on the forget subsample and on the full retain set.
mean_feature_activation_forget = get_feature_activation_sparsity(model, sae, forget_sample, batch_size=8)
mean_feature_activation_retain = get_feature_activation_sparsity(model, sae, retain_tokens, batch_size=8)

# Ranking and saving are intentionally disabled here; the ranking is computed in a later cell.
# top_features = get_top_features(mean_feature_activation_forget, mean_feature_activation_retain, 0.01)
# np.savetxt(f'../data/top_features/gemma-2-2b-it-sparsity/layer{layer}.txt', top_features, fmt='%d')

Found SAE with l0=59 at path google/gemma-scope-2b-pt-res/layer_3/width_16k/average_l0_59/params.npz


100%|██████████| 256/256 [04:10<00:00,  1.02it/s]
100%|██████████| 35/35 [00:32<00:00,  1.06it/s]


In [6]:
# Load the integer feature ranking stored for the current `layer` under the
# `gemma-2-2b-it` directory (not the `gemma-2-2b-it-sparsity` one written above).
# NOTE(review): presumably produced by a mean-activation ranking, judging by the
# variable name - confirm where this file comes from.
top_features_from_mean = np.loadtxt(f'../data/top_features/gemma-2-2b-it/layer{layer}.txt', dtype=int)

In [7]:
import plotly.express as px

def plot_comparison(forget, retain, good_feature_lst=None):
    """Scatter per-feature forget vs. retain statistics and highlight selected features.

    Args:
        forget: 1-D sequence with one value per feature, plotted on the x axis.
        retain: 1-D sequence of the same length, plotted on the y axis.
        good_feature_lst: optional indices of features to colour as
            'Selected from MCQ'; all other features are labelled 'Normal'.
            ``None`` or an empty sequence highlights nothing.

    The figure is shown with ``fig.show()``; nothing is returned.
    """
    # Build the colour labels from a boolean mask. A fixed-width string array
    # created from 'Normal' alone would silently truncate the longer label on
    # assignment, so the final array is produced in one step with np.where.
    is_selected = np.zeros(len(forget), dtype=bool)
    if good_feature_lst is not None and len(good_feature_lst) > 0:
        is_selected[good_feature_lst] = True
    color = np.where(is_selected, 'Selected from MCQ', 'Normal')

    # Main plot; hovering a point shows the feature index.
    fig = px.scatter(x=forget, y=retain, labels={'x': 'Forget', 'y': 'Retain'}, hover_data=[np.arange(len(forget))], color=color)

    # Dashed y = x reference line from the origin up to the smaller of the two maxima.
    max_val = min(max(forget), max(retain))
    fig.add_shape(
        type="line", line=dict(dash="dash"),
        x0=0, y0=0, x1=max_val, y1=max_val
    )

    fig.show()
    
    
# plot_comparison(mean_feature_activation_forget, mean_feature_activation_retain, good_feature_lst=top_features_from_mean[:20])
    
    

In [8]:
# Rank SAE features for the current `layer` from the forget / retain statistics computed above.
# NOTE(review): the meaning of the 0.01 argument is defined by get_top_features
# (presumably a threshold on the retain statistic) - confirm against the helper.
top_features = get_top_features(mean_feature_activation_forget, mean_feature_activation_retain, 0.01)

[ 8786  3025 11913 14227   679 10793  8803  1082  6691  9839 16064  8934
   616 10238 16169 13219  8396  3061  8845  2659]


In [9]:
from sae_lens.analysis.neuronpedia_integration import get_neuronpedia_quick_list

# get_neuronpedia_quick_list([373] + list(top_features[:20]), layer=3, model='gemma-2-2b', dataset='gemmascope-res-16k')

In [10]:
# Display the first 20 ranked feature indices - the same slice the intervention cells below pass in.
top_features[:20]

array([ 8786,  3025, 11913, 14227,   679, 10793,  8803,  1082,  6691,
        9839, 16064,  8934,   616, 10238, 16169, 13219,  8396,  3061,
        8845,  2659])

In [11]:
from unlearning.intervention import intervention
from unlearning.metrics import calculate_metrics_rmu

# Datasets to evaluate, in the order their results are reported.
dataset_names = [
    'wmdp-bio',
    'high_school_us_history',
    'high_school_geography',
    'college_computer_science',
    'human_aging',
    'college_biology',
]
# Each dataset gets its own settings dict, all requesting the 'correct' target metric.
metric_params = {name: {'target_metric': 'correct'} for name in dataset_names}

# Evaluate while the first 20 ranked features are intervened on
# ('clamp' mode, multiplier 100); the intervention only lasts for the `with` body.
with intervention(
    model,
    layer,
    sae,
    list(top_features[:20]),
    multiplier=100,
    intervention_type='clamp',
):
    results_all = calculate_metrics_rmu(
        model,
        dataset_names,
        metric_params=metric_params,
    )

100%|██████████| 87/87 [00:09<00:00,  9.64it/s]
100%|██████████| 18/18 [00:04<00:00,  4.02it/s]
100%|██████████| 18/18 [00:01<00:00, 11.45it/s]
100%|██████████| 2/2 [00:00<00:00,  8.05it/s]
100%|██████████| 14/14 [00:01<00:00,  9.12it/s]
100%|██████████| 13/13 [00:01<00:00,  8.97it/s]


In [12]:
# One line per dataset: its name followed by the stored `mean_correct` value.
for dataset, metrics in results_all.items():
    print(f'{dataset}: {metrics["mean_correct"]}')

wmdp-bio: 0.3773946166038513
high_school_us_history: 0.9811320900917053
high_school_geography: 0.8557692766189575
college_computer_science: 0.7777777910232544
human_aging: 0.5555555820465088
college_biology: 0.3561643958091736


In [13]:
# Same 'clamp' evaluation as before, but intervening on the first 20 features
# of the ranking loaded from disk (`top_features_from_mean`).
with intervention(
    model,
    layer,
    sae,
    list(top_features_from_mean[:20]),
    multiplier=100,
    intervention_type='clamp',
):
    results_all_mean = calculate_metrics_rmu(
        model,
        dataset_names,
        metric_params=metric_params,
    )

# One line per dataset: its name followed by the stored `mean_correct` value.
for dataset, metrics in results_all_mean.items():
    print(f'{dataset}: {metrics["mean_correct"]}')

100%|██████████| 87/87 [00:08<00:00,  9.79it/s]
100%|██████████| 18/18 [00:04<00:00,  4.03it/s]
100%|██████████| 18/18 [00:01<00:00, 11.23it/s]
100%|██████████| 2/2 [00:00<00:00, 10.36it/s]
100%|██████████| 14/14 [00:01<00:00, 10.95it/s]


In [14]:
from unlearning.intervention import intervention
from unlearning.metrics import calculate_metrics_rmu

# Datasets to evaluate, in the order their results are reported.
dataset_names = [
    'wmdp-bio',
    'high_school_us_history',
    'high_school_geography',
    'college_computer_science',
    'human_aging',
    'college_biology',
]
# Each dataset gets its own settings dict, this time requesting the 'all' target metric.
metric_params = {name: {'target_metric': 'all'} for name in dataset_names}

# Run both evaluations first ('scale' mode, multiplier 20), then print, so the
# progress bars and the report are not interleaved.
with intervention(
    model,
    layer,
    sae,
    list(top_features[:20]),
    multiplier=20,
    intervention_type='scale',
):
    results_all = calculate_metrics_rmu(model, dataset_names, metric_params=metric_params)

with intervention(
    model,
    layer,
    sae,
    list(top_features_from_mean[:20]),
    multiplier=20,
    intervention_type='scale',
):
    results_all_mean = calculate_metrics_rmu(model, dataset_names, metric_params=metric_params)

# Report each feature selection under its heading, one dataset per line.
for heading, results in (('Sparsity', results_all), ('Mean', results_all_mean)):
    print(heading)
    for dataset, metrics in results.items():
        print(f'{dataset}: {metrics["mean_correct"]}')

100%|██████████| 213/213 [00:21<00:00, 10.11it/s]
100%|██████████| 34/34 [00:08<00:00,  4.08it/s]
100%|██████████| 33/33 [00:02<00:00, 11.50it/s]
100%|██████████| 17/17 [00:02<00:00,  7.71it/s]
100%|██████████| 38/38 [00:03<00:00, 10.00it/s]
100%|██████████| 24/24 [00:02<00:00,  8.92it/s]
100%|██████████| 213/213 [00:20<00:00, 10.15it/s]
100%|██████████| 34/34 [00:08<00:00,  4.07it/s]
100%|██████████| 33/33 [00:03<00:00,  9.98it/s]
100%|██████████| 17/17 [00:02<00:00,  7.72it/s]
100%|██████████| 38/38 [00:03<00:00, 10.44it/s]
100%|██████████| 24/24 [00:02<00:00,  9.46it/s]

Sparsity
wmdp-bio: 0.35349568724632263
high_school_us_history: 0.6372549533843994
high_school_geography: 0.6868686676025391
college_computer_science: 0.44999998807907104
human_aging: 0.5022422075271606
college_biology: 0.4166666567325592
Mean
wmdp-bio: 0.5090337991714478
high_school_us_history: 0.75
high_school_geography: 0.7222222089767456
college_computer_science: 0.44999998807907104
human_aging: 0.6098654866218567
college_biology: 0.5902777910232544





In [15]:
from unlearning.intervention import intervention
from unlearning.metrics import calculate_metrics_rmu

# Datasets to evaluate and the per-dataset metric settings ('all' target metric).
dataset_names = ['wmdp-bio', 'high_school_us_history', 'high_school_geography', 'college_computer_science', 'human_aging', 'college_biology']
metric_params = {d: {'target_metric': 'all'} for d in dataset_names}

# Metrics for every configuration, keyed by (n_features, multiplier). Without this
# each run overwrites `results_all`, and the sweep survives only as printed text.
sweep_results = {}

# Grid over how many top-ranked features are scaled and by how much.
# NOTE(review): the recorded output shows identical numbers for every 0x run, so
# the multiplier-0 rows look like repeated baselines - confirm before pruning them.
for n_features in [1, 5, 10, 15, 20]:
    for multiplier in [0, 5, 10, 15, 20]:
        # The intervention is only active inside the `with` body.
        with intervention(model, layer, sae, list(top_features[:n_features]), multiplier=multiplier, intervention_type='scale'):
            results_all = calculate_metrics_rmu(model, dataset_names, metric_params=metric_params)

        sweep_results[(n_features, multiplier)] = results_all

        # Same report as before: configuration header, then one indented line per dataset.
        print(f'n_features: {n_features}, {multiplier}x')
        for dataset in results_all:
            print(f'\t\t{dataset}: {results_all[dataset]["mean_correct"]}')
            

100%|██████████| 213/213 [00:22<00:00,  9.55it/s]
100%|██████████| 34/34 [00:08<00:00,  4.07it/s]
100%|██████████| 33/33 [00:03<00:00, 10.08it/s]
100%|██████████| 17/17 [00:02<00:00,  7.71it/s]
100%|██████████| 38/38 [00:03<00:00,  9.86it/s]
100%|██████████| 24/24 [00:02<00:00,  9.82it/s]


n_features: 1, 0x
		wmdp-bio: 0.6355066895484924
		high_school_us_history: 0.7401961088180542
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.44999998807907104
		human_aging: 0.6322870254516602
		college_biology: 0.7083333134651184


100%|██████████| 213/213 [00:22<00:00,  9.59it/s]
100%|██████████| 34/34 [00:08<00:00,  4.08it/s]
100%|██████████| 33/33 [00:03<00:00,  9.52it/s]
100%|██████████| 17/17 [00:02<00:00,  7.75it/s]
100%|██████████| 38/38 [00:03<00:00, 11.47it/s]
100%|██████████| 24/24 [00:02<00:00,  9.81it/s]


n_features: 1, 5x
		wmdp-bio: 0.6386488676071167
		high_school_us_history: 0.7549020051956177
		high_school_geography: 0.747474730014801
		college_computer_science: 0.4399999976158142
		human_aging: 0.6278027296066284
		college_biology: 0.7083333134651184


100%|██████████| 213/213 [00:21<00:00,  9.96it/s]
100%|██████████| 34/34 [00:08<00:00,  4.09it/s]
100%|██████████| 33/33 [00:03<00:00, 10.51it/s]
100%|██████████| 17/17 [00:02<00:00,  7.66it/s]
100%|██████████| 38/38 [00:03<00:00, 11.64it/s]
100%|██████████| 24/24 [00:02<00:00,  9.59it/s]


n_features: 1, 10x
		wmdp-bio: 0.637077808380127
		high_school_us_history: 0.7450980544090271
		high_school_geography: 0.752525269985199
		college_computer_science: 0.41999998688697815
		human_aging: 0.6278027296066284
		college_biology: 0.7152777910232544


100%|██████████| 213/213 [00:22<00:00,  9.53it/s]
100%|██████████| 34/34 [00:08<00:00,  4.06it/s]
100%|██████████| 33/33 [00:02<00:00, 11.63it/s]
100%|██████████| 17/17 [00:02<00:00,  7.62it/s]
100%|██████████| 38/38 [00:03<00:00, 11.72it/s]
100%|██████████| 24/24 [00:02<00:00,  9.85it/s]


n_features: 1, 15x
		wmdp-bio: 0.6386488676071167
		high_school_us_history: 0.7352941632270813
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.429999977350235
		human_aging: 0.6188341379165649
		college_biology: 0.7152777910232544


100%|██████████| 213/213 [00:22<00:00,  9.37it/s]
100%|██████████| 34/34 [00:08<00:00,  4.01it/s]
100%|██████████| 33/33 [00:02<00:00, 11.04it/s]
100%|██████████| 17/17 [00:02<00:00,  7.73it/s]
100%|██████████| 38/38 [00:03<00:00, 11.66it/s]
100%|██████████| 24/24 [00:02<00:00,  9.83it/s]


n_features: 1, 20x
		wmdp-bio: 0.6465043425559998
		high_school_us_history: 0.7303921580314636
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.4399999976158142
		human_aging: 0.6098654866218567
		college_biology: 0.7152777910232544


100%|██████████| 213/213 [00:22<00:00,  9.27it/s]
100%|██████████| 34/34 [00:08<00:00,  4.02it/s]
100%|██████████| 33/33 [00:02<00:00, 11.58it/s]
100%|██████████| 17/17 [00:02<00:00,  7.36it/s]
100%|██████████| 38/38 [00:04<00:00,  9.39it/s]
100%|██████████| 24/24 [00:02<00:00,  8.53it/s]


n_features: 5, 0x
		wmdp-bio: 0.6355066895484924
		high_school_us_history: 0.7401961088180542
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.44999998807907104
		human_aging: 0.6322870254516602
		college_biology: 0.7083333134651184


100%|██████████| 213/213 [00:21<00:00,  9.89it/s]
100%|██████████| 34/34 [00:08<00:00,  4.03it/s]
100%|██████████| 33/33 [00:03<00:00,  9.75it/s]
100%|██████████| 17/17 [00:02<00:00,  7.68it/s]
100%|██████████| 38/38 [00:03<00:00, 11.65it/s]
100%|██████████| 24/24 [00:02<00:00,  9.72it/s]


n_features: 5, 5x
		wmdp-bio: 0.5428122878074646
		high_school_us_history: 0.7401961088180542
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.429999977350235
		human_aging: 0.6188341379165649
		college_biology: 0.6388888955116272


100%|██████████| 213/213 [00:21<00:00,  9.91it/s]
100%|██████████| 34/34 [00:08<00:00,  4.04it/s]
100%|██████████| 33/33 [00:03<00:00,  9.26it/s]
100%|██████████| 17/17 [00:02<00:00,  7.31it/s]
100%|██████████| 38/38 [00:03<00:00, 11.09it/s]
100%|██████████| 24/24 [00:02<00:00,  9.58it/s]


n_features: 5, 10x
		wmdp-bio: 0.4689709544181824
		high_school_us_history: 0.7205882668495178
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.4099999964237213
		human_aging: 0.6188341379165649
		college_biology: 0.6180555820465088


100%|██████████| 213/213 [00:22<00:00,  9.66it/s]
100%|██████████| 34/34 [00:08<00:00,  4.04it/s]
100%|██████████| 33/33 [00:03<00:00, 10.22it/s]
100%|██████████| 17/17 [00:02<00:00,  7.32it/s]
100%|██████████| 38/38 [00:03<00:00, 11.42it/s]
100%|██████████| 24/24 [00:02<00:00,  9.70it/s]


n_features: 5, 15x
		wmdp-bio: 0.41162610054016113
		high_school_us_history: 0.6911764740943909
		high_school_geography: 0.7121211886405945
		college_computer_science: 0.4099999964237213
		human_aging: 0.6233184337615967
		college_biology: 0.5277777910232544


100%|██████████| 213/213 [00:24<00:00,  8.84it/s]
100%|██████████| 34/34 [00:08<00:00,  4.05it/s]
100%|██████████| 33/33 [00:02<00:00, 11.74it/s]
100%|██████████| 17/17 [00:02<00:00,  7.63it/s]
100%|██████████| 38/38 [00:03<00:00, 11.83it/s]
100%|██████████| 24/24 [00:02<00:00,  9.01it/s]


n_features: 5, 20x
		wmdp-bio: 0.3919874429702759
		high_school_us_history: 0.6666666865348816
		high_school_geography: 0.6717171669006348
		college_computer_science: 0.429999977350235
		human_aging: 0.5919283032417297
		college_biology: 0.4930555522441864


100%|██████████| 213/213 [00:21<00:00,  9.82it/s]
100%|██████████| 34/34 [00:08<00:00,  4.06it/s]
100%|██████████| 33/33 [00:03<00:00, 10.99it/s]
100%|██████████| 17/17 [00:02<00:00,  7.75it/s]
100%|██████████| 38/38 [00:03<00:00, 12.11it/s]
100%|██████████| 24/24 [00:02<00:00,  8.95it/s]


n_features: 10, 0x
		wmdp-bio: 0.6355066895484924
		high_school_us_history: 0.7401961088180542
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.44999998807907104
		human_aging: 0.6322870254516602
		college_biology: 0.7083333134651184


100%|██████████| 213/213 [00:21<00:00,  9.72it/s]
100%|██████████| 34/34 [00:08<00:00,  4.04it/s]
100%|██████████| 33/33 [00:02<00:00, 11.34it/s]
100%|██████████| 17/17 [00:02<00:00,  7.74it/s]
100%|██████████| 38/38 [00:03<00:00, 11.44it/s]
100%|██████████| 24/24 [00:02<00:00,  9.68it/s]


n_features: 10, 5x
		wmdp-bio: 0.533385694026947
		high_school_us_history: 0.7401961088180542
		high_school_geography: 0.752525269985199
		college_computer_science: 0.4399999976158142
		human_aging: 0.6188341379165649
		college_biology: 0.6458333134651184


100%|██████████| 213/213 [00:21<00:00,  9.80it/s]
100%|██████████| 34/34 [00:08<00:00,  4.06it/s]
100%|██████████| 33/33 [00:02<00:00, 11.57it/s]
100%|██████████| 17/17 [00:02<00:00,  7.76it/s]
100%|██████████| 38/38 [00:03<00:00, 11.68it/s]
100%|██████████| 24/24 [00:02<00:00,  9.04it/s]


n_features: 10, 10x
		wmdp-bio: 0.4422623813152313
		high_school_us_history: 0.7254902124404907
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.429999977350235
		human_aging: 0.6278027296066284
		college_biology: 0.625


100%|██████████| 213/213 [00:22<00:00,  9.40it/s]
100%|██████████| 34/34 [00:08<00:00,  4.04it/s]
100%|██████████| 33/33 [00:02<00:00, 11.43it/s]
100%|██████████| 17/17 [00:02<00:00,  7.72it/s]
100%|██████████| 38/38 [00:03<00:00, 11.33it/s]
100%|██████████| 24/24 [00:02<00:00,  9.59it/s]


n_features: 10, 15x
		wmdp-bio: 0.4037706255912781
		high_school_us_history: 0.7009804248809814
		high_school_geography: 0.7020202279090881
		college_computer_science: 0.429999977350235
		human_aging: 0.6322870254516602
		college_biology: 0.5277777910232544


100%|██████████| 213/213 [00:22<00:00,  9.60it/s]
100%|██████████| 34/34 [00:08<00:00,  4.08it/s]
100%|██████████| 33/33 [00:03<00:00,  9.77it/s]
100%|██████████| 17/17 [00:02<00:00,  7.71it/s]
100%|██████████| 38/38 [00:03<00:00, 11.79it/s]
100%|██████████| 24/24 [00:02<00:00,  9.93it/s]


n_features: 10, 20x
		wmdp-bio: 0.3723487854003906
		high_school_us_history: 0.6519607901573181
		high_school_geography: 0.691919207572937
		college_computer_science: 0.4599999785423279
		human_aging: 0.5964125990867615
		college_biology: 0.472222238779068


100%|██████████| 213/213 [00:22<00:00,  9.46it/s]
100%|██████████| 34/34 [00:08<00:00,  4.04it/s]
100%|██████████| 33/33 [00:02<00:00, 11.40it/s]
100%|██████████| 17/17 [00:02<00:00,  7.79it/s]
100%|██████████| 38/38 [00:03<00:00, 11.91it/s]
100%|██████████| 24/24 [00:02<00:00,  9.80it/s]


n_features: 15, 0x
		wmdp-bio: 0.6355066895484924
		high_school_us_history: 0.7401961088180542
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.44999998807907104
		human_aging: 0.6322870254516602
		college_biology: 0.7083333134651184


100%|██████████| 213/213 [00:21<00:00,  9.98it/s]
100%|██████████| 34/34 [00:08<00:00,  4.05it/s]
100%|██████████| 33/33 [00:03<00:00, 10.47it/s]
100%|██████████| 17/17 [00:02<00:00,  7.78it/s]
100%|██████████| 38/38 [00:03<00:00, 11.33it/s]
100%|██████████| 24/24 [00:02<00:00,  9.41it/s]


n_features: 15, 5x
		wmdp-bio: 0.5192458629608154
		high_school_us_history: 0.75
		high_school_geography: 0.752525269985199
		college_computer_science: 0.4599999785423279
		human_aging: 0.6188341379165649
		college_biology: 0.6111111044883728


100%|██████████| 213/213 [00:21<00:00,  9.88it/s]
100%|██████████| 34/34 [00:08<00:00,  4.03it/s]
100%|██████████| 33/33 [00:02<00:00, 11.75it/s]
100%|██████████| 17/17 [00:02<00:00,  7.73it/s]
100%|██████████| 38/38 [00:03<00:00, 11.51it/s]
100%|██████████| 24/24 [00:02<00:00,  9.55it/s]


n_features: 15, 10x
		wmdp-bio: 0.4155538082122803
		high_school_us_history: 0.7107843160629272
		high_school_geography: 0.7373737096786499
		college_computer_science: 0.41999998688697815
		human_aging: 0.5695067644119263
		college_biology: 0.5972222089767456


100%|██████████| 213/213 [00:21<00:00,  9.84it/s]
100%|██████████| 34/34 [00:08<00:00,  4.05it/s]
100%|██████████| 33/33 [00:02<00:00, 12.07it/s]
100%|██████████| 17/17 [00:02<00:00,  7.80it/s]
100%|██████████| 38/38 [00:03<00:00, 12.21it/s]
100%|██████████| 24/24 [00:02<00:00,  9.85it/s]


n_features: 15, 15x
		wmdp-bio: 0.38098978996276855
		high_school_us_history: 0.7058823704719543
		high_school_geography: 0.7121211886405945
		college_computer_science: 0.41999998688697815
		human_aging: 0.5201793909072876
		college_biology: 0.4583333432674408


100%|██████████| 213/213 [00:20<00:00, 10.19it/s]
100%|██████████| 34/34 [00:08<00:00,  4.03it/s]
100%|██████████| 33/33 [00:02<00:00, 11.79it/s]
100%|██████████| 17/17 [00:02<00:00,  7.70it/s]
100%|██████████| 38/38 [00:03<00:00, 11.64it/s]
100%|██████████| 24/24 [00:02<00:00,  9.87it/s]


n_features: 15, 20x
		wmdp-bio: 0.3676355183124542
		high_school_us_history: 0.6470588445663452
		high_school_geography: 0.6767676472663879
		college_computer_science: 0.44999998807907104
		human_aging: 0.48878926038742065
		college_biology: 0.4583333432674408


100%|██████████| 213/213 [00:20<00:00, 10.22it/s]
100%|██████████| 34/34 [00:08<00:00,  4.05it/s]
100%|██████████| 33/33 [00:02<00:00, 11.76it/s]
100%|██████████| 17/17 [00:02<00:00,  7.75it/s]
100%|██████████| 38/38 [00:03<00:00, 11.93it/s]
100%|██████████| 24/24 [00:02<00:00,  9.87it/s]


n_features: 20, 0x
		wmdp-bio: 0.6355066895484924
		high_school_us_history: 0.7401961088180542
		high_school_geography: 0.7575757503509521
		college_computer_science: 0.44999998807907104
		human_aging: 0.6322870254516602
		college_biology: 0.7083333134651184


100%|██████████| 213/213 [00:21<00:00,  9.87it/s]
100%|██████████| 34/34 [00:08<00:00,  4.06it/s]
100%|██████████| 33/33 [00:02<00:00, 11.48it/s]
100%|██████████| 17/17 [00:02<00:00,  7.79it/s]
100%|██████████| 38/38 [00:03<00:00, 12.48it/s]
100%|██████████| 24/24 [00:02<00:00, 10.06it/s]


n_features: 20, 5x
		wmdp-bio: 0.49960723519325256
		high_school_us_history: 0.7450980544090271
		high_school_geography: 0.747474730014801
		college_computer_science: 0.4399999976158142
		human_aging: 0.6233184337615967
		college_biology: 0.625


100%|██████████| 213/213 [00:20<00:00, 10.21it/s]
100%|██████████| 34/34 [00:08<00:00,  4.05it/s]
100%|██████████| 33/33 [00:02<00:00, 11.78it/s]
100%|██████████| 17/17 [00:02<00:00,  7.69it/s]
100%|██████████| 38/38 [00:03<00:00, 12.41it/s]
100%|██████████| 24/24 [00:02<00:00,  9.74it/s]


n_features: 20, 10x
		wmdp-bio: 0.41633936762809753
		high_school_us_history: 0.7107843160629272
		high_school_geography: 0.7373737096786499
		college_computer_science: 0.429999977350235
		human_aging: 0.5470852255821228
		college_biology: 0.5416666865348816


100%|██████████| 213/213 [00:22<00:00,  9.57it/s]
100%|██████████| 34/34 [00:08<00:00,  4.06it/s]
100%|██████████| 33/33 [00:02<00:00, 11.46it/s]
100%|██████████| 17/17 [00:02<00:00,  7.90it/s]
100%|██████████| 38/38 [00:03<00:00, 10.99it/s]
100%|██████████| 24/24 [00:02<00:00,  9.82it/s]


n_features: 20, 15x
		wmdp-bio: 0.37627652287483215
		high_school_us_history: 0.7009804248809814
		high_school_geography: 0.7171717286109924
		college_computer_science: 0.41999998688697815
		human_aging: 0.5112107992172241
		college_biology: 0.375


100%|██████████| 213/213 [00:21<00:00,  9.95it/s]
100%|██████████| 34/34 [00:08<00:00,  4.07it/s]
100%|██████████| 33/33 [00:02<00:00, 11.92it/s]
100%|██████████| 17/17 [00:02<00:00,  7.70it/s]
100%|██████████| 38/38 [00:03<00:00, 11.68it/s]
100%|██████████| 24/24 [00:02<00:00,  9.75it/s]

n_features: 20, 20x
		wmdp-bio: 0.35349568724632263
		high_school_us_history: 0.6372549533843994
		high_school_geography: 0.6868686676025391
		college_computer_science: 0.44999998807907104
		human_aging: 0.5022422075271606
		college_biology: 0.4166666567325592



