In [16]:
from ioi_utils import *
from circuit_utils import *
from sae_variants import *
from sae_interp import *
from sae_interventions import *
from training import *
from mandala._next.imports import *
from mandala._next.common_imports import *

# Circuit setup

In [32]:
from circuit_utils import *
# Print tensors in plain decimal notation instead of scientific notation.
torch.set_printoptions(sci_mode=False)
# If this cell is re-run in a kernel where `model` already exists, register it
# again in MODELS under MODEL_ID. On a fresh kernel `model` is not defined yet,
# so this is a no-op and the registration happens in the cell that calls get_model().
# NOTE(review): presumably needed because the star-import above rebinds MODELS — confirm.
if 'model' in locals():
    MODELS[MODEL_ID] = model

# Figure labels for attention-head classes, keyed by their short code.
HEAD_CLASS_FIG = dict(
    nm='Name Mover',
    bnm='Backup Name Mover',
    ind='Induction',
    nnm='Negative Name Mover',
    si='S-Inhibition',
    dt='Duplicate Token',
    pt='Previous Token',
)

# Figure labels for per-head components, keyed by their single-letter code.
COMPONENT_NAME_FIG = dict(k='Key', v='Value', q='Query', z='Attn Output')

# Figure labels for cross-sections, keyed by '<head classes>@<component>'.
CROSS_SECTION_FIG = dict([
    ('ind+dt@z', 'Ind+DT out'),
    ('nm+bnm@q', '(B)NM q'),
    ('nm+bnm@qk', '(B)NM qk'),
    ('nm+bnm@z', '(B)NM out'),
    ('si@v', 'S-I v'),
    ('si@z', 'S-I out'),
])

# Circuit object used to look up head classes (c.nm, c.bnm, c.si, c.ind, c.dt)
# and to build per-component node lists (c.qs / c.ks / c.vs / c.zs).
c = Circuit()
# Cross-sections to edit. Each entry is a 3-tuple:
#   (list of nodes, tuple of attribute names, cross-section key)
# where the key matches the keys of CROSS_SECTION_FIG / locations_displaynames.
# Only the uncommented entries are active; the commented-out ones are
# alternative configurations kept for reference.
paper_cross_sections = [
    # IO
    (c.zs(c.nm + c.bnm), ('io',), 'nm+bnm@z'),
    # (c.qs(c.nm + c.bnm) + c.ks(c.nm + c.bnm), ('io',), 'nm+bnm@qk'),
    # (c.qs(c.nm + c.bnm), ('io',), 'nm+bnm@q'),
    # # S
    # (c.qs(c.nm + c.bnm) + c.ks(c.nm + c.bnm), ('s',), 'nm+bnm@qk'),
    # (c.qs(c.nm + c.bnm), ('s',), 'nm+bnm@q'),
    # (c.vs(c.si), ('s',), 'si@v'),
    # (c.zs(c.si), ('s',), 'si@z'),
    # (c.zs(c.ind) + c.zs(c.dt), ('s',), 'ind+dt@z'),
    # Pos
    (c.qs(c.nm + c.bnm) + c.ks(c.nm + c.bnm), ('io_pos',), 'nm+bnm@qk'),
    (c.qs(c.nm + c.bnm), ('io_pos',), 'nm+bnm@q'),
    (c.zs(c.si), ('io_pos',), 'si@z'),
    # NOTE(review): NODES only keeps the c.vs(c.si) nodes with seq_pos == 's2';
    # this entry uses all of c.vs(c.si) — confirm the two sets coincide.
    (c.vs(c.si), ('io_pos',), 'si@v'),
    (c.zs(c.ind) + c.zs(c.dt), ('io_pos',), 'ind+dt@z'),
    # # Pos + S
    # (c.qs(c.nm + c.bnm) + c.ks(c.nm + c.bnm), ('io_pos', 's'), 'nm+bnm@qk'),
    # (c.zs(c.si), ('io_pos', 's'), 'si@z'),
    # (c.vs(c.si), ('io_pos', 's'), 'si@v'),
    # (c.zs(c.ind) + c.zs(c.dt), ('io_pos', 's'), 'ind+dt@z'),
    # # All
    # (c.qs(c.nm + c.bnm) + c.ks(c.nm + c.bnm), ('io', 'io_pos', 's'), 'nm+bnm@qk'),
]

# Display names for the edit locations, keyed by '<head classes>@<component>'.
locations_displaynames = dict([
    ('nm+bnm@z', '(B)NM out'),
    ('nm+bnm@qk', '(B)NM qk'),
    ('nm+bnm@q', '(B)NM q'),
    ('si@v', 'S-I v'),
    ('si@z', 'S-I out'),
    ('ind+dt@z', 'Ind+DT out'),
])

# All nodes whose activations are collected below, concatenated in a fixed order.
# Of the S-Inhibition value nodes only those at sequence position 's2' are kept.
NODES = (
    c.zs(c.nm + c.bnm)
    + c.qs(c.nm + c.bnm)
    + c.zs(c.si)
    + [value_node for value_node in c.vs(c.si) if value_node.seq_pos == 's2']
    + c.zs(c.ind)
    + c.zs(c.dt)
    + c.ks(c.nm + c.bnm)
)

In [3]:
import os

# Path of the storage database. The default is the original hard-coded location;
# set the IOI_DB_PATH environment variable to run the notebook against a
# database stored elsewhere without editing this cell.
DB_PATH = os.environ.get('IOI_DB_PATH', '/media/amakelov/SanDisk1TB/paper_sprint/test.db')

In [4]:
# Storage object backed by the database at DB_PATH. It is used below as a
# context manager (`with storage:`) and for unwrap / load_ref / commit calls.
storage = Storage(db_path=DB_PATH)

In [5]:
# Load the model and register it in MODELS under MODEL_ID.
model = MODELS[MODEL_ID] = get_model()



Loaded pretrained model gpt2-small into HookedTransformer


# Preparing datasets

In [6]:
# Builds, inside the storage context, everything the later cells rely on:
#   - training prompts and their activations (P_train, A_TRAIN_DICT),
#   - evaluation prompts, activations, clean and mean-ablated logit differences,
#   - counterfactual prompts and activations for every entry of ATTRIBUTES.
# nodes = list(c.nodes.keys())
# circuit_nodes = list(c.nodes.keys())

with storage:

    ############################################################################
    ### prompt dataset for training supervised features
    ############################################################################
    P_train = generate_prompts(
        distribution=full_distribution,
        patterns=['ABB', 'BAB'],
        prompts_per_pattern=10_000,
        random_seed=0,
    )
    N_TRAIN = len(storage.unwrap(P_train))
    ### activations for training supervised features
    As_train = run_with_cache(
        prompts=P_train, 
        nodes=NODES,
        batch_size=100,
        model_id=MODEL_ID,
        verbose=True,
    )
    # Pair each node with its result; assumes run_with_cache returns one entry
    # per node in the order of NODES — TODO confirm.
    A_TRAIN_DICT = {node: A for node, A in zip(NODES, As_train)}

    # ### precompute the mean logit difference for clean training data
    # logits_train_clean = run_with_hooks(prompts=P_train, hooks=[], batch_size=200,)
    # CLEAN_LD_MEAN = (storage.unwrap(logits_train_clean)[:, 0] - storage.unwrap(logits_train_clean)[:, 1]).mean().item()

    # ### precompute the mean-ablated logit difference when ablating each node
    # A_TRAIN_MEAN_DICT = {node: get_dataset_mean(A) for node, A in A_TRAIN_DICT.items()}

    # MEAN_ABLATED_LD_DICT = {}
    # for node, A in A_TRAIN_DICT.items():
    #     MEAN_ABLATED_LD_DICT[node] = compute_mean_ablated_lds(
    #         node=node, prompts=P_train, A_mean=A_TRAIN_MEAN_DICT[node], batch_size=200,
    #     )

    ############################################################################
    ### prompt dataset for editing and other evaluations
    ############################################################################
    # Split the names in two halves: base (evaluation) prompts draw from the
    # first half, editing sources from the second half.
    # NOTE(review): the split index comes from len(NAMES) but is applied to
    # full_distribution.names — assumes both have the same length; confirm.
    N_NAMES = len(NAMES)
    editing_base_distribution = copy.deepcopy(full_distribution)
    editing_base_distribution.names = editing_base_distribution.names[:N_NAMES // 2]
    editing_source_distribution = copy.deepcopy(full_distribution)
    editing_source_distribution.names = editing_source_distribution.names[N_NAMES // 2:]

    P_eval = generate_prompts(
        distribution=editing_base_distribution,
        patterns=['ABB', 'BAB'],
        prompts_per_pattern=2500,
        random_seed=1,
    )
    As_eval = run_with_cache(
        prompts=P_eval, 
        nodes=NODES,
        batch_size=100,
        model_id=MODEL_ID,
        verbose=True,
    )
    P_eval_feature_idxs = get_prompt_feature_idxs(
        prompts=P_eval,
        features=[('io',), ('s',), ('io_pos',),],
    )
    A_EVAL_DICT = {node: A for node, A in zip(NODES, As_eval)}

    N_EVAL = len(storage.unwrap(P_eval))
    N_NAMES_EVAL_SOURCE = len(editing_source_distribution.names)

    ### precompute the mean logit difference for clean evaluation data
    # (logit difference = column 0 minus column 1 of the returned logits)
    logits_eval_clean = run_with_hooks(prompts=P_eval, hooks=[], batch_size=200,)
    CLEAN_LD_EVAL_MEAN = (storage.unwrap(logits_eval_clean)[:, 0] - storage.unwrap(logits_eval_clean)[:, 1]).mean().item()

    ### precompute the mean-ablated logit difference when ablating each node
    A_EVAL_MEAN_DICT = {node: get_dataset_mean(A) for node, A in A_EVAL_DICT.items()}

    MEAN_ABLATED_LD_EVAL_DICT = {}
    for node, A in A_EVAL_DICT.items():
        MEAN_ABLATED_LD_EVAL_DICT[node] = compute_mean_ablated_lds(
            node=node, prompts=P_eval, A_mean=A_EVAL_MEAN_DICT[node], batch_size=200,
        )

    ############################################################################
    ### Compute counterfactual prompts and activations
    ############################################################################
    # Attribute tuples for which counterfactuals are built; the combinations in
    # the trailing comment are currently disabled.
    ATTRIBUTES = [('io_pos',), ('s',), ('io',), ] # ('s', 'io_pos',), ('io', 'io_pos'), ('s', 'io',), ('io_pos', 's', 'io',), ]

    CF_PROMPTS_DICT = {}
    for attribute in ATTRIBUTES:
        # IO targets are sampled from the first half of the source names and
        # S targets from the second half.
        CF_PROMPTS_DICT[attribute] = get_cf_prompts(
            prompts=P_eval, 
            features=attribute,
            io_targets=generate_name_samples(N_EVAL, editing_source_distribution.names[:N_NAMES_EVAL_SOURCE // 2]),
            s_targets=generate_name_samples(N_EVAL, editing_source_distribution.names[N_NAMES_EVAL_SOURCE//2:]),     
        )
    ### Compute counterfactual activations
    A_EVAL_CF_DICT = {}
    for attribute, cf_prompts in tqdm(CF_PROMPTS_DICT.items()):
        A_EVAL_CF_DICT[attribute] = run_with_cache(
            prompts=cf_prompts, 
            nodes=NODES,
            batch_size=100,
            model_id=MODEL_ID,
            verbose=True,
        )
    # Re-key the per-attribute results by node, so A_EVAL_CF_DICT[attribute][node]
    # is the entry at that node's position in NODES.
    for attribute in A_EVAL_CF_DICT:
        A_EVAL_CF_DICT[attribute] = {node: A_EVAL_CF_DICT[attribute][i] for i, node in enumerate(NODES)}
    
    # Feature indices of each counterfactual prompt set, restricted to the
    # attribute that was changed.
    P_eval_cf_feature_idxs = {}
    for attribute, cf_prompts in CF_PROMPTS_DICT.items():
        P_eval_cf_feature_idxs[attribute] = get_prompt_feature_idxs(
            prompts=cf_prompts,
            features=[attribute],
        )

100%|██████████| 3/3 [00:00<00:00, 14.53it/s]


### Gradient collection

In [7]:
# Gradients at every node in NODES for the training prompts, the evaluation
# prompts, and each counterfactual prompt set (keyed by attribute).
P_TRAIN_GRADIENTS = get_gradients(
    storage=storage, nodes=NODES, prompts=P_train, computing=False, n_batches=100,
)
P_EVAL_GRADIENTS = get_gradients(
    storage=storage, nodes=NODES, prompts=P_eval, computing=False, n_batches=25,
)
P_CF_GRADIENTS = {
    attribute: get_gradients(
        storage=storage,
        nodes=NODES,
        prompts=CF_PROMPTS_DICT[attribute],
        computing=False,
        n_batches=25,
    )
    for attribute in ATTRIBUTES
}

  5%|▌         | 5/100 [00:00<00:11,  8.14it/s]


KeyboardInterrupt: 

# Choosing SAEs to use for editing

In [8]:
# Precomputed artifacts, loaded from joblib files in the working directory:
#   SAES_DICT:            (variant, node, l1, end_epoch) -> encoder
#   EDITED_INTERP_DICT:   (variant, node, l1, end_epoch, attribute, num_exchange) -> A_edited
#   EDITED_AGNOSTIC_DICT: (variant, node, l1, end_epoch, attribute, num_exchange) -> A_edited
SAES_DICT, EDITED_INTERP_DICT, EDITED_AGNOSTIC_DICT = map(
    joblib.load,
    ('SAES_DICT.joblib', 'EDITED_INTERP_DICT.joblib', 'EDITED_AGNOSTIC_DICT.joblib'),
)

In [9]:
# Single edit dictionary: every key of the two source dicts is extended with its
# edit type, giving keys of the form
# (variant, node, l1, end_epoch, attribute, num_exchange, edit_type).
EDITED_DICT = {
    key + (edit_label,): edited
    for edit_label, source in (('interp', EDITED_INTERP_DICT), ('agnostic', EDITED_AGNOSTIC_DICT))
    for key, edited in source.items()
}

# Computing the distances dataframe

In [10]:
# Counterfactual activations and gradients, indexed below as [attribute][node].
# storage.unwrap presumably resolves stored references to in-memory values — confirm.
# NOTE: P_CF_GRADIENTS is defined by the gradient-collection cell; if that cell
# did not run to completion, the second line raises NameError.
targets = storage.unwrap(A_EVAL_CF_DICT)
target_grads = storage.unwrap(P_CF_GRADIENTS)

NameError: name 'P_CF_GRADIENTS' is not defined

In [None]:
# NOTE(review): IPython `??` source lookup left over from interactive debugging.
# It is not valid plain Python and can be dropped from the finished notebook.
storage.load_ref??

In [None]:
# Distance between every interp-edited activation and its counterfactual target,
# under both the 'l2' and the 'attribution' method. Produces one row per
# (edited activation, distance method).
rows = []
# enumerate(tqdm(...)) rather than tqdm(enumerate(...)): tqdm then wraps the
# sized items view, so it can show a total and an ETA instead of a bare counter.
for i, ((variant, node, l1_coeff, end_epoch, attribute, num_exchange), A_edited) in enumerate(tqdm(EDITED_INTERP_DICT.items())):
    # Load the edited activation referenced by A_edited.hid (non-lazily).
    edited_activation = storage.load_ref(hid=A_edited.hid, lazy=False).obj
    target_activation = targets[attribute][node]
    # 'l2' is computed without a gradient; 'attribution' receives the target gradient.
    for dist_method, target_grad in (('l2', None), ('attribution', target_grads[attribute][node])):
        dist = get_activation_distance.f(
            A_target=target_activation,
            A_edited=edited_activation,
            A_target_grad=target_grad,
            method=dist_method,
        )
        rows.append({
            'variant': variant,
            'edit_type': 'interp',
            'node': node,
            'l1_coeff': l1_coeff,
            'end_epoch': end_epoch,
            'attribute': attribute,
            'num_exchange': num_exchange,
            'dist_method': dist_method,
            'dist': dist.item(),
        })
    # Every 1000 entries, clear storage.atoms (presumably to bound memory use — confirm).
    if i % 1_000 == 0:
        storage.atoms.clear()

EDIT_SELECTION_DF = pd.DataFrame(rows)

In [None]:
# Independent (deep) copy of the interp distances, so they survive later
# reassignment of EDIT_SELECTION_DF.
edit_selection_df_interp = EDIT_SELECTION_DF.copy(deep=True)

In [None]:
# Distance between every agnostic-edited activation and its counterfactual
# target, under both the 'l2' and the 'attribution' method. Produces one row per
# (edited activation, distance method).
rows = []
# enumerate(tqdm(...)) rather than tqdm(enumerate(...)): tqdm then wraps the
# sized items view, so it can show a total and an ETA instead of a bare counter.
for i, ((variant, node, l1_coeff, end_epoch, attribute, num_exchange), A_edited) in enumerate(tqdm(EDITED_AGNOSTIC_DICT.items())):
    # Load the edited activation referenced by A_edited.hid (non-lazily).
    edited_activation = storage.load_ref(hid=A_edited.hid, lazy=False).obj
    target_activation = targets[attribute][node]
    # 'l2' is computed without a gradient; 'attribution' receives the target gradient.
    for dist_method, target_grad in (('l2', None), ('attribution', target_grads[attribute][node])):
        dist = get_activation_distance.f(
            A_target=target_activation,
            A_edited=edited_activation,
            A_target_grad=target_grad,
            method=dist_method,
        )
        rows.append({
            'variant': variant,
            'edit_type': 'agnostic',
            'node': node,
            'l1_coeff': l1_coeff,
            'end_epoch': end_epoch,
            'attribute': attribute,
            'num_exchange': num_exchange,
            'dist_method': dist_method,
            'dist': dist.item(),
        })
    # Every 1000 entries, clear storage.atoms (presumably to bound memory use — confirm).
    if i % 1_000 == 0:
        storage.atoms.clear()

edit_selection_df_agnostic = pd.DataFrame(rows)

# Choosing best SAEs

In [11]:
# The cached frame loaded below was produced once from the two frames computed
# above, using:
# edit_selection_df = pd.concat([edit_selection_df_interp, edit_selection_df_agnostic], ignore_index=True)
# joblib.dump(edit_selection_df, 'edit_selection_df.joblib')
EDIT_SELECTION_DF = joblib.load('edit_selection_df.joblib')
# Replace the node objects in the 'node' column by their display names.
EDIT_SELECTION_DF['node'] = EDIT_SELECTION_DF['node'].apply(lambda node: node.displayname)

In [12]:
### For each (variant, edit_type, node, attribute, num_exchange, dist_method)
### group, keep the row with the smallest distance; this picks the l1_coeff and
### end_epoch that minimize the distance within the group.
best_row_labels = (
    EDIT_SELECTION_DF
    .groupby(['variant', 'edit_type', 'node', 'attribute', 'num_exchange', 'dist_method'])['dist']
    .idxmin()
)
EDIT_SELECTION_DF_BEST = EDIT_SELECTION_DF.loc[best_row_labels]
EDIT_SELECTION_DF_BEST

Unnamed: 0,variant,edit_type,node,l1_coeff,end_epoch,attribute,num_exchange,dist_method,dist
211333,attribution,agnostic,k@L10H0@io,0.5,2000,"(io,)",4,attribution,0.039842
211332,attribution,agnostic,k@L10H0@io,0.5,2000,"(io,)",4,l2,4.235226
211347,attribution,agnostic,k@L10H0@io,1.0,1,"(io,)",8,attribution,0.035129
211178,attribution,agnostic,k@L10H0@io,0.5,1,"(io,)",8,l2,3.843593
211181,attribution,agnostic,k@L10H0@io,0.5,1,"(io,)",16,attribution,0.027023
...,...,...,...,...,...,...,...,...,...
654,vanilla,interp,z@L9H9@end,5.0,1500,"(io_pos,)",4,l2,8.810353
489,vanilla,interp,z@L9H9@end,2.5,1500,"(io_pos,)",8,attribution,0.008462
488,vanilla,interp,z@L9H9@end,2.5,1500,"(io_pos,)",8,l2,7.159793
323,vanilla,interp,z@L9H9@end,1.0,1500,"(io_pos,)",16,attribution,0.007212


In [13]:
# Lookup tables between node objects and the display names stored in the frame.
node_to_displayname = {node: node.displayname for node in NODES}
displayname_to_node = {name: node for node, name in node_to_displayname.items()}

# (variant, edit_type, node, attribute, num_exchange, dist_method) -> (encoder, l1_coeff, end_epoch)
SELECTED_SAES = {}
for best_row in tqdm(EDIT_SELECTION_DF_BEST.itertuples(index=False)):
    # Rows are unpacked positionally, in the frame's column order; the trailing
    # distance value is not needed here.
    variant, edit_type, node_name, l1_coeff, end_epoch, attribute, num_exchange, dist_method, _ = best_row
    node = displayname_to_node[node_name]
    encoder = SAES_DICT[(variant, node, l1_coeff, end_epoch)]
    selection_key = (variant, edit_type, node, attribute, num_exchange, dist_method)
    SELECTED_SAES[selection_key] = (encoder, l1_coeff, end_epoch)

3960it [00:00, 201909.07it/s]


In [None]:
# Sanity check: which edit types are present among the selected rows.
EDIT_SELECTION_DF_BEST['edit_type'].unique()

# Running the edits

In [None]:
# Peek at one key to see the key layout. next(iter(...)) reads only the first
# key instead of copying every key of the dictionary into a list.
next(iter(EDITED_INTERP_DICT))

In [34]:
### the actual interventions
# Locations to edit: an alias of paper_cross_sections, whose entries are
# (nodes, attribute tuple, cross-section key) triples.
locations_to_edit = paper_cross_sections

def get_lds(logits: Tensor) -> np.ndarray:
    """Return the per-row logit difference (column 0 minus column 1) as a NumPy array.

    `logits` is passed through `storage.unwrap` first; the result is moved to the
    CPU before it is converted.
    """
    unwrapped = storage.unwrap(logits)
    first_column, second_column = unwrapped[:, 0], unwrapped[:, 1]
    return (first_column - second_column).cpu().numpy()

def stringify_nodes(nodes: List[Node]) -> str:
    """Join the display names of `nodes` into a single ', '-separated string."""
    display_names = (node.displayname for node in nodes)
    return ', '.join(display_names)

# For every location and every (num_exchange, edit_type, variant, dist_method)
# combination, patch in the edited activations of the selected SAEs and compare
# the resulting predictions with those of plain counterfactual patching.
# One dataframe (one row per evaluation prompt) is collected per combination.
dfs = []
NUM_EXCHANGE_VALUES = [4, 8, 16]

with storage:
    for nodes_to_edit, feature_subset_to_edit, heads_class in tqdm(locations_to_edit):
        cf_activations = {node: A_EVAL_CF_DICT[feature_subset_to_edit][node] for node in nodes_to_edit}
        cf_prompts = CF_PROMPTS_DICT[feature_subset_to_edit]
        # Fixed node order, so `nodes` and `activations` passed to the patching calls line up.
        nodes_order = sorted(nodes_to_edit, key=lambda node: node.displayname)

        # Token ids of the IO name for the base and the counterfactual prompts.
        # Column 1 is used — assumes column 0 is a prepended BOS token and each
        # name is a single token; TODO confirm.
        correct_predictions_base = model.to_tokens([f' {p.io_name}' for p in storage.unwrap(P_eval)])[:, 1]
        correct_predictions_cf = model.to_tokens([f' {p.io_name}' for p in storage.unwrap(cf_prompts)])[:, 1]

        ### baseline (counterfactual patching) outcome
        # Depends only on the location, not on num_exchange / edit_type /
        # variant / dist_method, so it is computed once per location instead of
        # once per inner iteration.
        base_logits_patch, patch_outputs = run_activation_patch(
            base_prompts=P_eval,
            cf_prompts=cf_prompts,
            batch_size=50,
            nodes=nodes_order,
            activations=[cf_activations[node] for node in nodes_order],
            return_predictions=True,
        )
        cf_logits_patch, predictions_patch = storage.unwrap(patch_outputs)
        # The accuracies and logit differences (here and for the edits below) are
        # computed but not recorded in the dataframe; they stay available as
        # notebook variables.
        accuracy_patch_base = (predictions_patch == correct_predictions_base).float().mean().item()
        accuracy_patch_cf = (predictions_patch == correct_predictions_cf).float().mean().item()
        base_logits_patch, cf_logits_patch = storage.unwrap(base_logits_patch), storage.unwrap(cf_logits_patch)
        base_lds_patch = get_lds(base_logits_patch)
        cf_lds_patch = get_lds(cf_logits_patch)

        for num_exchange in NUM_EXCHANGE_VALUES:
            for edit_type in ('agnostic', 'interp'):
                for variant in ('vanilla', 'gated', 'attribution'):
                    for dist_method in ('l2', 'attribution'):
                        # Per node: the (encoder, l1_coeff, end_epoch) selected for this
                        # configuration, then the matching precomputed edited activation.
                        best_encoder_data = {node: SELECTED_SAES[(variant, edit_type, node, feature_subset_to_edit, num_exchange, dist_method)] for node in nodes_to_edit}
                        best_edited_activations = {node: EDITED_DICT[(variant, node, l1_coeff, end_epoch, feature_subset_to_edit, num_exchange, edit_type)]
                                            for node, (encoder, l1_coeff, end_epoch) in best_encoder_data.items()}
                        ### intervention outcome
                        base_logits_edit, edit_outputs = run_activation_patch(
                            base_prompts=P_eval,
                            cf_prompts=cf_prompts,
                            batch_size=50,
                            nodes=nodes_order,
                            activations=[best_edited_activations[node] for node in nodes_order],
                            return_predictions=True,
                        )
                        cf_logits_edit, predictions_edit = storage.unwrap(edit_outputs)
                        accuracy_edit_base = (predictions_edit == correct_predictions_base).float().mean().item()
                        accuracy_edit_cf = (predictions_edit == correct_predictions_cf).float().mean().item()
                        base_logits_edit, cf_logits_edit = storage.unwrap(base_logits_edit), storage.unwrap(cf_logits_edit)
                        base_lds_edit = get_lds(base_logits_edit)
                        cf_lds_edit = get_lds(cf_logits_edit)

                        df = pd.DataFrame({
                            # params
                            'num_exchange': num_exchange,
                            'nodes_to_edit': stringify_nodes(nodes_to_edit),
                            'heads_class': heads_class,
                            # repeated per row: a bare tuple would be read as column data
                            'features': [feature_subset_to_edit for _ in range(len(base_logits_edit))],
                            'edit_type': edit_type,
                            'prompt_idx': list(range(len(base_logits_edit))),
                            'variant': variant,
                            'dist_method': dist_method,
                            # agreement between cf predictions and edit predictions
                            'agreement_with_cf': (predictions_edit == predictions_patch).cpu().numpy(),
                        })
                        dfs.append(df)
                        # Commit and release cached objects after each configuration
                        # (presumably to bound host / GPU memory use — confirm).
                        # Progress is reported by the outer tqdm bar only; the
                        # per-configuration print was dropped because it flooded the output.
                        storage.commit()
                        storage.atoms.clear()
                        torch.cuda.empty_cache()
df = pd.concat(dfs)

  0%|          | 0/6 [00:00<?, ?it/s]

finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one


 17%|█▋        | 1/6 [00:02<00:13,  2.60s/it]

finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one


 33%|███▎      | 2/6 [00:04<00:08,  2.08s/it]

finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one


 50%|█████     | 3/6 [00:06<00:05,  1.95s/it]

finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one


 67%|██████▋   | 4/6 [00:07<00:03,  1.79s/it]

finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one


 83%|████████▎ | 5/6 [00:09<00:01,  1.68s/it]

finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one
finished one


100%|██████████| 6/6 [00:10<00:00,  1.83s/it]

finished one





In [37]:
# Mean agreement with counterfactual patching for each configuration.
x = (
    df
    .groupby(['nodes_to_edit', 'edit_type', 'variant', 'dist_method', 'num_exchange', 'features', ])[['agreement_with_cf']]
    .mean()
    .reset_index()
)

In [40]:
# Line chart of agreement with counterfactual patching against the number of
# exchanged features, leaving out the 'attribution' variant. One line per
# nodes_to_edit, faceted by features (rows) and dist_method (columns).
chart_data = x.query('variant != "attribution"')
chart_encoding = dict(
    x='num_exchange',
    y='agreement_with_cf',
    color='variant',
    strokeDash='edit_type',
    row='features',
    column='dist_method',
    detail='nodes_to_edit',
)
alt.Chart(chart_data).mark_line().encode(**chart_encoding).interactive()

In [22]:
# Explicit commit on the storage (presumably persists pending results — confirm).
# NOTE(review): this cell's execution count is lower than the cells above it,
# i.e. the notebook was not run top to bottom.
storage.commit()

In [24]:
# Overall mean agreement with counterfactual patching across all rows of df.
df.loc[:, 'agreement_with_cf'].mean()

0.5394