In [1]:
import sys

sys.path.append("../../")

%load_ext autoreload
%autoreload 2

In [2]:
import lightgbm as lgb
import numpy as np
import pandas as pd
import hashlib
from ast import literal_eval
from pathlib import Path
from functools import reduce

from src.utils import find_meta_category
from src.feature_extractor import sample_feature_combinations

## Download prerequisite files

Fetch all the results and feature values


In [3]:
# You can get the experiments file here: 01J6KF3JRCATRJQ9CPJTRV5VBM (https://beaker.org/ds/01J6KF3JRCATRJQ9CPJTRV5VBM/details)
!echo "Fetching experiments list..."
!beaker dataset fetch 01J6KF3JRCATRJQ9CPJTRV5VBM --prefix experiments.txt
!echo "Fetching extracted features..."
!mkdir features/
!beaker dataset fetch 01J6KF3JRCATRJQ9CPJTRV5VBM --prefix features/ 
#!beaker dataset fetch 01J6KFVCRCTYHCZDR0XNK0G9HT --prefix features/
!echo "Fetching helpsteer2 dataset"
!beaker dataset fetch 01J6KBM2VCM9EQ7MER26VBXCCM
!echo "Collating all evaluation results"
%run ../../scripts/fetch_evals_rewardbench.py --output_file results.csv --gpt4_threshold_score 0.658 --experiment_prefix rm-eval-helpsteer2 --experiments_file experiments.txt

Fetching experiments list...
Downloading dataset [36m01J6KF3JRCATRJQ9CPJTRV5VBM[0m to [32m.[0m
Files: 0          ⠋  
Bytes: 0 B        ⠋  
[2A[JFiles: 1          ⠙  
Bytes: 73.77 KiB  ⠙  
[2A[JFiles: 1          ✔  
Bytes: 73.77 KiB  ✔  
Completed in 100ms: 404.4 KiB/s, 5 files/s
Fetching extracted features...
mkdir: features/: File exists
Downloading dataset [36m01J6KF3JRCATRJQ9CPJTRV5VBM[0m to [32m.[0m
Files: 0          ⠋  
Bytes: 0 B        ⠋  
[2A[JFiles: 1          ⠙  
Bytes: 37.52 MiB  ⠙  
[2A[JFiles: 8          ⠹  
Bytes: 301.6 MiB  ⠹  
[2A[JFiles: 15         ⠸  
Bytes: 564.9 MiB  ⠸  
[2A[JFiles: 16         ⠼  
Bytes: 602.9 MiB  ⠼  
[2A[JFiles: 16         ✔  
Bytes: 602.9 MiB  ✔  
[2A[JFiles: 16         ✔  
Bytes: 602.9 MiB  ✔  
Completed in 400ms: 1.186 GiB/s, 32 files/s
Fetching helpsteer2 dataset
Downloading dataset [36m01J6KBM2VCM9EQ7MER26VBXCCM[0m to [32m.[0m
Files: 0          ⠋  
Bytes: 0 B        ⠋  
[2A[JFiles: 0          ⠙  
Bytes: 0 B      

Collate feature set for all instances


In [4]:
LEXICAL_FEATS_PATH = Path("features")
DATASET_PATH = Path("helpsteer2_human_vs_gpt4_weighted_for_llama.jsonl")


def get_dataset_features(
    feature_path=LEXICAL_FEATS_PATH, dataset_path=DATASET_PATH
) -> "pd.DataFrame":
    lexical_features = [
        "rouge",
        "bertscore",
        "bertscore_length",
        "entity_sim",
        "cosine_sim",
        "prompt_len",
        "len_longer",
        "len_shorter",
        "token_len_difference",
    ]
    lexical_feature_files = [
        file
        for file in feature_path.glob("*.jsonl")
        if any(file.stem in feat for feat in lexical_features)
    ]
    lexical_feats_df = reduce(
        lambda left, right: left.merge(
            right, on=["id", "prompt", "completion_a", "completion_b"], how="outer"
        ),
        [pd.read_json(file, lines=True) for file in lexical_feature_files],
    )

    df = pd.read_json(dataset_path, lines=True).rename(columns={"prompt_hash": "id"})
    finaldf = df.merge(lexical_feats_df, how="left", on="id").drop(
        columns=["prompt", "completion_a", "completion_b"]
    )

    # Hacky way for token_len_difference
    finaldf = finaldf.rename(columns={"token_len_diff": "token_len_difference"})
    return finaldf

In [5]:
results_df = pd.read_csv("results.csv").dropna()
features_df = get_dataset_features()
print(len(results_df))

60


## Get proportion of instances that fulfill the conditions

1. For each row, get features that were activated
2. Then for each activated feature, we get the proportion by looking at the feature dataframe.
3. The proportion is computed as: `number_of_instance_that_fulfill_a_single_condition` / `total_number_of_instances`


In [6]:
# Inspect nan columns
rows_with_nan = features_df[features_df.isna().any(axis=1)]
nan_columns = rows_with_nan.columns[rows_with_nan.isna().any()]
df_nan_columns = rows_with_nan[nan_columns]
df_nan_columns

Unnamed: 0,expertise_level,format_constraints
289,,[]
1317,expert domain knowledge,
4613,basic domain knowledge,
4734,general public,


So what you're going to do instead, is to take the binary_cols, and then for each element of that binary_cols, you compute the "weight"


In [7]:
def compute_instances(feat: str, features_df: "pd.DataFrame") -> float:
    total = len(features_df)
    lexical_features = [
        "rouge",
        "bertscore",
        "bertscore_length",
        "entity_sim",
        "cosine_sim",
        "prompt_len",
        "len_longer",
        "len_shorter",
        "token_len_difference",
    ]

    if feat.split("__")[0] in lexical_features:
        feat_name, value = feat.split("__")
        min_val_str, max_val_str = value.split("|")
        min_val, max_val = float(min_val_str.split("=")[1]), float(
            max_val_str.split("=")[1]
        )
        return features_df[feat_name].between(min_val, max_val).mean()
    else:
        # Parse the feature
        feat_name, value = feat.split("=")
        meta_category = find_meta_category(feat_name)
        if meta_category == "scalar":
            v = value.replace("_", " ")
            return features_df[feat_name].value_counts().get(v) / total
        elif meta_category == "closed_set":
            v = value.replace("_", " ")
            list_of_values = features_df[feat_name].tolist()
            return sum([1 if v in listval else 0 for listval in list_of_values]) / total
        elif meta_category == "open_set":
            list_of_values = features_df[feat_name].tolist()
            return sum([1 if listval else 0 for listval in list_of_values]) / total

        return find_meta_category(feat_name)


feats = results_df.columns[results_df.isin([0, 1]).all()]  # get binary columns
feat_map = {
    feat: compute_instances(feat, features_df) for feat in feats if feat != "label"
}

ratio_df = results_df.apply(
    lambda row: row.map(lambda x: feat_map.get(row.name, 1) if x == 1 else x)
)

# Regressor training


In [33]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

## Train LightGBM regressor


In [19]:
params = {
    "objective": "regression",
    "metric": "mse",
    "boosting_type": "gbdt",
    "learning_rate": 0.1,
    "num_leaves": 31,
}

# Train the model
binary = False
X = ratio_df[list(feat_map.keys())]
if binary:
    X = (X > 0).astype(int)
y = ratio_df["Overall"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)
print(f"Train size: {len(X_train)}, test size: {len(X_test)}")


train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
model = lgb.train(params, train_data, valid_sets=[test_data])

# Predict and evaluate
y_pred = model.predict(X_test, num_iteration=model.best_iteration)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 54, number of used features: 0
[LightGBM] [Info] Start training from score 0.695385
Mean Squared Error: 0.0014429161302278335


In [11]:
importances = model.feature_importance(importance_type="gain")  # ['split', 'gain']

# Create a DataFrame to view feature importances
feature_importance_df = pd.DataFrame(
    {"Feature": X.columns, "Importance": importances}
).sort_values(by="Importance", ascending=False)

print(feature_importance_df)

                                  Feature  Importance
0    bertscore__min_val=0.33|max_val=0.67         0.0
33       rouge__min_val=0.33|max_val=0.67         0.0
35                    safety_concern=high         0.0
36                     safety_concern=low         0.0
37                safety_concern=moderate         0.0
..                                    ...         ...
27                open_endedness=moderate         0.0
28                      open_endedness=no         0.0
29   prompt_len__min_val=0.0|max_val=0.33         0.0
30  prompt_len__min_val=0.33|max_val=0.67         0.0
64          type_of_in_context_material=1         0.0

[65 rows x 2 columns]


## Train LinearRegressor


In [70]:
polyfit = True
binary = False

X = ratio_df[list(feat_map.keys())]
y = ratio_df["Overall"]
if binary:
    X = (X > 0).astype(int)

if polyfit:
    poly = PolynomialFeatures(degree=2, include_bias=False)
    X_poly = poly.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X_poly, y, test_size=0.2, random_state=42
    )
else:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )


print(f"Train size: {len(X_train)}, test size: {len(X_test)}")

model = LinearRegression()
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Feature names: {poly.get_feature_names_out(X.columns)}")
print(f"Mean Squared Error: {mse}")
print(f"Coeeficients: {model.coef_}")
print(f"Intercept: {model.intercept_}")

Train size: 48, test size: 12
Feature names: ['bertscore__min_val=0.33|max_val=0.67'
 'bertscore__min_val=0.67|max_val=1.0'
 'bertscore_length__min_val=0.0|max_val=0.33' ...
 'token_len_difference__min_val=0.67|max_val=1.0^2'
 'token_len_difference__min_val=0.67|max_val=1.0 type_of_in_context_material=1'
 'type_of_in_context_material=1^2']
Mean Squared Error: 0.0018708141108625639
Coeeficients: [-2.12363981e-13  3.55453806e-02  1.18579444e-01 ...  6.68761130e-05
  0.00000000e+00 -5.52174488e-04]
Intercept: 0.6969893593456048


In [80]:
if not polyfit:
    feature_importance = pd.DataFrame(
        {"Feature": X.columns, "Coefficient": model.coef_}
    )

    # Calculate absolute importance for easier comparison
    feature_importance["Absolute_Coefficient"] = np.abs(
        feature_importance["Coefficient"]
    )

    # Sort by absolute coefficient value
    feature_importance = feature_importance.sort_values(
        by="Absolute_Coefficient", ascending=False
    )
    feature_importance.head(10)
else:
    print(
        "Feature importance is not possible with polynomial features (hard to interpret)"
    )

Feature importance is not possible with polynomial features (hard to interpret)


## Simulation


In [72]:
from tqdm import tqdm_notebook

In [73]:
_, combinations = sample_feature_combinations(
    meta_analyzer_n_samples=2000, max_number=10
)

10it [00:00, 79437.58it/s]
45it [00:00, 87421.81it/s]
120it [00:00, 55504.68it/s]
210it [00:00, 57437.49it/s]
252it [00:00, 48913.17it/s]
210it [00:00, 43921.60it/s]
120it [00:00, 34754.63it/s]
45it [00:00, 28767.52it/s]
10it [00:00, 19427.07it/s]
1it [00:00, 11915.64it/s]

2024-09-02 12:39:15 - INFO - root - Adding meta analyzer features



10it [00:00, 44243.71it/s]
45it [00:00, 104915.89it/s]
120it [00:00, 80530.64it/s]
210it [00:00, 42186.11it/s]
252it [00:00, 41749.20it/s]
210it [00:00, 40470.68it/s]
120it [00:00, 36056.77it/s]
45it [00:00, 29376.45it/s]
10it [00:00, 21355.93it/s]
1it [00:00, 11618.57it/s]


In [81]:
sim_df = pd.DataFrame(0, index=np.arange(len(combinations)), columns=X.columns)
for idx, combination in tqdm_notebook(enumerate(combinations), total=len(combinations)):
    activated_feats = []
    for feat in combination:
        if "analyzer" in feat:
            feature_name_str, value_str = feat.split("::")[1].split("|")
            feature_name, value = (
                feature_name_str.split("=")[-1],
                value_str.split("=")[-1],
            )
            activated_feats.append(f"{feature_name}={value}")
        else:
            activated_feats.append(feat.replace("::", "__"))
    sim_df.loc[idx, activated_feats] = 1
sim_df = sim_df.apply(
    lambda row: row.map(lambda x: feat_map.get(row.name, 1) if x == 1 else x)
).dropna(axis=1, how="any")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for idx, combination in tqdm_notebook(enumerate(combinations), total=len(combinations)):


  0%|          | 0/4069 [00:00<?, ?it/s]

In [82]:
sim_results = sim_df.copy(deep=True)
sim_results["activated_features"] = sim_results.apply(
    lambda row: [col for col in sim_results.columns if row[col] != 0], axis=1
)
sim_results["pred"] = model.predict(poly.transform(sim_df))
sim_results = sim_results.sort_values(by="pred", ascending=False).reset_index(drop=True)
sim_results[["activated_features", "pred"]].head(20)

Unnamed: 0,activated_features,pred
0,"[bertscore_length__min_val=0.33|max_val=0.67, ...",0.833486
1,"[complexity_of_intents=simple, cosine_sim__min...",0.826476
2,"[bertscore_length__min_val=0.0|max_val=0.33, c...",0.825858
3,"[bertscore__min_val=0.67|max_val=1.0, complexi...",0.821864
4,"[bertscore__min_val=0.67|max_val=1.0, bertscor...",0.82177
5,"[bertscore__min_val=0.67|max_val=1.0, complexi...",0.817687
6,"[complexity_of_intents=simple, cosine_sim__min...",0.815613
7,"[bertscore_length__min_val=0.33|max_val=0.67, ...",0.814914
8,"[bertscore_length__min_val=0.33|max_val=0.67, ...",0.807925
9,"[bertscore__min_val=0.33|max_val=0.67, bertsco...",0.806936


In [83]:
sim_results.activated_features.to_list()

[['bertscore_length__min_val=0.33|max_val=0.67',
  'complexity_of_intents=simple',
  'cosine_sim__min_val=0.67|max_val=1.0',
  'entity_sim__min_val=0.0|max_val=0.33',
  'format_constraints=1',
  'open_endedness=high',
  'rouge__min_val=0.0|max_val=0.33'],
 ['complexity_of_intents=simple',
  'cosine_sim__min_val=0.33|max_val=0.67',
  'len_shorter__min_val=0.67|max_val=1.0',
  'open_endedness=moderate',
  'rouge__min_val=0.0|max_val=0.33'],
 ['bertscore_length__min_val=0.0|max_val=0.33',
  'complexity_of_intents=simple',
  'format_constraints=1',
  'languages=English',
  'open_endedness=no',
  'rouge__min_val=0.0|max_val=0.33'],
 ['bertscore__min_val=0.67|max_val=1.0',
  'complexity_of_intents=simple',
  'entity_sim__min_val=0.33|max_val=0.67',
  'format_constraints=1',
  'open_endedness=no',
  'prompt_len__min_val=0.67|max_val=1.0'],
 ['bertscore__min_val=0.67|max_val=1.0',
  'bertscore_length__min_val=0.33|max_val=0.67',
  'complexity_of_intents=simple',
  'entity_sim__min_val=0.33|max

TODO: So now you have determined 10 feature combinations that seem to work well. The next step is to train RMs and evaluate them.
