In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
# Table read from the fitting-results directory (best_fitting_models.csv).
# NOTE(review): `df_best_fit` is not referenced by any cell visible in this
# section — confirm it is used further down, otherwise this load can be dropped.
df_best_fit = pd.read_csv("../../data/fitting_results/best_fitting_models.csv")

Read the model recovery data

In [3]:
DATA_DIR = "../../data/model_recovery"
# recovery_results_<i>.csv is looked up for i = 0 .. N_RESULT_FILES - 1.
N_RESULT_FILES = 111


def load_recovery_results(data_dir, n_files):
    """Read ``recovery_results_<i>.csv`` for ``i in range(n_files)`` and stack them.

    Files that do not exist are reported on stdout as ``missing: <i>`` and
    skipped; any other read error propagates.

    Parameters
    ----------
    data_dir : str
        Directory that contains the per-run CSV files.
    n_files : int
        Number of consecutive file indices (starting at 0) to try.

    Returns
    -------
    pandas.DataFrame
        All rows of the files that were found, in file-index order, with a
        fresh 0..n-1 index. An empty DataFrame if no file was found.
    """
    frames = []
    for i in range(n_files):
        try:
            frames.append(pd.read_csv(f"{data_dir}/recovery_results_{i}.csv"))
        except FileNotFoundError:
            print(f"missing: {i}")
    if not frames:
        # pd.concat raises on an empty list; keep the "nothing found" case usable.
        return pd.DataFrame()
    # One concat at the end: DataFrame.append was removed in pandas 2.0 and
    # appending inside the loop copied the accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True)


df = load_recovery_results(DATA_DIR, N_RESULT_FILES)

missing: 0
missing: 14
missing: 15
missing: 23
missing: 46
missing: 47
missing: 86
missing: 89
missing: 92
missing: 109


In [4]:
# Number of recovery rows whose data was generated by the continuous sparse-max model.
int(df["generating_model"].eq("sparse_max_continuous").sum())

14

In [5]:
# Number of recovery rows whose data was generated by the discrete sparse-max model.
int(df["generating_model"].eq("sparse_max_discrete").sum())

19

In [6]:
# Labels used in the "generating_model" column. The order matters: later cells
# look models up in this list by position.
model_names = [
    "sparse_max_continuous",
    "sparse_max_discrete",
    "sparse_lqr",
    "lqr",
    "null_model_1",
    "null_model_2",
    "hill_climbing",
]

In [7]:
# Score columns of `df`, one per candidate model; they become the columns of
# `df_aic` in exactly this order (after "situation" and "generating_model").
model_types = ("sparse_max_continuous", "sparse_max_discrete", "sparse_lqr", "lqr", "nm1", "nm2", "hc")

# Number of free parameters k of each candidate model.
n_params = {
    "sparse_max_continuous": 4,
    "sparse_max_discrete": 4,
    "sparse_lqr": 3,
    "lqr": 2,
    "nm1": 4,
    "nm2": 2,
    "hc": 3,
}

# AIC = 2k - 2x, where x is the value stored in the model's column of `df`
# (assumed to be the fitted log-likelihood — TODO confirm against the fitting code).
# Computed with whole-column arithmetic rather than a per-element apply/lambda.
df_aic = df[["situation", "generating_model"]].copy()
for model_type in model_types:
    df_aic[model_type] = 2 * n_params[model_type] - 2 * df[model_type]

For each generating model, print how often its simulated data is classified as coming from each candidate model (the candidate with the lowest AIC)

In [8]:
# Model recovery: each row of df_aic is classified as the candidate model with
# the lowest AIC. `all_true` / `all_pred` collect the generating and the
# recovered model name for every row, in the order the rows are visited.
all_true = []
all_pred = []

# Columns 0-1 of df_aic are "situation" and "generating_model"; every later
# column holds one candidate's AIC. model_names[k] is used as the label of the
# k-th score column, so the two must have the same length and order.
score_columns = df_aic.columns[2:]
assert len(score_columns) == len(model_names), "model_names must label every AIC column"

for model_name in model_names:
    print(f"Generating model: {model_name.upper()}")
    df_model = df_aic[df_aic["generating_model"] == model_name].reset_index(drop=True)
    print(f"n generating: {len(df_model)}")
    # Insertion-ordered tally: models are printed in order of first occurrence.
    predicted_models = defaultdict(int)
    for aic_row in df_model[score_columns].to_numpy():
        # np.argmin returns the first minimum, so ties go to the earlier column.
        predicted = model_names[np.argmin(aic_row)]
        predicted_models[predicted] += 1
        all_true.append(model_name)
        all_pred.append(predicted)
    print("predicted models:")
    for model, count in predicted_models.items():
        print(f"{model}: {count}")

Generating model: SPARSE_MAX_CONTINUOUS
n generating: 14
predicted models:
sparse_max_continuous: 9
hill_climbing: 2
sparse_max_discrete: 2
null_model_2: 1
Generating model: SPARSE_MAX_DISCRETE
n generating: 19
predicted models:
sparse_max_discrete: 13
null_model_2: 4
sparse_max_continuous: 1
hill_climbing: 1
Generating model: SPARSE_LQR
n generating: 35
predicted models:
sparse_lqr: 24
null_model_2: 11
Generating model: LQR
n generating: 0
predicted models:
Generating model: NULL_MODEL_1
n generating: 0
predicted models:
Generating model: NULL_MODEL_2
n generating: 0
predicted models:
Generating model: HILL_CLIMBING
n generating: 33
predicted models:
hill_climbing: 33


In [9]:
# Share of rows for which the recovered model equals the generating model.
accuracy_score(y_true=all_true, y_pred=all_pred)

0.7821782178217822

In [10]:
# Rows are the generating (true) models, columns the recovered (predicted) models.
# No `labels` are passed, so scikit-learn orders both axes by the sorted set of
# labels that occur in either list; models that never occur get no row/column.
confusion_matrix(y_true=all_true, y_pred=all_pred)

array([[33,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0],
       [ 0, 11, 24,  0,  0],
       [ 2,  1,  0,  9,  2],
       [ 1,  4,  0,  1, 13]], dtype=int64)