In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from sklearn.metrics import confusion_matrix, accuracy_score

Read the model recovery data

In [2]:
# read all the model recovery results and compile them into a single df 
DATA_DIR = "../../data/model_recovery"
df = pd.DataFrame()
for i in range(111):
    try:
        df_i = pd.read_csv(f"{DATA_DIR}/recovery_results_{i}.csv")
    except FileNotFoundError:
        print(f"missing: {i}")
        continue
    df = df.append(df_i)
df.index = range(len(df))

missing: 0
missing: 11
missing: 15
missing: 23
missing: 40
missing: 42
missing: 47
missing: 50
missing: 55
missing: 63
missing: 66
missing: 73
missing: 92
missing: 93
missing: 99
missing: 109


In [3]:
len(df[df["generating_model"] == "sparse_max_continuous"])

11

In [4]:
len(df[df["generating_model"] == "sparse_max_discrete"])

34

In [5]:
model_types = ("sparse_max_continuous", "sparse_max_discrete", "sparse_lqr", "lqr", "nm1", "nm2", "hc")
n_params = {"nm2": 2, "nm1": 4, "lqr": 2, "sparse_lqr": 3, "hc": 3, "sparse_max_continuous": 4, "sparse_max_discrete": 4}

# compile a dataframe with the AIC of each model on the simulated data
df_aic = pd.DataFrame()
df_aic["situation"] = df["situation"]
df_aic["generating_model"] = df["generating_model"]
for model_type in model_types:
    df_aic[model_type] = df[model_type].apply(lambda x: 2 * n_params[model_type] - 2 * x)

In [6]:
df_aic.loc[12]

situation                [-117.0, 249.0, 246.0, -225.0, 142.0]
generating_model                                    sparse_lqr
sparse_max_continuous                                69.788818
sparse_max_discrete                                  62.089762
sparse_lqr                                          -11.532068
lqr                                                 126.873504
nm1                                                  56.448896
nm2                                                  66.337134
hc                                                   53.813034
Name: 12, dtype: object

Print which models' data is classified as coming from which other models

In [7]:
all_true = []
all_pred = []
model_names = ("sparse_max_continuous", "sparse_max_discrete", "sparse_lqr", "lqr", "null_model_1", "null_model_2", "hill_climbing") 
for model_name in model_names:
    # print out the name of the generating model
    print(f"Generating model: {model_name.upper()}")
    
    # print the counts of each predicted model on data generated from this model
    df_model = df_aic[df_aic["generating_model"] == model_name].reset_index().drop("index", axis=1)
    print(f"n generating: {len(df_model)}")
    predicted_models = defaultdict(int)
    for index, row in df_model.iterrows():
        model_performances = list(df_model.iloc[index, 2:])
        best_model_idx = np.argmin(model_performances)
        predicted_models[model_names[best_model_idx]] += 1
        all_true.append(model_name)
        all_pred.append(model_names[best_model_idx])
    print("predicted models:")
    for model in predicted_models:
        print(f"{model}: {predicted_models[model]}")

Generating model: SPARSE_MAX_CONTINUOUS
n generating: 11
predicted models:
sparse_max_continuous: 5
lqr: 1
hill_climbing: 2
null_model_2: 1
null_model_1: 1
sparse_max_discrete: 1
Generating model: SPARSE_MAX_DISCRETE
n generating: 34
predicted models:
sparse_max_discrete: 32
null_model_2: 2
Generating model: SPARSE_LQR
n generating: 17
predicted models:
null_model_2: 3
sparse_lqr: 14
Generating model: LQR
n generating: 0
predicted models:
Generating model: NULL_MODEL_1
n generating: 0
predicted models:
Generating model: NULL_MODEL_2
n generating: 0
predicted models:
Generating model: HILL_CLIMBING
n generating: 33
predicted models:
hill_climbing: 33


In [8]:
accuracy_score(all_true, all_pred)

0.8842105263157894

In [9]:
confusion_matrix(all_true, all_pred)

array([[33,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  3, 14,  0,  0],
       [ 2,  1,  1,  1,  0,  5,  1],
       [ 0,  0,  0,  2,  0,  0, 32]], dtype=int64)