In [1]:
import pandas as pd
from fourinarowfunctions import load_tabular, read_subject_nlls
from numpy import log as ln

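The Bayesian information criterion (BIC) is defined as

> BIC = −2 × ln(likelihood) + ln(N) × k

where N is the number of observations (here, the number of decisions a subject made) and k is the number of free parameters of the model.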
 
Our input is the negative log-likelihood (nll = −ln(likelihood)), so this becomes

> BIC = 2 × nll + ln(N) × k

In [2]:
def get_subject_decisions(split_folder):
    """Count the rows recorded for each player under ``split_folder``.

    Every table returned by ``load_tabular`` for the glob
    ``<split_folder>/**/data.csv`` contributes its full row count to the
    player named in the first row of its ``player_name`` column.

    Args:
        split_folder: Folder prefix (string) that is searched recursively
            for ``data.csv`` files.

    Returns:
        dict mapping the player name (as ``str``) to the total number of
        rows across all of that player's tables.
    """
    # NOTE(review): assumes each table holds rows of a single player and that
    # one row corresponds to one decision -- confirm against load_tabular.
    totals = {}
    for frame in load_tabular(split_folder + "/**/data.csv", False):
        name = str(frame["player_name"].iloc[0])
        totals[name] = totals.get(name, 0) + len(frame)
    return totals

def get_subject_bics(decisions, fit_folder, n_parameters):
    """Compute a per-subject BIC from stored negative log-likelihoods.

    For every subject returned by ``read_subject_nlls(fit_folder)`` the value
    ``2 * nll + n_parameters * ln(N)`` is computed, where ``N`` is that
    subject's entry in ``decisions``.

    Args:
        decisions: Mapping from subject to the number of observations ``N``.
        fit_folder: Location passed unchanged to ``read_subject_nlls``.
        n_parameters: Number of model parameters ``k`` in the BIC penalty.

    Returns:
        dict mapping each subject to its BIC.

    Raises:
        KeyError: If a subject with an nll has no entry in ``decisions``.
    """
    subject_nlls = read_subject_nlls(fit_folder)
    print(f"Got nlls for {len(subject_nlls)} subjects")

    bics = {}
    for name in subject_nlls:
        fit_term = 2 * subject_nlls[name]
        # Complexity penalty: k * ln(N).
        penalty = n_parameters * ln(decisions[name])
        bics[name] = fit_term + penalty

    print(f"Calculated bics for {len(bics)} subjects")
    return bics

In [3]:
# Number of observations per subject, shared by both model comparisons.
decisions = get_subject_decisions("../data/splits")

# BIC per subject for each fit; n_parameters is the k in the BIC penalty.
bics_main = get_subject_bics(
    decisions,
    fit_folder="../data/fit_main",
    n_parameters=10,
)
bics_no_tree = get_subject_bics(
    decisions,
    fit_folder="../data/fit_no_tree_bas_method",
    n_parameters=8,
)

Got nlls for 157 subjects
Calculated bics for 157 subjects
Got nlls for 157 subjects
Calculated bics for 157 subjects


In [4]:
# One two-column frame per model: the subject and that model's BIC.
df_bics_main = pd.DataFrame(
    list(bics_main.items()),
    columns=["subject", "main"],
)
df_bics_no_tree = pd.DataFrame(
    list(bics_no_tree.items()),
    columns=["subject", "no tree"],
)

# Inner join on the only shared column, so each row holds both BICs
# for a subject that appears in both fits.
df_bics = df_bics_main.merge(df_bics_no_tree, on="subject")

In [5]:
# Write the merged per-subject BIC table to CSV, omitting the row index.
df_bics.to_csv("../data/data_bic_main_notree.csv", index=False)