In [1]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Apply seaborn's default theme to every figure created below.
# `set_theme()` is the preferred spelling; `sns.set()` is only an alias for it.
sns.set_theme()

Reference — SciPy `scipy.optimize.direct` documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.direct.html

In [2]:
# Project files: default location of the rp_model data directory.
# A value of RP_MODEL_FILE_PATH that is already present in the environment is
# left untouched; only a missing variable is filled in.
# NOTE(review): presumably rp_model reads this variable when it is imported,
# which is why it is set before the rp_model imports below — confirm.
if "RP_MODEL_FILE_PATH" not in os.environ:
    os.environ["RP_MODEL_FILE_PATH"] = "../files"

from rp_model.utils import pack, table, load, digest
from rp_model.calc import FitOptions, RoundApprox, compute_rp, make_precomputed_columns, make_initial_guess

In [None]:
# Notebook display configuration.

from IPython.core.interactiveshell import InteractiveShell

# Show the value of every top-level expression in a cell, not only the last
# one. Several later cells rely on this to display more than one result.
InteractiveShell.ast_node_interactivity = "all"

# NOTE(review): a "progress bar" section was stubbed at this point, but no
# code is configured for it.

# Raise the pandas display limits so wide/long frames are not truncated.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 120

In [None]:
# Load the dataset used for fitting from the pickle file named by
# FitOptions.data_file.
# NOTE(review): unpickling can execute arbitrary code — only load files that
# were produced by this project / a trusted source.
data = pd.read_pickle(FitOptions.data_file)
# Two bare expressions: both are displayed only because the display-setup cell
# sets InteractiveShell.ast_node_interactivity = "all".
data.describe()
data.head()

In [5]:
# Make an initial guess
# Pack the dictionary into a 1-D vector.
# Also store information on how to unpack that vector
#   x0          - the packed starting vector returned by `pack`
#   unpack_info - the second value returned by `pack`; RP() forwards it to compute_rp
# NOTE(review): the saved output of this cell is `KeyError: 'Sub Skill 3'`.
# Presumably the loaded `data` lacks a column the rp_model helpers expect —
# confirm the pickle matches the current rp_model version before re-running.

x0, unpack_info = pack(*make_initial_guess())
# Values derived from `data` once up front; RP() passes them to compute_rp on every call.
computed = make_precomputed_columns(data)

KeyError: 'Sub Skill 3'

In [None]:
# The RP model itself is implemented by `compute_rp` (imported from
# rp_model.calc). This wrapper binds the notebook-level inputs so that the
# solver and the analysis cells only have to supply `x`.

def RP(x):
    """Evaluate the RP model for the parameter vector ``x``.

    Forwards ``x`` together with the notebook globals ``data``, ``computed``
    and ``unpack_info`` to ``compute_rp`` and returns its result unchanged.
    The globals are looked up at call time, so re-running an earlier cell
    changes what this function sees.
    """
    bound_inputs = (data, computed, unpack_info)
    return compute_rp(x, *bound_inputs)


# Used by the error analysis: the residual is taken against the model output
# after exact rounding with np.round.

def residual(x):
    """Return the observed ``data["RP"]`` minus the rounded model value ``RP(x)``.

    The result has one entry per row of ``data``; a value of 0 means the
    rounded model reproduces the observed RP for that row.
    """
    observed = data["RP"]
    predicted = np.round(RP(x))
    return observed - predicted


In [None]:
# Locate and load a previously stored fit result. The file name is derived
# from a digest of (data, x0) — presumably so a stored result is tied to this
# exact dataset and initial guess; TODO confirm against rp_model.utils.digest.
filename = FitOptions.get_result_file(digest(data, x0))
opt = load(filename)
# Display the loaded result; the refinement cell below starts from `opt.x`.
opt

In [None]:
# Refine the stored solution with exact rounding enabled for both the `rp` and
# the `bonus` rounding options. These assignments are made on FitOptions
# itself, so every later RP()/residual() call in this kernel is affected.
FitOptions.rounding.rp = RoundApprox.Exact
FitOptions.rounding.bonus = RoundApprox.Exact
#FitOptions.soft_round.alpha = 18

# Search box: each parameter may move between 0.5x and 1.5x of its stored
# value. For a negative parameter 1.5x is the smaller number, so the lower and
# upper bounds are taken as the element-wise min / max of the two candidates.
# NOTE(review): a parameter that is exactly 0 gets bounds_low == bounds_high == 0,
# i.e. it is pinned — verify that the solver accepts zero-width bounds.
optx = opt.x
choice_list = [0.5 * optx, 1.5 * optx]
bounds_low = np.minimum(*choice_list)
bounds_high = np.maximum(*choice_list)

# NOTE(review): `solve` is not imported or defined in any cell shown here, so
# this cell raises NameError on a fresh kernel. The keyword arguments look like
# the DFO-LS / Py-BOBYQA `solve` interface — confirm which one is intended and
# import it alongside the other imports at the top of the notebook.
result = solve(
    residual,
    optx,
    bounds=(bounds_low, bounds_high),
    scaling_within_bounds=True,
    print_progress=True,
    maxfun=2000,
)

In [None]:
# Pass the solver output to the project's `table` helper (imported from rp_model.utils).
# NOTE(review): what `table` renders is not visible from this notebook — presumably a summary of the fit.
table(result)

In [None]:
def _label_nonzero_bars(ax):
    """Write the bar height above every non-empty bar of the histogram on ``ax``.

    Depending on the seaborn version, ``ax.containers`` may begin with an empty
    placeholder container ahead of the bars that were actually drawn. On a
    fresh axes holding a single histogram the real bars are the last
    container, so that one is used instead of a hard-coded index.
    """
    bars = ax.containers[-1]
    return ax.bar_label(bars, labels=[str(v) if v else '' for v in bars.datavalues])


# Residual of the refined solution, using the same definition as `residual`.
r_opt = residual(result.x)
pd.Series(r_opt).describe()

# Magnitude of the residual on log-log axes. Values are clipped to
# [0.1, 1000] so that exact hits (|residual| == 0) still land in a bin.
fig, ax = plt.subplots()
g = sns.histplot(x=np.clip(np.abs(r_opt), 1e-1, 1e3), log_scale=True, bins=20, ax=ax)
_ = ax.set_yscale('log')
_ = ax.set(xlabel="|residual| (clipped to [0.1, 1000])", ylabel="count")
_ = _label_nonzero_bars(ax)

# Signed residual with one bar per integer value, log-scaled counts.
fig, ax = plt.subplots()
g = sns.histplot(x=r_opt, discrete=True, ax=ax)
_ = ax.set_yscale('log')
_ = ax.set(xlabel="residual (observed RP - rounded model RP)", ylabel="count")
_ = _label_nonzero_bars(ax)