In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns; sns.set()
import scipy
import scipy.stats as stats
import os

In [2]:
# project files

from utils.variables import pack, unpack
from utils.display import table, tabulate
from utils.files import download_sheet, save, load
from utils.hash import digest


from fit_options import fit_options
from game_model import game
from rp_model import compute_rp, make_precomputed_columns
from initial_guess import make_initial_guess


In [3]:
# stuff for display

from IPython.lib.pretty import pretty, pprint

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Raise the pandas display limits so wide / long frames are not elided.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 120

In [4]:
# Load previous data
# Read the pickled dataset whose path is configured in `fit_options.data_file`.
# NOTE: unpickling executes code embedded in the file; only load trusted pickles.

data = pd.read_pickle(filepath_or_buffer=fit_options.data_file)

# Both expressions below are rendered (ast_node_interactivity is set to "all" above).
data.describe()  # summary statistics
data.head(n=5)   # first five rows

Unnamed: 0,Level,RP,MS lvl,ModelRP,Difference,Freq1,FreqL,Inv,Berry1,BerryL,Ing1P,Helps per hour,Helps Neutral,NrgMult,IngrMult,SkillMult,Ingr%,SklContr,BerryD,IngD,Dupes,Amnt,Ing2P,Help skill bonus,RP Multiplier
count,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0,7394.0
mean,11.159183,633.553827,1.531106,631.221396,-0.002029,3910.792534,3754.827046,12.358128,28.068434,38.351636,108.905058,1.035135,0.988096,0.994601,0.998999,1.007276,0.206731,34.558823,56.060319,148.205504,1.0,0.086827,3.585475,0.986585,1.030131
std,7.404413,413.293029,1.021303,413.930272,0.580695,973.915081,1037.172015,3.479251,2.616819,8.314996,16.191425,0.310743,0.058787,0.044944,0.117892,0.118634,0.045978,38.524205,28.135215,61.483427,0.0,0.557071,22.268253,0.039022,0.072489
min,1.0,245.0,1.0,-1.0,-21.0,2200.0,1489.1184,7.0,24.0,24.0,90.0,0.52,0.89,0.92,0.8,0.8,0.0,0.0,25.0,90.0,1.0,0.0,0.0,0.79,1.0
25%,5.0,361.0,1.0,360.0,0.0,3100.0,2910.0,10.0,26.0,33.0,98.0,0.81,1.0,1.0,1.0,1.0,0.18,13.16,36.0,101.0,1.0,0.0,0.0,1.0,1.0
50%,10.0,489.0,1.0,487.0,0.0,3800.0,3731.6,12.0,28.0,37.0,103.0,0.96,1.0,1.0,1.0,1.0,0.2,20.2,46.0,121.0,1.0,0.0,0.0,1.0,1.0
75%,14.0,746.0,2.0,743.0,0.0,4500.0,4419.0,14.0,31.0,42.0,115.0,1.23,1.0,1.0,1.0,1.0,0.23,47.6825,68.0,202.0,1.0,0.0,0.0,1.0,1.0
max,42.0,3493.0,6.0,3495.0,5.0,6300.0,6916.14,24.0,35.0,83.0,151.0,2.41,1.12,1.08,1.2,1.2,0.42,414.09,243.0,511.0,1.0,8.0,342.0,1.0,1.44


Unnamed: 0,Pokemon,Level,RP,Nature,MS lvl,Source,ModelRP,Difference,Nature2,ID,NatureP,NatureN,Freq1,FreqL,Type,Class,MSkill,Inv,Berry1,BerryL,Ing1,Ing1P,Helps per hour,Helps Neutral,NrgMult,IngrMult,SkillMult,Ingr%,SklContr,RPneutral,IDNeutral,BerryD,IngD,Dupes,DupeMatch,Sub Skill 1,Sub Skill 2,Ingredient 2,Amnt,Ing2P,Help skill bonus,RP Multiplier
0,Arbok,9.0,574.0,Naughty,1.0,Rate My Mon,574.0,0.0,Naughty,ArbokNaughty91,Speed of Help,Main Skill Chance,3700.0,3276.72,Poison,Berries,Charge Energy S,14.0,32.0,40,Bean Sausage,103.0,1.09,0.9,1.0,1.0,0.8,0.26,22.75,538.35,ArbokNeutral91,80.0,103.0,1,True,,,,0.0,0.0,1.0,1.0
1,Arbok,8.0,531.0,Hardy,1.0,Questions help guides,531.0,0.0,Neutral,ArbokNeutral81,-,-,3700.0,3648.2,Poison,Berries,Charge Energy S,14.0,32.0,39,Bean Sausage,103.0,0.98,1.0,1.0,1.0,1.0,0.26,22.75,531.0,ArbokNeutral81,78.0,103.0,1,True,,,,0.0,0.0,1.0,1.0
2,Arbok,8.0,538.0,Quiet,1.0,Questions help guides,538.0,0.0,Quiet,ArbokQuiet81,Ingredient Finding,Exp Gains,3700.0,3648.2,Poison,Berries,Charge Energy S,14.0,32.0,39,Bean Sausage,103.0,0.98,1.0,1.0,1.2,1.0,0.26,22.75,530.6,ArbokNeutral81,78.0,103.0,1,True,,,,0.0,0.0,1.0,1.0
3,Arbok,9.0,582.0,Relaxed,1.0,,582.0,0.0,Relaxed,ArbokRelaxed91,Energy Recovery,EXP Gains,3700.0,3640.8,Poison,Berries,Charge Energy S,14.0,32.0,40,Bean Sausage,103.0,0.98,1.0,1.08,1.0,1.0,0.26,22.75,538.89,ArbokNeutral91,80.0,103.0,1,True,,,,0.0,0.0,1.0,1.0
4,Arbok,9.0,489.0,Timid,1.0,,489.0,0.0,Timid,ArbokTimid91,EXP Gains,Speed of Help,3700.0,4004.88,Poison,Berries,Charge Energy S,14.0,32.0,40,Bean Sausage,103.0,0.89,1.1,1.0,1.0,1.0,0.26,22.75,538.45,ArbokNeutral91,80.0,103.0,1,True,,,,0.0,0.0,1.0,1.0


In [5]:
# Load previous fit

# Pack the initial guess into a flat vector `x0`; `unpack_info` is what
# `unpack` needs to turn a flat vector back into named pieces.
initial_guess = make_initial_guess()
x0, unpack_info = pack(initial_guess)

# The stored result is looked up through a digest of (data, x0).
hash_value = digest(data, x0)
filename = fit_options.result_file(hash_value)

# Stored optimiser result, and its solution vector in unpacked form.
opt = load(filename)
sol = unpack(opt.x, unpack_info)

# Display the optimiser result.
opt

    message: `ftol` termination condition is satisfied.
    success: True
     status: 2
          x: [ 2.563e-01  2.544e-01 ...  2.210e-01  2.211e-01]
       cost: 1160.2490719227876
       grad: [-2.782e-05 -5.842e-06 ...  0.000e+00  0.000e+00]
 optimality: 0.43414554256014526
       nfev: 19
       njev: 7

In [6]:
# Helpers

def truncated_normal_sample(size, mu, sigma, lower, upper, random_state=None):
    """Draw samples from a normal distribution truncated to [lower, upper].

    Parameters
    ----------
    size : int or tuple of int
        Number (or shape) of samples to draw.
    mu, sigma : float
        Mean and standard deviation of the underlying (untruncated) normal.
    lower, upper : float
        Truncation bounds, expressed in the same units as `mu`.
    random_state : None, int, numpy RandomState or Generator, optional
        Forwarded to `scipy.stats.truncnorm.rvs`. With the default `None`
        the global NumPy random state is used (the previous behaviour).

    Returns
    -------
    numpy.ndarray
        Samples of the requested size.
    """
    # scipy expects the bounds in units of standard deviations around `loc`.
    a = (lower - mu) / sigma
    b = (upper - mu) / sigma
    return stats.truncnorm.rvs(a, b, loc=mu, scale=sigma, size=size, random_state=random_state)

def round_noise(size, random_state=None):
    """Draw `size` noise values strictly inside (-0.5, 0.5).

    The noise is a zero-mean normal (sigma = 0.25) truncated just inside
    +/-0.5, so adding it to an integer keeps `round()` of the sum equal to
    that integer.

    `random_state` is forwarded to `truncated_normal_sample`; leave it as
    `None` to use the global NumPy random state.
    """
    # Small margin keeping the samples away from the exact +/-0.5 rounding ties.
    eps = 1e-6
    return truncated_normal_sample(
        size,
        mu=0.0,
        sigma=0.25,
        lower=-0.5 + eps,
        upper=0.5 - eps,
        random_state=random_state,
    )

def harmonic(a,b):
    """Return the harmonic mean of `a` and `b`: 2 / (1/a + 1/b)."""
    inverse_a = 1.0 / a
    inverse_b = 1.0 / b
    reciprocal_sum = inverse_a + inverse_b
    return 2.0 / reciprocal_sum

In [7]:
# Bootstrap method:
#   1. create N synthetic datasets that are likely to represent the population,
#   2. redo the fit on each of them, starting from the best fit on the initial dataset,
#   3. collect statistics on the fitted variables across the runs.
#
# The synthetic datasets are built by sampling with replacement (duplicates allowed).
# Because some Pokémon are rare, the sampling is stratified per Pokémon.

stratas = data.groupby(by=['Pokemon'], group_keys=False)

# Reference group size: the 75th percentile of the per-Pokémon row counts.
target_group_size = stratas.size().quantile(0.75)

# Target length of `all_opt_x`. The list starts with the fit on the original
# data, so that fit counts as one of the entries.
n_bootstraps = 50
all_opt_x = [opt.x]

In [8]:
cache_fit = True
boostrap_filename = f"./results/bootstrap-fit-{hash_value}.pickle"

# Base seed of the bootstrap. Each run is seeded with `bootstrap_seed + run index`
# so that runs are reproducible, and resuming an interrupted loop does not replay
# the random draws of the runs that are already collected.
bootstrap_seed = 0

if cache_fit and os.path.isfile(boostrap_filename):

    all_opt_x = load(boostrap_filename)
    print("Loaded from cache")

else:

    # WARNING: RUNNING THE OPTIMISATION N TIMES IS VERY LONG.
    #
    # YOU CAN INTERRUPT THE KERNEL (STOP BUTTON)
    # AND RUN THE NEXT CELLS IF YOU WANT TO SEE CURRENT PROGRESS
    #
    # THE COLLECTION `all_opt_x` IS DEFINED ABOVE THIS CELL
    # SO YOU CAN RESUME ADDING MORE RUNS
    #
    # NOTE: the cache file is only written once the loop completes.

    while len(all_opt_x) < n_bootstraps:

        run_index = len(all_opt_x)
        print( "Boostrap run " + str(run_index) )

        # Both `DataFrame.sample` and the scipy sampler behind `round_noise`
        # fall back to NumPy's global random state, so seeding it here makes
        # the whole run deterministic.
        np.random.seed(bootstrap_seed + run_index)

        # To create the synthetic datasets, we use sampling with replacement (allow duplicate)

        # We'll introduce a correction toward more equal sample size between Pokémon
        # We can motivate that by the fact stratified sampling should use population proportions
        # instead of current data proportions

        resampled = stratas.apply(
            lambda group: group.sample(
                round( harmonic(len(group), target_group_size) ),
                replace=True,
                ignore_index=True,
            )
        )

        # Add small noise to RP
        # We do so to simulate some unknown value that would round() to current RP
        #
        # Build a fresh float array instead of adding in place: `to_numpy()` may
        # return a view of the frame (an in-place add would then silently alter
        # `resampled["RP"]`, which is passed to `compute_rp` below), a read-only
        # array under pandas Copy-on-Write, or an integer array that cannot
        # receive float noise in place.

        referenceRP = resampled["RP"].to_numpy(dtype=float, copy=True) + round_noise(len(resampled))

        # Compute per sample information about help time, nature, subskills etc
        recomputed = make_precomputed_columns(resampled)

        # Put the pieces together

        def residual(x):
            """Residual vector: noisy reference RP minus model RP for parameters `x`."""
            return referenceRP - compute_rp(x, resampled, recomputed, unpack_info)

        #fit_options.soft_round.exact = False
        #fit_options.soft_round.alpha = 6

        # Redo the fit, starting from the optimal we found on current data
        opt2 = scipy.optimize.least_squares(residual, opt.x, **fit_options.least_squares_kwargs)

        # Collect results for stats
        all_opt_x.append(opt2.x)

    # Save
    save(boostrap_filename, all_opt_x)

Boostrap run 1
   Iteration     Total nfev        Cost      Cost reduction    Step norm     Optimality   
       0              1         1.2962e+03                                    2.78e+06    
       1              7         1.2272e+03      6.90e+01       3.34e-01       2.64e+04    
       2              9         1.2265e+03      7.36e-01       1.67e-01       1.38e+03    
       3             12         1.2264e+03      1.42e-02       2.09e-02       2.08e+02    
       4             14         1.2264e+03      2.27e-03       1.04e-02       7.05e+01    
       5             17         1.2264e+03      9.29e-05       1.31e-03       7.10e+00    
       6             18         1.2264e+03      7.44e-05       2.61e-03       8.36e+00    
       7             20         1.2264e+03      2.22e-07       1.31e-03       2.56e-01    
`ftol` termination condition is satisfied.
Function evaluations 20, initial cost 1.2962e+03, final cost 1.2264e+03, first-order optimality 2.56e-01.
Boostrap run 2
  

In [9]:
# Stats
# The median across runs is used as a robust estimate of the mean.
# 1.4826 * <median absolute deviation from the median> is a robust estimate of
# the std for normally distributed values, so a 95% half-width would be
# 1.96 * 1.4826 ~= 2.906 times the MAD; the factor 3 below rounds that up.

# One row per run, one column per fitted variable.
samples = np.asarray(all_opt_x)

center = np.median(samples, axis=0)

# 95% confidence region, classic (std based) and robust (MAD based) versions.
interval_std = 1.96 * np.std(samples, axis=0)
interval_mad = 3 * np.median(np.abs(center - samples), axis=0)

sol_center = unpack(center, unpack_info)
sol_interval_std = unpack(interval_std, unpack_info)
sol_interval_mad = unpack(interval_mad, unpack_info)

# Pretty display
# The std-based intervals (`sol_interval_std`) are computed but not shown;
# the starred columns are the MAD-based ones.
pd.DataFrame({

    "Pokemon": game.data.pokedex["Pokemon"],

    "ing%": sol_center["Pokemons ing fractions"] * 100.0,
    "conf (ing)*": sol_interval_mad["Pokemons ing fractions"] * 100.0,

    "skill% * skillValue": sol_center["Pokemons skill products"],
    "conf (skill)*": sol_interval_mad["Pokemons skill products"],

}).set_index("Pokemon")

Unnamed: 0_level_0,ing%,conf (ing)*,skill% * skillValue,conf (skill)*
Pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bulbasaur,25.635983,0.04266,16.698031,0.05902
Ivysaur,25.441758,0.025436,16.711705,0.024815
Venusaur,26.556069,0.022794,18.478562,0.019385
Charmander,20.038234,0.042634,9.683594,0.069433
Charmeleon,22.672693,0.031352,14.050666,0.032316
Charizard,22.341004,0.048903,14.127069,0.077604
Squirtle,27.019292,0.047006,17.608523,0.055647
Wartortle,27.03824,0.032775,17.587303,0.018996
Blastoise,27.423095,0.033503,18.466855,0.020008
Caterpie,17.912884,0.123101,7.000228,0.050553


In [10]:
# Explain what we did with the re-sampling target size
# pd.DataFrame({'before': stratas.size(), 'after': stratas.apply(lambda x: round( harmonic(len(x), target_group_size) )) })