In [1]:
import json
from simulator import Athlete, Simulator
from sklearn.neighbors import KernelDensity
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats as stats
import math
from functools import partial
%matplotlib inline

In [2]:
athletes_data = json.load(open('data.json'))

In [3]:
def get_sample(func):
    return func()[0][0]

In [4]:
athletes = []
for i,j in athletes_data.items():
    vals = np.array(j ['fencing'])[:,np.newaxis]
    kde  = KernelDensity(bandwidth=0.05).fit(vals)
    fencing = partial(get_sample,kde.sample)
    vals = np.array(j ['swiming'])[:,np.newaxis]
    kde2  = KernelDensity(bandwidth=1).fit(vals)
    swiming = partial(get_sample,kde2.sample)
    vals = np.array(j ['laser-run'])[:,np.newaxis]
    kde3  = KernelDensity(bandwidth=5).fit(vals)
    laser_run = partial(get_sample,kde3.sample)
    vals = np.array(j ['riding'])[:,np.newaxis]
    kde4  = KernelDensity(bandwidth=10).fit(vals)
    riding = partial(get_sample,kde3.sample)
    athlete = Athlete(i,fencing,swiming,riding,laser_run)
    athletes.append(athlete)

In [5]:
tournament = Simulator(athletes)

In [6]:
#tournament.run()

In [7]:
#res = list(map(lambda x: x.name,tournament.winners))
#for i in tournament.winners:
#    print(i.name,i.points, i.time, i.handicapTime)

In [8]:
#tournament.reset()

In [9]:
results = {i.name:[] for i in athletes}
nsim = 10000

In [10]:
for i in range(nsim):
    tournament.run()
    res = list(map(lambda x: x.name,tournament.winners))
    tournament.reset()
    for n,name in enumerate(res):
        results[name].append(n)

In [11]:
confidences = {}

for name, places in results.items():
    mean = np.mean(places)
    interval = stats.t.interval(0.95, len(places)-1, loc=mean, scale=stats.sem(places))
    confidences[name]= {
        'mean':mean,
        'interval': interval,
        'gap': mean-interval[0]
    }

In [12]:
sorted_athletes = list(sorted(confidences.items(),key=lambda x: x[1]['mean']+x[1]['gap']))

In [19]:
fix_results = {}
min_rank = math.floor(sorted_athletes[0][1]['mean'])
for n,(name, data) in enumerate(sorted_athletes):
    rank = data['mean']-min_rank + 1
    fix_results[name] = {
        'rank': rank,
        'gap': data['gap'],
        'sorted_rank': n,
        'nation': athletes_data[name]['nation']
    }

In [20]:
json.dump(fix_results,open('men_pentathlon.json','w'), indent=2)

In [21]:
sorted_athletes

[('palazkov ilya',
  {'mean': 8.1334,
   'interval': (8.000881442303916, 8.265918557696084),
   'gap': 0.13251855769608412}),
 ('luo shuai',
  {'mean': 10.5182,
   'interval': (10.343759721255468, 10.692640278744532),
   'gap': 0.17444027874453205}),
 ('choong joseph',
  {'mean': 10.6256,
   'interval': (10.458962771320554, 10.792237228679447),
   'gap': 0.16663722867944664}),
 ('elgendy ahmed',
  {'mean': 11.1505,
   'interval': (10.963520071975676, 11.337479928024322),
   'gap': 0.18697992802432317}),
 ('gustenau gustav',
  {'mean': 11.874,
   'interval': (11.711691334316866, 12.036308665683135),
   'gap': 0.16230866568313473}),
 ('vlach martin',
  {'mean': 12.1817,
   'interval': (12.006953326385663, 12.356446673614336),
   'gap': 0.1747466736143366}),
 ('belaud valentin',
  {'mean': 12.2937,
   'interval': (12.093130017085095, 12.494269982914904),
   'gap': 0.2005699829149048}),
 ('prades valentin',
  {'mean': 12.6501,
   'interval': (12.481185809978177, 12.819014190021823),
   'ga