In [1]:
import json
from simulator import Athlete, Simulator
from sklearn.neighbors import KernelDensity
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats as stats
import math
from functools import partial
%matplotlib inline

In [2]:
athletes_data = json.load(open('data-women.json'))

In [3]:
def get_sample(func):
    return func()[0][0]

In [4]:
athletes = []
for i,j in athletes_data.items():
    vals = np.array(j ['fencing'])[:,np.newaxis]
    kde  = KernelDensity(bandwidth=0.05).fit(vals)
    fencing = partial(get_sample,kde.sample)
    vals = np.array(j ['swiming'])[:,np.newaxis]
    kde2  = KernelDensity(bandwidth=1).fit(vals)
    swiming = partial(get_sample,kde2.sample)
    vals = np.array(j ['laser-run'])[:,np.newaxis]
    kde3  = KernelDensity(bandwidth=5).fit(vals)
    laser_run = partial(get_sample,kde3.sample)
    vals = np.array(j ['riding'])[:,np.newaxis]
    kde4  = KernelDensity(bandwidth=10).fit(vals)
    riding = partial(get_sample,kde3.sample)
    athlete = Athlete(i,fencing,swiming,riding,laser_run)
    athletes.append(athlete)

In [5]:
tournament = Simulator(athletes)

In [6]:
#tournament.run()

In [7]:
#res = list(map(lambda x: x.name,tournament.winners))
#for i in tournament.winners:
#    print(i.name,i.points, i.time, i.handicapTime)

In [8]:
#tournament.reset()

In [9]:
results = {i.name:[] for i in athletes}
nsim = 10000

In [10]:
for i in range(nsim):
    tournament.run()
    res = list(map(lambda x: x.name,tournament.winners))
    tournament.reset()
    for n,name in enumerate(res):
        results[name].append(n)

In [24]:
confidences = {}

for name, places in results.items():
    mean = np.mean(places)
    interval = stats.t.interval(0.95, len(places)-1, loc=mean, scale=stats.sem(places))
    confidences[name]= {
        'mean':mean,
        'interval': interval,
        'gap': mean-interval[0]
    }

In [25]:
sorted_athletes = list(sorted(confidences.items(),key=lambda x: x[1]['mean']))

In [26]:
fix_results = {}
min_rank = math.floor(sorted_athletes[0][1]['mean'])
for n,(name, data) in enumerate(sorted_athletes):
    rank = data['mean']-min_rank + 1
    fix_results[name] = {
        'rank': rank,
        'gap': data['gap'],
        'sorted_rank': n,
        'nation': athletes_data[name]['nation']
    }

In [27]:
json.dump(fix_results,open('women_pentathlon.json','w'), indent=2)

In [28]:
min_rank

8

In [17]:
sorted_athletes

[('oteiza marie',
  {'mean': 8.6804,
   'interval': (8.528935885810242, 8.83186411418976),
   'gap': 0.1514641141897588}),
 ('silkina volha',
  {'mean': 9.1018,
   'interval': (8.945615147067953, 9.257984852932049),
   'gap': 0.1561848529320482}),
 ('kovacs sarolta',
  {'mean': 9.2451,
   'interval': (9.156190916253266, 9.334009083746736),
   'gap': 0.08890908374673501}),
 ('clouvel elodie',
  {'mean': 9.4212,
   'interval': (9.27465377544446, 9.567746224555542),
   'gap': 0.14654622455554112}),
 ('french kate',
  {'mean': 9.596,
   'interval': (9.42678087997711, 9.76521912002289),
   'gap': 0.1692191200228894}),
 ('muir joanna',
  {'mean': 9.7352,
   'interval': (9.597130232291752, 9.87326976770825),
   'gap': 0.13806976770824875}),
 ('prokopenko anastasiya',
  {'mean': 9.9421,
   'interval': (9.784339932367523, 10.099860067632477),
   'gap': 0.15776006763247707}),
 ('gulyas michelle',
  {'mean': 10.3547,
   'interval': (10.163170546112855, 10.546229453887143),
   'gap': 0.19152945388