In [1]:
import json
import numpy as np
#from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt
import scipy.stats as stats
import math
from sprint_individual_tournament import Cyclist, Tournament
from functools import partial
%matplotlib inline

In [2]:
# Load the scraped sprint results. The context manager guarantees the file
# handle is closed even if parsing fails.
with open('./data/women_sprint.json', encoding="utf-8") as fh:
    data = json.load(fh)

# The next cell treats every top-level key as a cyclist name, so drop these
# entries first (presumably round labels that leaked into the scrape -- TODO confirm).
# A missing key still raises KeyError, exactly as before.
for key in ('Bronze medal final', 'Bronze medal race',
            'Gold medal final', 'Gold medal race'):
    del data[key]

In [3]:
# Containers filled while walking the raw results:
#   gaps / behinds / times -- numeric values pooled over every cyclist
#   ciclysts               -- one record per cyclist, keyed by name
gaps = []
behinds = []
ciclysts = {}
times = []


def _parse_float(raw):
    """Return ``raw`` converted to float, or None when it is missing or not numeric."""
    if raw is None:
        return None
    try:
        return float(raw)
    except ValueError:
        return None


def _record_gap(raw, cyclist_gaps):
    """Store one round gap value.

    A numeric value is appended to the pooled ``gaps`` list and to
    ``cyclist_gaps``. The literal "X" is stored as 0 for the cyclist only.
    Anything else (including a missing value) is ignored.
    """
    value = _parse_float(raw)
    if value is not None:
        gaps.append(value)
        cyclist_gaps.append(value)
    elif raw == "X":
        cyclist_gaps.append(0)


for name, info in data.items():
    record = {
        'name': name,
        'nation': None,
        'years': [],
        'qualifying': {
            'times': [],
            'behinds': []
        },
        'rounds': {
            'gaps': []
        }
    }
    ciclysts[name] = record
    qualifying = record['qualifying']
    round_gaps = record['rounds']['gaps']

    # Years are visited in ascending key order.
    for year, yinfo in sorted(info.items(), key=lambda x: x[0]):
        record['years'].append(int(year))
        for rinfo in yinfo.values():
            # The nation is taken from the first result row seen.
            if record['nation'] is None:
                record['nation'] = rinfo['Nation']

            if rinfo.get('Gap'):
                # Row with a gap: "Race 1"/"Race 2" are only read when
                # "Gap" is set, as in the original logic.
                _record_gap(rinfo.get('Gap'), round_gaps)
                if rinfo.get('Race 1'):
                    _record_gap(rinfo.get('Race 1'), round_gaps)
                    # A missing "Race 2" used to crash with TypeError;
                    # it is now skipped like any other unusable value.
                    _record_gap(rinfo.get('Race 2'), round_gaps)
            else:
                # Row without a gap: collect qualifying "Behind" and "Time".
                if rinfo.get('Behind'):
                    behind = _parse_float(rinfo.get('Behind'))
                    if behind is not None:
                        behinds.append(behind)
                        qualifying['behinds'].append(behind)
                if rinfo.get('Time'):
                    lap_time = _parse_float(rinfo.get('Time'))
                    if lap_time is not None:
                        times.append(lap_time)
                        qualifying['times'].append(lap_time)


In [4]:
# Pooled means over all cyclists; each list is sorted before averaging.
gaps_m, behinds_m, times_m = (
    np.mean(sorted(values)) for values in (gaps, behinds, times)
)

In [5]:
# Attach a 'mean' and 'std' to the qualifying and rounds sections of every
# cyclist record; the next cell feeds them to stats.norm.rvs.
for name, entry in ciclysts.items():
    # --- qualifying ---
    qualifying = entry['qualifying']
    q_times = np.array(qualifying['times'])
    mu = q_times.mean()
    if len(qualifying['times']) == 1:
        # Single time: average the pooled mean "behind" with this cyclist's own.
        variance = (behinds_m + np.array(qualifying['behinds']).mean()) / 2
    else:
        # Otherwise: mean absolute difference between consecutive times.
        variance = np.abs(np.diff(q_times)).mean()
    sigma = math.sqrt(variance)
    qualifying['mean'] = mu
    qualifying['std'] = sigma

    # --- rounds ---
    rounds = entry['rounds']
    n_gaps = len(rounds['gaps'])
    mu = times_m  # every cyclist gets the pooled mean qualifying time here
    if n_gaps == 0:
        variance = 3/2*gaps_m
    else:
        variance = np.array(rounds['gaps']).mean()
        if n_gaps == 1:
            # A single observation is blended with a scaled pooled mean gap.
            variance = (4/3*gaps_m + variance)/2

    sigma = math.sqrt(variance)
    rounds['mean'] = mu
    rounds['std'] = sigma

In [6]:
# Build one Cyclist per record, assuming normally distributed times: each
# sampler is stats.norm.rvs with the fitted mean (loc) and std (scale) pre-bound.
cyclists_i = []
for entry in ciclysts.values():
    qualifying = entry['qualifying']
    rounds = entry['rounds']
    cyclists_i.append(
        Cyclist(
            partial(stats.norm.rvs, qualifying['mean'], qualifying['std']),
            partial(stats.norm.rvs, rounds['mean'], rounds['std']),
            entry['name'],
            entry['nation'],
        )
    )

In [7]:
# Create the tournament from the list of simulated cyclists
# (Tournament is imported from sprint_individual_tournament).
tournament = Tournament(cyclists_i)

In [8]:
# Number of tournaments to simulate.
nit = 10000
# One independent list of finishing positions per cyclist name.
results = dict((cyclist.name, []) for cyclist in cyclists_i)

In [9]:
# Run `nit` tournaments and record, for every cyclist, the index at which
# they appear in `tournament.winners` after each run.

# Seed NumPy's global RNG so the simulation is reproducible:
# scipy.stats.norm.rvs draws from it when no random_state is passed.
# NOTE(review): if Tournament uses another random source, seed that as well.
SEED = 42
np.random.seed(SEED)

# Start from empty lists so re-running this cell does not pile new runs on
# top of the previous ones.
for places in results.values():
    places.clear()

for _ in range(nit):
    tournament.run()
    # Read the order before reset(), which is called right after.
    finishing_order = [cyclist.name for cyclist in tournament.winners]
    tournament.reset()
    for position, name in enumerate(finishing_order):
        results[name].append(position)

In [10]:
# For every cyclist: mean finishing position, 95% Student-t confidence
# interval of that mean, and 'gap' = distance from the mean to the lower bound.
confidences = {}

for name, places in results.items():
    mean = np.mean(places)
    sem = stats.sem(places)
    if sem == 0:
        # All positions identical: t.interval with scale=0 would return
        # (nan, nan), so use the degenerate interval explicitly.
        interval = (mean, mean)
    else:
        interval = stats.t.interval(0.95, len(places)-1, loc=mean, scale=sem)
    confidences[name]= {
        'mean':mean,
        'interval': interval,
        'gap': mean-interval[0]
    }

In [11]:
# (name, summary) pairs ordered by ascending mean finishing position.
sorted_cyclists = sorted(confidences.items(), key=lambda item: item[1]['mean'])

In [12]:
# Turn mean positions into 1-based ranks relative to the best cyclist.
# Ranks come from floor(mean), so ties and skipped ranks are possible;
# 'sorted_rank' is the strict 0-based order by mean.
fix_results = {}
min_rank = math.floor(sorted_cyclists[0][1]['mean'])
# The loop variable is deliberately not called `data`: that would overwrite
# the raw results dict loaded at the top of the notebook.
for n, (name, summary) in enumerate(sorted_cyclists):
    rank = math.floor(summary['mean']) - min_rank + 1
    fix_results[name] = {
        'rank': rank,
        'gap': summary['gap'],
        'sorted_rank': n
    }

In [13]:
# Show the final ranking table as this cell's output.
fix_results

{'Kristina Vogel': {'rank': 1, 'gap': 0.1662528476859677, 'sorted_rank': 0},
 'Stephanie Morton': {'rank': 1, 'gap': 0.18036985086535218, 'sorted_rank': 1},
 'Kelsey Mitchell': {'rank': 3, 'gap': 0.24078682478207014, 'sorted_rank': 2},
 'Lea Friedrich': {'rank': 4, 'gap': 0.24519831536449388, 'sorted_rank': 3},
 'Zhong Tianshi': {'rank': 5, 'gap': 0.26611537836554433, 'sorted_rank': 4},
 'Lee Wai Sze': {'rank': 5, 'gap': 0.25027887153280304, 'sorted_rank': 5},
 'Miriam Vece': {'rank': 5, 'gap': 0.26179819931557446, 'sorted_rank': 6},
 'Sophie Capewell': {'rank': 6, 'gap': 0.27053449838190957, 'sorted_rank': 7},
 'Mathilde Gros': {'rank': 6, 'gap': 0.2446307038546074, 'sorted_rank': 8},
 'Kaarle McCulloch': {'rank': 6, 'gap': 0.24065460101568448, 'sorted_rank': 9},
 'Daria Shmeleva': {'rank': 8, 'gap': 0.25284675295976555, 'sorted_rank': 10},
 'Lauriane Genest': {'rank': 8, 'gap': 0.2902992953733694, 'sorted_rank': 11},
 'Simona Krupeckaitė': {'rank': 8,
  'gap': 0.23960941998878482,
  

In [14]:
# Write the ranking to disk. The context manager closes (and flushes) the
# file, which the bare open() call did not guarantee.
with open('women_sprint_results.json', 'w', encoding='utf-8') as fh:
    json.dump(fix_results, fh, indent=2)