In [662]:
import pandas as pd
import numpy as np
from scipy.stats import lognorm
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_absolute_error

In [663]:
# Load the per-runner pace history from a tab-separated file.
in_file_name = 'data/grouped_paces_ju.tsv'
df_all = pd.read_csv(in_file_name, sep="\t")
# `history` initially refers to the very same frame object as `df_all`.
history = df_all

In [664]:
# Load the 2018 running order and keep only rows whose team_id is a finite number.
order18 = pd.read_csv('data/running_order_j2018_ju.tsv', sep="\t")
has_team_id = np.isfinite(order18["team_id"])
order18 = order18[has_team_id]

In [665]:
# Row-wise mean and standard deviation of the log of pace_1..pace_6, ignoring NaNs.
# Rows where every pace is NaN yield NaN (numpy emits a RuntimeWarning for them).
pace_columns = [f"pace_{run}" for run in range(1, 7)]
paces = history[pace_columns]
logs = np.log(paces)
means = np.nanmean(logs, axis=1)
stdevs = np.nanstd(logs, axis=1)
history = history.assign(log_means=means, log_stdevs=stdevs)

  after removing the cwd from sys.path.
  keepdims=keepdims)


In [666]:
# Regression fallback: predict log-pace mean and stdev from the team id.
# The estimates are computed for every 2018 runner, but are meant to be used
# only where no personal history is available.
# The models are fitted on history rows that have a finite pace_2.
with_history = history[np.isfinite(history["pace_2"])]
x = with_history["mean_team_id"].values.reshape(-1, 1)
team_ids_2018 = order18["team_id"].values.reshape(-1, 1)

# Model for the mean of log-pace.
log_means = with_history["log_means"].values.reshape(-1, 1)
log_means_model = linear_model.LinearRegression().fit(x, log_means)
estimated_log_means = log_means_model.predict(team_ids_2018)
order18 = order18.assign(estimated_log_means=estimated_log_means)

# Model for the standard deviation of log-pace.
log_stdevs = with_history["log_stdevs"].values.reshape(-1, 1)
log_stdevs_model = linear_model.LinearRegression().fit(x, log_stdevs)
estimated_log_stdevs = log_stdevs_model.predict(team_ids_2018)
order18 = order18.assign(estimated_log_stdevs=estimated_log_stdevs)

order18.head()

Unnamed: 0,team_id,team,team_base_name,leg,leg_dist,name,estimated_log_means,estimated_log_stdevs
4,5.0,Halden SK 1,Halden SK,1,11.0,Emil Wingstedt,1.944872,0.085008
5,5.0,Halden SK 1,Halden SK,2,11.9,Fredrik Eliasson,1.944872,0.085008
6,5.0,Halden SK 1,Halden SK,3,12.8,Thomas Natvig Årstad,1.944872,0.085008
7,5.0,Halden SK 1,Halden SK,4,8.7,Alexander Chepelin,1.944872,0.085008
8,5.0,Halden SK 1,Halden SK,5,8.7,Riccardo Scalet,1.944872,0.085008


In [667]:
# Combine history with 2018 runners
# Placeholder returned when a runner has no usable history; its 0/0 values act
# as a "no data" marker for the later fallback to the regression estimates.
no_history_row = pd.DataFrame([[0,0]], columns=["log_means", "log_stdevs"])

def get_history_row(running_order_row):
    """Find the history row that belongs to one runner of the running order.

    Matching is done on the lower-cased runner name, either as an exact match
    of ``history['name']`` or as a ``"<name>:"`` prefix entry. If that does not
    identify exactly one row, candidates are narrowed to those whose ``teams``
    value contains the upper-cased ``team_base_name``.

    Parameters
    ----------
    running_order_row : mapping-like (e.g. a DataFrame row)
        Must provide string values under ``"name"`` and ``"team_base_name"``.

    Returns
    -------
    pandas.DataFrame
        A one-row frame: the unique match, ``no_history_row`` when nothing
        matches, or - when several candidates remain - the one with the
        largest ``num_runs`` (the ambiguity is printed for inspection).
    """
    name = running_order_row["name"].lower()
    history_names = history['name']

    by_name = history[history_names == name]
    # na=False: rows with a missing name simply do not match instead of
    # producing a NaN mask that cannot be used for boolean indexing.
    by_name_and_colon = history[history_names.str.contains(name + ":", regex=False, na=False)]

    # DataFrame.append was removed in pandas 2.0; pd.concat works on all versions.
    runners = pd.concat([by_name, by_name_and_colon])
    if len(runners) == 1:
        return runners

    team_name = running_order_row["team_base_name"].upper()
    runners = runners[runners['teams'].str.contains(team_name, regex=False, na=False)]
    if len(runners) == 1:
        return runners
    if len(runners) == 0:
        return no_history_row

    # Still ambiguous: report it and fall back to the most experienced candidate.
    print(f"name '{name}' team_name '{team_name}'")
    print(f"by_name {len(by_name)} by_name_and_colon {len(by_name_and_colon)} runners {len(runners)}")
    print(f"Duplicate runner {runners}")
    return runners.sort_values("num_runs", ascending = False).head(1)

def get_estimate_params(running_order_row):
    """Return the historical log-pace parameters for one running-order row.

    Looks up the runner with ``get_history_row`` and takes the first row's
    ``log_means`` / ``log_stdevs`` values, returned as a Series with the keys
    ``history_log_means`` and ``history_log_stdevs``.
    """
    matched = get_history_row(running_order_row)
    params = {
        "history_log_means": matched["log_means"].values[0],
        "history_log_stdevs": matched["log_stdevs"].values[0],
    }
    return pd.Series(params)

# Debug aid: uncomment to restrict the run to a few teams.
#order18 = order18[order18['team'].str.contains("Reak") | order18['team'].str.contains("Puskasil") | order18['team'].str.contains("Rastihaukat")]

# Look up historical parameters for every runner and attach them as columns.
estimate_params = order18.apply(get_estimate_params, axis=1)
order18 = order18.assign(
    history_log_means=estimate_params["history_log_means"],
    history_log_stdevs=estimate_params["history_log_stdevs"],
)

name 'petri laaksonen' team_name 'UNO SK'
by_name 0 by_name_and_colon 6 runners 2
Duplicate runner       mean_team_id            teams                             name  \
3977         611.0  UNO SK-HARRASTE  petri laaksonen:UNO SK-HARRASTE   
3979         698.0           UNO SK           petri laaksonen:UNO SK   

      num_runs  num_valid_times  mean_pace  stdev  pace_1  pace_2  pace_3  \
3977         1                1      8.749    0.0   8.749     NaN     NaN   
3979         1                1      7.124    0.0   7.124     NaN     NaN   

      pace_4  pace_5  pace_6  log_means  log_stdevs  
3977     NaN     NaN     NaN   2.168939         0.0  
3979     NaN     NaN     NaN   1.963469         0.0  
name 'arto laaksonen' team_name 'UNO SK'
by_name 0 by_name_and_colon 6 runners 2
Duplicate runner       mean_team_id            teams                            name  num_runs  \
3984         611.0  UNO SK-HARRASTE  arto laaksonen:UNO SK-HARRASTE         1   
3985         698.0           U

In [668]:
# Prefer the runner's own history; fall back to the regression estimate when the
# historical value is missing (NaN) or not positive (0 marks "no history", and a
# stdev of 0 comes from runners with a single recorded run).
# The comparison must be parenthesised: `&` binds tighter than `>`, so
# `np.isfinite(x) & x > 0` would be evaluated as `(np.isfinite(x) & x) > 0`.
history_means = order18["history_log_means"]
history_stdevs = order18["history_log_stdevs"]

has_history_mean = np.isfinite(history_means) & (history_means > 0)
has_history_stdev = np.isfinite(history_stdevs) & (history_stdevs > 0)

order18['log_means'] = np.where(has_history_mean, history_means, order18["estimated_log_means"])
order18['log_stdevs'] = np.where(has_history_stdev, history_stdevs, order18["estimated_log_stdevs"])
order18.head()


Unnamed: 0,team_id,team,team_base_name,leg,leg_dist,name,estimated_log_means,estimated_log_stdevs,history_log_means,history_log_stdevs,log_means,log_stdevs
4,5.0,Halden SK 1,Halden SK,1,11.0,Emil Wingstedt,1.944872,0.085008,1.763072,0.025436,1.763072,0.025436
5,5.0,Halden SK 1,Halden SK,2,11.9,Fredrik Eliasson,1.944872,0.085008,1.842846,0.093588,1.842846,0.093588
6,5.0,Halden SK 1,Halden SK,3,12.8,Thomas Natvig Årstad,1.944872,0.085008,1.79791,0.1036,1.79791,0.1036
7,5.0,Halden SK 1,Halden SK,4,8.7,Alexander Chepelin,1.944872,0.085008,1.929038,0.069687,1.929038,0.069687
8,5.0,Halden SK 1,Halden SK,5,8.7,Riccardo Scalet,1.944872,0.085008,1.958229,0.119586,1.958229,0.119586


In [669]:
# Calculate personal estimates
# scipy's lognorm is parameterised with s = sigma and scale = exp(mu).

log_stdevs = order18['log_stdevs']
# NOTE: despite its name this holds exp(mu), i.e. the `scale` argument below.
log_means = np.exp(order18['log_means'])

lognorm_params = dict(s = log_stdevs, scale = log_means)
intervals95 = lognorm.interval(0.95, **lognorm_params)
means = lognorm.mean(**lognorm_params)
medians = lognorm.median(**lognorm_params)

In [670]:
# Scale each per-runner pace statistic by the leg distance to get leg-time columns.
pace_stats = [
    ("ind_95_start", intervals95[0]),
    ("ind_95_end", intervals95[1]),
    ("ind_mean", means),
    ("ind_median", medians),
]
for column, pace_stat in pace_stats:
    leg_time = pd.Series(pace_stat * order18.leg_dist).values
    order18 = order18.assign(**{column: leg_time})

In [671]:
# One row per team: pivot the leg number out of the index so that every field
# becomes a (field, leg) column pair.
team_leg_index = order18.set_index(["team_id", "leg"])
by_teams = team_leg_index.unstack(level="leg")
by_teams.head()

Unnamed: 0_level_0,team,team,team,team,team,team,team,team_base_name,team_base_name,team_base_name,...,ind_mean,ind_mean,ind_mean,ind_median,ind_median,ind_median,ind_median,ind_median,ind_median,ind_median
leg,1,2,3,4,5,6,7,1,2,3,...,5,6,7,1,2,3,4,5,6,7
team_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
5.0,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK,Halden SK,Halden SK,...,62.097304,75.79495,85.633899,64.133501,75.142332,77.273814,59.881118,61.654866,75.521582,85.578863
6.0,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat,Helsingin Suunnistajat,Helsingin Suunnistajat,...,56.917205,73.143192,95.454671,71.06753,81.893692,79.259454,56.622836,56.567705,72.479759,95.225971
11.0,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne,OK Linne,OK Linne,...,56.721972,67.114796,92.568771,68.047577,75.63089,74.017768,51.993243,55.59339,66.93345,92.321787
13.0,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot,Vehkalahden Veikot,Vehkalahden Veikot,...,55.101333,72.915405,95.417328,71.504227,74.17452,85.98299,57.192691,55.006088,72.486686,94.986823
14.0,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima,Ikaalisten Nouseva-Voima,Ikaalisten Nouseva-Voima,...,62.576465,66.628415,102.489651,70.737369,77.901033,84.792953,54.158064,62.080109,66.525176,101.902216


In [672]:
# remove teams missing some runners
print(len(by_teams))
# A team is kept only when log_means is finite for every one of legs 1..7.
leg_log_means = by_teams["log_means"]
all_legs_known = np.isfinite(leg_log_means[1])
for leg in range(2, 8):
    all_legs_known = all_legs_known & np.isfinite(leg_log_means[leg])
by_teams = by_teams[all_legs_known]
print(len(by_teams))

1350
1333


In [673]:
# The sum of log-normal variables has no closed-form distribution,
# so we simulate many runs for each runner, accumulate the legs and then
# do statistics on the simulated cumulative times.
def simulate_relay_estimates(row, n_samples=10000, random_state=None):
    """Monte-Carlo estimate of a team's cumulative finish time after each leg.

    Parameters
    ----------
    row : pandas.Series
        One team row whose index is a (field, leg) MultiIndex. The fields
        ``log_means``, ``log_stdevs``, ``leg_dist`` and ``name`` are read for
        legs 1..7.
    n_samples : int, default 10000
        Number of simulated runs per leg.
    random_state : None, int, or numpy random state, default None
        ``None`` keeps scipy's default (global numpy random state). An int
        seeds one generator shared by all legs, making the result
        reproducible. Any other object is passed to ``lognorm.rvs`` unchanged.

    Returns
    -------
    pandas.Series
        For each leg ``k`` in 1..7: ``fin_start95_k`` / ``fin_end95_k`` (2.5 %
        and 97.5 % quantiles), ``fin_median_k``, ``fin_mean_k`` of the
        cumulative time up to and including leg ``k``, plus
        ``fin_sum_log_mean_k`` / ``fin_sum_log_std_k``, the mean and population
        standard deviation of the log of that cumulative time.
    """
    legs = list(range(1, 8))

    # Build ONE generator from an int seed; re-seeding per leg would make the
    # legs perfectly correlated.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    leg_times = np.empty((n_samples, len(legs)))
    for pos, leg in enumerate(legs):
        log_mean = row["log_means"][leg]
        if np.isnan(log_mean):
            # Diagnostic output for rows that still lack a pace estimate.
            print(row["log_means"])
            print(row["name"])
        leg_times[:, pos] = row["leg_dist"][leg] * lognorm.rvs(
            s = row["log_stdevs"][leg],
            scale = np.exp(log_mean),
            size = n_samples,
            random_state = random_state,
        )

    # Column k holds leg_1 + ... + leg_k for every simulated run.
    samples_sums = pd.DataFrame(np.cumsum(leg_times, axis=1), columns=legs)

    start95 = samples_sums.quantile(0.025)
    end95 = samples_sums.quantile(0.975)
    medians = samples_sums.median()
    means = samples_sums.mean()

    # Explicit per-column reductions: np.mean / np.std on a DataFrame depend on
    # the pandas version (newer versions may reduce over both axes).
    # ddof=0 matches numpy's default for np.std.
    sum_logs = np.log(samples_sums)
    sum_log_means = sum_logs.mean(axis=0)
    sum_log_stds = sum_logs.std(axis=0, ddof=0)

    stats = [
        ("fin_start95", start95),
        ("fin_end95", end95),
        ("fin_median", medians),
        ("fin_mean", means),
        ("fin_sum_log_mean", sum_log_means),
        ("fin_sum_log_std", sum_log_stds),
    ]
    new_cols = {
        f"{prefix}_{leg}": stat.values[pos]
        for prefix, stat in stats
        for pos, leg in enumerate(legs)
    }
    return pd.Series(new_cols)

# Run the simulation once per team row; each returned Series becomes one output row.
relay_estimates = by_teams.apply(simulate_relay_estimates, axis="columns")
relay_estimates.head()

Unnamed: 0_level_0,fin_start95_1,fin_start95_2,fin_start95_3,fin_start95_4,fin_start95_5,fin_start95_6,fin_start95_7,fin_end95_1,fin_end95_2,fin_end95_3,...,fin_sum_log_mean_5,fin_sum_log_mean_6,fin_sum_log_mean_7,fin_sum_log_std_1,fin_sum_log_std_2,fin_sum_log_std_3,fin_sum_log_std_4,fin_sum_log_std_5,fin_sum_log_std_6,fin_sum_log_std_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5.0,61.068664,126.5909,197.342337,255.630388,313.619275,386.200031,471.005642,67.454065,154.299729,239.03922,...,5.825985,6.027624,6.215389,0.02543,0.051251,0.048885,0.041286,0.0403,0.03655,0.030895
6.0,51.9528,125.517711,203.374851,258.164184,313.117612,380.833066,474.867426,97.501689,188.58765,269.380627,...,5.850496,6.041755,6.246491,0.160905,0.103981,0.072135,0.061364,0.054612,0.051184,0.043697
11.0,57.355025,126.353889,196.555296,248.21866,297.117076,362.738504,452.142016,81.153273,164.935392,242.869797,...,5.78983,5.976598,6.187681,0.087765,0.067829,0.053058,0.043722,0.049813,0.043432,0.037818
13.0,59.585497,131.456669,207.902996,263.994792,318.581362,387.470731,477.730596,85.85777,162.411564,261.661048,...,5.844128,6.035649,6.241315,0.093665,0.053898,0.058684,0.048783,0.042189,0.039848,0.037022
14.0,58.245333,131.450892,208.187045,261.745595,320.291924,385.902974,480.98847,86.613889,169.785689,263.56783,...,5.860969,6.034773,6.253641,0.102432,0.065862,0.060422,0.050187,0.047281,0.040754,0.039182


In [674]:
# Evaluated but not shown: only a cell's last expression is displayed.
len(relay_estimates)
# Log-mean / log-stdev of the cumulative time for the first three legs.
first_legs_columns = [
    f"fin_sum_log_{stat}_{leg}" for leg in (1, 2, 3) for stat in ("mean", "std")
]
relay_estimates[first_legs_columns]

Unnamed: 0_level_0,fin_sum_log_mean_1,fin_sum_log_std_1,fin_sum_log_mean_2,fin_sum_log_std_2,fin_sum_log_mean_3,fin_sum_log_std_3
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5.0,4.161076,0.025430,4.937422,0.051251,5.379561,0.048885
6.0,4.262478,0.160905,5.035708,0.103981,5.452881,0.072135
11.0,4.222085,0.087765,4.970126,0.067829,5.386286,0.053058
13.0,4.270654,0.093665,4.983181,0.053898,5.449487,0.058684
14.0,4.260353,0.102432,5.003780,0.065862,5.455936,0.060422
15.0,4.177000,0.032807,4.906872,0.047144,5.371497,0.051503
17.0,4.184629,0.074222,4.928229,0.040963,5.351670,0.033275
18.0,4.247732,0.092575,4.996102,0.075415,5.413634,0.057294
20.0,4.338449,0.115250,5.080502,0.070404,5.498004,0.051450
23.0,4.230453,0.034863,5.072246,0.050141,5.468641,0.039135


In [675]:
# Flatten the troublesome (field, leg) column MultiIndex into "field_leg" names.
by_teams_flat = by_teams.copy()
flat_names = []
for field, leg in by_teams_flat.columns:
    flat_names.append(f'{field}_{leg}')
by_teams_flat.columns = flat_names
# Display only: the result is not stored, so by_teams_flat keeps team_id as its index.
by_teams_flat.reset_index()


Unnamed: 0,team_id,team_1,team_2,team_3,team_4,team_5,team_6,team_7,team_base_name_1,team_base_name_2,...,ind_mean_5,ind_mean_6,ind_mean_7,ind_median_1,ind_median_2,ind_median_3,ind_median_4,ind_median_5,ind_median_6,ind_median_7
0,5.0,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK,Halden SK,...,62.097304,75.794950,85.633899,64.133501,75.142332,77.273814,59.881118,61.654866,75.521582,85.578863
1,6.0,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat,Helsingin Suunnistajat,...,56.917205,73.143192,95.454671,71.067530,81.893692,79.259454,56.622836,56.567705,72.479759,95.225971
2,11.0,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne,OK Linne,...,56.721972,67.114796,92.568771,68.047577,75.630890,74.017768,51.993243,55.593390,66.933450,92.321787
3,13.0,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot,Vehkalahden Veikot,...,55.101333,72.915405,95.417328,71.504227,74.174520,85.982990,57.192691,55.006088,72.486686,94.986823
4,14.0,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima,Ikaalisten Nouseva-Voima,...,62.576465,66.628415,102.489651,70.737369,77.901033,84.792953,54.158064,62.080109,66.525176,101.902216
5,15.0,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä,Vaajakosken Terä,...,58.325239,67.159586,95.230899,65.156383,69.936300,79.595441,51.979770,57.600834,66.807875,95.037990
6,17.0,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving,IL Tyrving,...,57.333536,67.761076,84.976476,65.716365,72.341834,72.698226,53.732789,57.079184,67.619799,84.945242
7,18.0,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK,Nydalens SK,...,50.992857,65.046605,94.260852,69.885691,77.440836,76.428838,55.973405,50.808000,64.810800,93.949004
8,20.0,Ol Norska 1,Ol Norska 1,Ol Norska 1,Ol Norska 1,Ol Norska 1,Ol Norska 1,Ol Norska 1,Ol Norska,Ol Norska,...,67.481616,69.687017,89.815552,76.561403,83.756426,83.105220,57.284005,67.366112,69.627537,89.489700
9,23.0,Lillomarka OL 1,Lillomarka OL 1,Lillomarka OL 1,Lillomarka OL 1,Lillomarka OL 1,Lillomarka OL 1,Lillomarka OL 1,Lillomarka OL,Lillomarka OL,...,62.420170,75.043866,99.447056,68.730625,90.487600,77.493365,53.892610,62.145386,74.922140,99.161863


In [676]:
# Put the per-runner fields and the simulated relay statistics side by side,
# keeping only the teams (index values) present in both frames.
estimates = pd.concat(
    [by_teams_flat, relay_estimates],
    axis="columns",
    join="inner",
)

In [677]:
# Convert minutes to date and times, counted from the start timestamp.
start_timestamp = pd.Timestamp(year = 2018, month = 6, day = 16, hour = 23)

for leg in range(1,8):
    for stat in ("median", "start95", "end95"):
        seconds_from_start = estimates[f"fin_{stat}_{leg}"] * 60
        estimates[f"fint_{stat}_{leg}"] = pd.to_datetime(seconds_from_start, unit = "s", origin= start_timestamp)


In [678]:
# Sort teams by estimated total time (median cumulative time after leg 7)
# and write the result as a tab-separated file.
estimates = estimates.sort_values(by="fin_median_7")

estimates.to_csv(path_or_buf='data/team_estimates_ju2018.tsv', sep="\t")

In [679]:
# Ten fastest teams by estimated median total time.
top_columns = ["team_1", "fin_median_7"]
estimates[top_columns].head(10)

Unnamed: 0_level_0,team_1,fin_median_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1
17.0,IL Tyrving 1,474.847187
35.0,OLV Baselland 1,482.001957
11.0,OK Linne 1,486.295016
15.0,Vaajakosken Terä 1,488.283084
18.0,Nydalens SK 1,491.038974
5.0,Halden SK 1,500.520441
27.0,OK Trian 1,503.928476
34.0,IGTISA 1,506.527382
13.0,Vehkalahden Veikot 1,513.441705
37.0,Järla Orientering 1,515.760262


In [680]:
# Pick out the teams of interest by a pattern match on the team name.
team_name = estimates['team_1'].str
is_followed = team_name.contains("Reak") | team_name.contains("Puskasil") | team_name.contains("Rastihaukat")
teams_to_follow = estimates[is_followed]
teams_to_follow[["team_1", "fin_median_7", "fin_start95_7", "fin_end95_7"]]

Unnamed: 0_level_0,team_1,fin_median_7,fin_start95_7,fin_end95_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
429.0,Reaktor Innovations 1,745.53022,699.702715,796.720908
568.0,Rastihaukat 1,751.49819,697.886952,809.584257
1270.0,Reaktor Innovations 2,884.108108,795.57993,991.770455
1246.0,Rastihaukat 2,909.373061,832.156951,994.759576
1131.0,Puskasilimät OK 1,913.631956,854.613051,978.164262


In [681]:
# Reshape from one row per team ("field_<leg>" columns) to one row per (team, leg).
# The stub names are obtained by stripping the "_1" suffix, so the columns must
# be selected by suffix - a substring test ("_1" in c) could pick up a column
# that merely contains "_1" and then cut off the wrong two characters.
leg_suffix = "_1"
leg_1_cols = [c for c in estimates.columns.values if c.endswith(leg_suffix)]
column_base_names = [c[:-len(leg_suffix)] for c in leg_1_cols]
runner_estimates = pd.wide_to_long(
    estimates.reset_index(),
    stubnames=column_base_names,
    i="team_id",
    j="leg",
    sep="_",
).sort_values(by=['team_id', 'leg'])
# Drop fields that are not needed in the per-runner output.
runner_estimates = runner_estimates.drop(['team_base_name', 'estimated_log_means', 'estimated_log_stdevs'], axis=1)
runner_estimates.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,team,leg_dist,name,history_log_means,history_log_stdevs,log_means,log_stdevs,ind_95_start,ind_95_end,ind_mean,ind_median,fin_start95,fin_end95,fin_median,fin_mean,fin_sum_log_mean,fin_sum_log_std,fint_median,fint_start95,fint_end95
team_id,leg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5.0,1,Halden SK 1,11.0,Emil Wingstedt,1.763072,0.025436,1.763072,0.025436,61.014607,67.411824,64.154251,64.133501,61.068664,67.454065,64.138783,64.161273,4.161076,0.02543,2018-06-17 00:04:08.326966,2018-06-17 00:01:04.119833,2018-06-17 00:07:27.243877
5.0,2,Halden SK 1,11.9,Fredrik Eliasson,1.842846,0.093588,1.842846,0.093588,62.549244,90.270797,75.472132,75.142332,126.5909,154.299729,139.203952,139.593997,4.937422,0.051251,2018-06-17 01:19:12.237107,2018-06-17 01:06:35.454018,2018-06-17 01:34:17.983761
5.0,3,Halden SK 1,12.8,Thomas Natvig Årstad,1.79791,0.1036,1.79791,0.1036,63.0736,94.671024,77.689619,77.273814,197.342337,239.03922,216.734449,217.186618,5.379561,0.048885,2018-06-17 02:36:44.066927,2018-06-17 02:17:20.540203,2018-06-17 02:59:02.353220
5.0,4,Halden SK 1,8.7,Alexander Chepelin,1.929038,0.069687,1.929038,0.069687,52.236299,68.64476,60.026693,59.881118,255.630388,300.999986,276.884048,277.202747,5.623896,0.041286,2018-06-17 03:36:53.042887,2018-06-17 03:15:37.823259,2018-06-17 04:00:59.999132
5.0,5,Halden SK 1,8.7,Riccardo Scalet,1.958229,0.119586,1.958229,0.119586,48.772555,77.939788,62.097304,61.654866,313.619275,366.825881,339.002434,339.270516,5.825985,0.0403,2018-06-17 04:39:00.146036,2018-06-17 04:13:37.156487,2018-06-17 05:06:49.552883


In [682]:
# Persist the per-runner estimates as a tab-separated file.
runner_estimates.to_csv(path_or_buf='data/runner_estimates_ju2018.tsv', sep="\t")

In [683]:
# Inspect the pace parameters of runners whose team name matches "Reak".
is_reak_team = runner_estimates['team'].str.contains("Reak")
runner_estimates[is_reak_team][["name", "log_means", "log_stdevs", "ind_median"]]


Unnamed: 0_level_0,Unnamed: 1_level_0,name,log_means,log_stdevs,ind_median
team_id,leg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
429.0,1,Mikko Peltonen,2.184951,0.081299,97.792383
429.0,2,Pasi Huhtiniemi,2.222239,0.072482,109.812846
429.0,3,Joni Freeman,2.130879,0.113846,107.805019
429.0,4,Tuomas Kareinen,2.29324,0.16007,86.19078
429.0,5,Janne Vaittinen,2.324384,0.039861,88.917301
429.0,6,Olavi Kanerva,2.128667,0.092121,90.759473
429.0,7,Jyri Kytömäki,2.355319,0.048494,161.284847
1270.0,1,Antti-Ville Jokela,2.363116,0.10623,116.864
1270.0,2,Karri-Pekka Laakso,2.414677,0.081841,133.115223
1270.0,3,Oskari Pirttikoski,2.637533,0.194072,178.927107


In [684]:
# Compact table for printing: indexed by team_id, numbers rounded to two
# decimals, timestamps reduced to HH:MM.
for_print = runner_estimates.copy().reset_index().set_index('team_id').round(2)
for clock_column in ("fint_median", "fint_start95", "fint_end95"):
    for_print[clock_column] = for_print[clock_column].dt.strftime("%H:%M")

print_columns = [
    'team', 'leg', 'name',
    'ind_median', 'ind_95_start', 'ind_95_end',
    'fin_median',
    'fint_median', 'fint_start95', 'fint_end95',
]
for_print = for_print[print_columns]
for_print.head()

Unnamed: 0_level_0,team,leg,name,ind_median,ind_95_start,ind_95_end,fin_median,fint_median,fint_start95,fint_end95
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
5.0,Halden SK 1,1,Emil Wingstedt,64.13,61.01,67.41,64.14,00:04,00:01,00:07
5.0,Halden SK 1,2,Fredrik Eliasson,75.14,62.55,90.27,139.2,01:19,01:06,01:34
5.0,Halden SK 1,3,Thomas Natvig Årstad,77.27,63.07,94.67,216.73,02:36,02:17,02:59
5.0,Halden SK 1,4,Alexander Chepelin,59.88,52.24,68.64,276.88,03:36,03:15,04:00
5.0,Halden SK 1,5,Riccardo Scalet,61.65,48.77,77.94,339.0,04:39,04:13,05:06


In [685]:
# Write the print table as a tab-separated file (relative to the working directory).
for_print.to_csv(path_or_buf='for_print_ju2018.tsv', sep="\t")

In [686]:
# Show the print rows for the followed teams.
print_team = for_print['team'].str
is_followed_row = print_team.contains("Reak") | print_team.contains("Puskasil") | print_team.contains("Rastihaukat 2")
for_print[is_followed_row]


Unnamed: 0_level_0,team,leg,name,ind_median,ind_95_start,ind_95_end,fin_median,fint_median,fint_start95,fint_end95
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
429.0,Reaktor Innovations 1,1,Mikko Peltonen,97.79,83.39,114.68,98.02,00:38,00:23,00:54
429.0,Reaktor Innovations 1,2,Pasi Huhtiniemi,109.81,95.27,126.58,208.24,02:28,02:06,02:50
429.0,Reaktor Innovations 1,3,Joni Freeman,107.81,86.24,134.75,316.67,04:16,03:45,04:51
429.0,Reaktor Innovations 1,4,Tuomas Kareinen,86.19,62.98,117.95,404.13,05:44,05:03,06:29
429.0,Reaktor Innovations 1,5,Janne Vaittinen,88.92,82.24,96.14,493.0,07:12,06:31,07:59
429.0,Reaktor Innovations 1,6,Olavi Kanerva,90.76,75.77,108.72,584.14,08:44,07:59,09:32
429.0,Reaktor Innovations 1,7,Jyri Kytömäki,161.28,146.66,177.37,745.53,11:25,10:39,12:16
1131.0,Puskasilimät OK 1,1,Tuomas Ketonen,124.33,101.43,152.41,124.34,01:04,00:41,01:33
1131.0,Puskasilimät OK 1,2,Petri Miettinen,131.06,122.12,140.64,255.67,03:15,02:50,03:46
1131.0,Puskasilimät OK 1,3,Jaakko Havola,146.75,135.82,158.57,402.67,05:42,05:14,06:14


In [687]:
# Subset of the per-runner estimates exported for the web front end.
web_columns = [
    'team_id', 'leg', 'team', 'name',
    'fin_sum_log_mean', 'fin_sum_log_std',
    'fin_median', 'fint_median',
]
for_web = runner_estimates.copy().reset_index()
for_web = for_web[web_columns]
for_web

Unnamed: 0,team_id,leg,team,name,fin_sum_log_mean,fin_sum_log_std,fin_median,fint_median
0,5.0,1,Halden SK 1,Emil Wingstedt,4.161076,0.025430,64.138783,2018-06-17 00:04:08.326966000
1,5.0,2,Halden SK 1,Fredrik Eliasson,4.937422,0.051251,139.203952,2018-06-17 01:19:12.237107000
2,5.0,3,Halden SK 1,Thomas Natvig Årstad,5.379561,0.048885,216.734449,2018-06-17 02:36:44.066927000
3,5.0,4,Halden SK 1,Alexander Chepelin,5.623896,0.041286,276.884048,2018-06-17 03:36:53.042887000
4,5.0,5,Halden SK 1,Riccardo Scalet,5.825985,0.040300,339.002434,2018-06-17 04:39:00.146036000
5,5.0,6,Halden SK 1,Andreas Rüedlinger,6.027624,0.036550,414.837137,2018-06-17 05:54:50.228203000
6,5.0,7,Halden SK 1,Magne Dähli,6.215389,0.030895,500.520441,2018-06-17 07:20:31.226446000
7,6.0,1,Helsingin Suunnistajat 1,Tuomas Heikkilä,4.262478,0.160905,70.982160,2018-06-17 00:10:58.929612000
8,6.0,2,Helsingin Suunnistajat 1,Topi Syrjäläinen,5.035708,0.103981,153.619447,2018-06-17 01:33:37.166822000
9,6.0,3,Helsingin Suunnistajat 1,Einari Heinaro,5.452881,0.072135,233.145661,2018-06-17 02:53:08.739632000


In [688]:
# Export the web table as a JSON array with one object per row.
for_web.to_json(path_or_buf='web-lib/for_web_ju2018.json', orient="records")