In [627]:
import pandas as pd
import numpy as np
from scipy.stats import lognorm
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_absolute_error

In [628]:
# Load the tab-separated pace history; `history` and `df_all` start out as the same frame.
in_file_name = 'data/grouped_paces_ju.tsv'
history = df_all = pd.read_csv(in_file_name, sep="\t")

In [629]:
# Load the tab-separated 2018 running order.
order18_path = 'data/running_order_j2018_ju.tsv'
order18 = pd.read_csv(order18_path, sep="\t")

In [630]:
# Per-row mean and standard deviation of the log paces, ignoring missing paces.
pace_columns = [f"pace_{run}" for run in range(1, 7)]
paces = history[pace_columns]
logs = np.log(paces)
means = np.nanmean(logs, axis=1)
stdevs = np.nanstd(logs, axis=1)
# Attach positionally (plain arrays), not by index alignment.
history = history.assign(
    log_means=np.asarray(means),
    log_stdevs=np.asarray(stdevs),
)

  after removing the cwd from sys.path.
  keepdims=keepdims)


In [631]:
# Fit linear models on rows that have a second pace and predict a fallback
# log-mean / log-stdev for every 2018 entry from its team id. These estimates
# are only meant to be used when a runner has no history of their own.
with_history = history[np.isfinite(history.pace_2)]
x = with_history.mean_team_id.values.reshape(-1, 1)
order18_team_ids = order18.team_id.values.reshape(-1, 1)

log_means = with_history.log_means.values.reshape(-1, 1)
log_means_model = linear_model.LinearRegression().fit(x, log_means)
estimated_log_means = log_means_model.predict(order18_team_ids)
order18 = order18.assign(estimated_log_means=estimated_log_means)

log_stdevs = with_history.log_stdevs.values.reshape(-1, 1)
log_stdevs_model = linear_model.LinearRegression().fit(x, log_stdevs)
estimated_log_stdevs = log_stdevs_model.predict(order18_team_ids)
order18 = order18.assign(estimated_log_stdevs=estimated_log_stdevs)

order18.head()

Unnamed: 0,team_id,team,team_base_name,leg,leg_dist,name,estimated_log_means,estimated_log_stdevs
0,5,Halden SK 1,Halden SK,1,11.0,Emil Wingstedt,1.944872,0.085008
1,5,Halden SK 1,Halden SK,2,11.9,Fredrik Eliasson,1.944872,0.085008
2,5,Halden SK 1,Halden SK,3,12.8,Thomas Natvig Årstad,1.944872,0.085008
3,5,Halden SK 1,Halden SK,4,8.7,Alexander Chepelin,1.944872,0.085008
4,5,Halden SK 1,Halden SK,5,8.7,Riccardo Scalet,1.944872,0.085008


In [632]:
# Combine history with 2018 runners
# Placeholder returned when no history matches: a single row of zeros.
no_history_row = pd.DataFrame({"log_means": [0], "log_stdevs": [0]})
def get_history_row(running_order_row):
    """Find the history row that best matches one running-order entry.

    The runner's name is lower-cased and matched against ``history['name']``
    either exactly or as the prefix of a ``"<name>:"`` entry (presumably a
    disambiguated duplicate name -- confirm against the history file).

    Resolution order:
      1. exactly one name match -> return it (the team is not checked);
      2. otherwise keep only matches whose ``teams`` text contains the
         upper-cased ``team_base_name``;
      3. one left -> return it; none left -> return ``no_history_row``;
      4. several left -> print a diagnostic and return the one with the
         largest ``num_runs``.

    Parameters
    ----------
    running_order_row : mapping or pandas.Series
        Must provide ``"name"`` and ``"team_base_name"``.

    Returns
    -------
    pandas.DataFrame
        A single-row frame exposing ``log_means`` and ``log_stdevs``.
    """
    name = running_order_row["name"].lower()

    # na=False keeps the masks strictly boolean even if a history value is missing.
    by_name = history[history['name'] == name]
    by_name_and_colon = history[history['name'].str.contains(name + ":", regex=False, na=False)]

    # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
    runners = pd.concat([by_name, by_name_and_colon])
    if len(runners) == 1:
        return runners

    team_name = running_order_row["team_base_name"].upper()
    runners = runners[runners['teams'].str.contains(team_name, regex=False, na=False)]
    if len(runners) == 1:
        return runners
    if len(runners) == 0:
        return no_history_row

    # Still ambiguous: report it and fall back to the most experienced candidate.
    print(f"name '{name}' team_name '{team_name}'")
    print(f"by_name {len(by_name)} by_name_and_colon {len(by_name_and_colon)} runners {len(runners)}")
    print(f"Duplicate runner {runners}")
    return runners.sort_values("num_runs", ascending=False).head(1)

def get_estimate_params(running_order_row):
    """Return the matched history statistics for one running-order entry.

    Looks the entry up with ``get_history_row`` and takes the first row's
    ``log_means`` and ``log_stdevs``, returned as a Series keyed
    ``history_log_means`` / ``history_log_stdevs``.
    """
    matched = get_history_row(running_order_row)
    first_mean, first_stdev = (
        matched[column].values[0] for column in ("log_means", "log_stdevs")
    )
    return pd.Series({"history_log_means": first_mean, "history_log_stdevs": first_stdev})

# Debug aid: uncomment to restrict the run to a few teams.
#order18 = order18[order18['team'].str.contains("Reak") | order18['team'].str.contains("Puskasil") | order18['team'].str.contains("Rastihaukat")]

# Look up every runner's history and attach it next to the regression estimates.
estimate_params = order18.apply(get_estimate_params, axis=1)
order18 = order18.assign(
    history_log_means=estimate_params.history_log_means,
    history_log_stdevs=estimate_params.history_log_stdevs,
)

In [633]:
# Use a runner's own history when it is finite and positive; otherwise fall
# back to the team-id regression estimate.
# The `> 0` comparison must be parenthesised: `&` binds tighter than `>`, so
# `isfinite(x) & x > 0` is evaluated as `(isfinite(x) & x) > 0`.
has_history_means = np.isfinite(order18["history_log_means"]) & (order18["history_log_means"] > 0)
has_history_stdevs = np.isfinite(order18["history_log_stdevs"]) & (order18["history_log_stdevs"] > 0)

order18['log_means'] = np.where(has_history_means, order18["history_log_means"], order18["estimated_log_means"])
order18['log_stdevs'] = np.where(has_history_stdevs, order18["history_log_stdevs"], order18["estimated_log_stdevs"])
order18.head()


Unnamed: 0,team_id,team,team_base_name,leg,leg_dist,name,estimated_log_means,estimated_log_stdevs,history_log_means,history_log_stdevs,log_means,log_stdevs
0,5,Halden SK 1,Halden SK,1,11.0,Emil Wingstedt,1.944872,0.085008,1.763072,0.025436,1.763072,0.025436
1,5,Halden SK 1,Halden SK,2,11.9,Fredrik Eliasson,1.944872,0.085008,1.842846,0.093588,1.842846,0.093588
2,5,Halden SK 1,Halden SK,3,12.8,Thomas Natvig Årstad,1.944872,0.085008,1.79791,0.1036,1.79791,0.1036
3,5,Halden SK 1,Halden SK,4,8.7,Alexander Chepelin,1.944872,0.085008,1.929038,0.069687,1.929038,0.069687
4,5,Halden SK 1,Halden SK,5,8.7,Riccardo Scalet,1.944872,0.085008,1.958229,0.119586,1.958229,0.119586


In [634]:
# Calculate personal estimates from a log-normal pace model,
# parameterised as s = sigma and scale = exp(mu).

# NOTE: despite its name, `log_means` holds the scale exp(mu) from here on.
log_means = np.exp(order18['log_means'])
log_stdevs = order18['log_stdevs']

pace_distribution = lognorm(s=log_stdevs, scale=log_means)
intervals95 = pace_distribution.interval(0.95)
means = pace_distribution.mean()
medians = pace_distribution.median()

In [635]:
# Scale each pace statistic by the leg distance to get per-leg times.
leg_lengths = order18.leg_dist
order18 = (
    order18
    .assign(ind_95_start=(intervals95[0] * leg_lengths).values)
    .assign(ind_95_end=(intervals95[1] * leg_lengths).values)
    .assign(ind_mean=(means * leg_lengths).values)
    .assign(ind_median=(medians * leg_lengths).values)
)

In [636]:
# One row per team, with every field spread into one column per leg.
by_teams = order18.set_index(["team_id", "leg"]).unstack(level="leg")
by_teams.head()

Unnamed: 0_level_0,team,team,team,team,team,team,team,team_base_name,team_base_name,team_base_name,...,ind_mean,ind_mean,ind_mean,ind_median,ind_median,ind_median,ind_median,ind_median,ind_median,ind_median
leg,1,2,3,4,5,6,7,1,2,3,...,5,6,7,1,2,3,4,5,6,7
team_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
5,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK,Halden SK,Halden SK,...,62.097304,75.79495,85.633899,64.133501,75.142332,77.273814,59.881118,61.654866,75.521582,85.578863
11,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne,OK Linne,OK Linne,...,56.721972,67.114796,92.568771,68.047577,75.63089,74.017768,51.993243,55.59339,66.93345,92.321787
14,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima,Ikaalisten Nouseva-Voima,Ikaalisten Nouseva-Voima,...,62.576465,66.628415,102.489651,70.737369,77.901033,84.792953,54.158064,62.080109,66.525176,101.902216
17,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving,IL Tyrving,IL Tyrving,...,57.333536,67.761076,84.976476,65.716365,72.341834,72.698226,53.732789,57.079184,67.619799,84.945242
18,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK,Nydalens SK,Nydalens SK,...,50.992857,65.046605,94.260852,69.885691,77.440836,76.428838,55.973405,50.808,64.8108,93.949004


In [637]:
# Remove teams that lack a finite log_means value on any of legs 1-7.
print(len(by_teams))
leg_log_means = by_teams["log_means"][list(range(1, 8))]
has_all_legs = np.isfinite(leg_log_means).all(axis=1)
by_teams = by_teams[has_all_legs]
print(len(by_teams))

1103
1080


In [638]:
# The sum of log-normal variables has no closed-form distribution, so each
# team's cumulative finish times are estimated by simulation: draw many
# leg times per runner, accumulate them leg by leg, and summarise the samples.
def simulate_relay_estimates(row, n_samples=10000, n_legs=7, random_state=None):
    """Simulate one team's cumulative finish time after each leg.

    Parameters
    ----------
    row : pandas.Series
        One team, indexed by ``(field, leg)``. Reads ``log_means``,
        ``log_stdevs`` and ``leg_dist`` for legs ``1..n_legs`` (and ``name``
        for the diagnostic printed when a ``log_means`` value is NaN).
    n_samples : int, default 10000
        Number of simulated leg times drawn per runner.
    n_legs : int, default 7
        Number of legs in the relay.
    random_state : None, int, or numpy RandomState/Generator, optional
        Source of randomness. ``None`` uses NumPy's global state. An int seeds
        one generator that is shared by all legs, so the legs stay independent
        of each other while the whole call is reproducible.

    Returns
    -------
    pandas.Series
        For every leg: ``fin_start95_<leg>`` / ``fin_end95_<leg>`` (2.5 % and
        97.5 % quantiles), ``fin_median_<leg>``, ``fin_mean_<leg>`` of the
        cumulative time, and ``fin_sum_log_mean_<leg>`` /
        ``fin_sum_log_std_<leg>`` (mean and population standard deviation of
        the log of the cumulative time). Times are in the unit of
        pace * leg_dist (presumably minutes -- confirm against the inputs).
    """
    legs = list(range(1, n_legs + 1))

    # Turn a seed into one generator up front; passing the bare seed to every
    # rvs call would give all legs identical random draws.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    leg_samples = []
    for leg in legs:
        if np.isnan(row["log_means"][leg]):
            # Diagnostic only: the NaN propagates into this team's results.
            print(row["log_means"])
            print(row["name"])
        leg_samples.append(
            row["leg_dist"][leg] * lognorm.rvs(
                s=row["log_stdevs"][leg],
                scale=np.exp(row["log_means"][leg]),
                size=n_samples,
                random_state=random_state,
            )
        )

    # Column k holds leg_1 + ... + leg_k for every simulated race.
    samples_sums = pd.DataFrame(
        np.cumsum(np.column_stack(leg_samples), axis=1), columns=legs
    )
    sum_logs = np.log(samples_sums)

    # Explicit per-column reductions: np.mean / np.std on a DataFrame can
    # collapse to a single scalar on recent pandas. ddof=0 keeps the
    # population standard deviation that np.std computes.
    stats_by_prefix = {
        "fin_start95": samples_sums.quantile(0.025),
        "fin_end95": samples_sums.quantile(0.975),
        "fin_median": samples_sums.median(),
        "fin_mean": samples_sums.mean(),
        "fin_sum_log_mean": sum_logs.mean(),
        "fin_sum_log_std": sum_logs.std(ddof=0),
    }
    new_cols = {
        f"{prefix}_{leg}": per_leg[leg]
        for prefix, per_leg in stats_by_prefix.items()
        for leg in legs
    }
    return pd.Series(new_cols)

# Run the simulation once per team (row-wise apply).
relay_estimates = by_teams.apply(simulate_relay_estimates, axis="columns")
relay_estimates.head()

Unnamed: 0_level_0,fin_start95_1,fin_start95_2,fin_start95_3,fin_start95_4,fin_start95_5,fin_start95_6,fin_start95_7,fin_end95_1,fin_end95_2,fin_end95_3,...,fin_sum_log_mean_5,fin_sum_log_mean_6,fin_sum_log_mean_7,fin_sum_log_std_1,fin_sum_log_std_2,fin_sum_log_std_3,fin_sum_log_std_4,fin_sum_log_std_5,fin_sum_log_std_6,fin_sum_log_std_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,60.941195,126.383686,197.083336,255.179316,312.999904,386.320286,471.315829,67.423464,154.778261,239.665396,...,5.826917,6.028734,6.216316,0.025649,0.051939,0.050357,0.042362,0.041117,0.036754,0.03103
11,57.276225,126.053227,196.440692,247.979157,296.874554,362.467244,452.232376,81.00174,164.180023,242.686463,...,5.790118,5.976999,6.187693,0.088019,0.067782,0.053707,0.044329,0.050158,0.043502,0.037802
14,57.850906,130.996156,208.339678,261.556283,320.340103,386.119877,482.225851,86.604476,169.439157,263.702023,...,5.86139,6.035225,6.254755,0.102325,0.065848,0.060341,0.050209,0.046977,0.040432,0.038726
17,56.497435,127.437844,197.545828,248.309112,302.620906,368.692201,453.385716,76.228464,149.893898,225.473293,...,5.775215,5.966115,6.16331,0.075456,0.041218,0.033485,0.033013,0.032108,0.028722,0.024039
18,58.249694,127.799372,201.202946,255.65836,305.04039,368.198407,458.666798,83.799862,170.52988,250.692645,...,5.804426,5.983448,6.196508,0.092148,0.074212,0.056347,0.048138,0.042901,0.0383,0.034584


In [639]:
# Only the last expression of the cell is displayed.
len(relay_estimates)
first_three_legs = [
    f"fin_sum_log_{stat}_{leg}" for leg in (1, 2, 3) for stat in ("mean", "std")
]
relay_estimates[first_three_legs]

Unnamed: 0_level_0,fin_sum_log_mean_1,fin_sum_log_std_1,fin_sum_log_mean_2,fin_sum_log_std_2,fin_sum_log_mean_3,fin_sum_log_std_3
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5,4.161056,0.025649,4.938247,0.051939,5.380190,0.050357
11,4.219444,0.088019,4.969686,0.067782,5.385705,0.053707
14,4.259644,0.102325,5.003901,0.065848,5.456536,0.060341
17,4.184785,0.075456,4.928231,0.041218,5.351632,0.033485
18,4.246103,0.092148,4.994718,0.074212,5.414058,0.056347
26,4.219299,0.091361,4.977609,0.044300,5.422022,0.045161
28,4.442320,0.086102,5.119145,0.092990,5.510533,0.068426
32,4.364302,0.123580,5.031964,0.082959,5.457536,0.058048
35,4.139832,0.086115,4.937183,0.060698,5.404917,0.038076
37,4.243491,0.051673,4.939196,0.055502,5.413308,0.041579


In [640]:
# Flatten the (field, leg) column MultiIndex into plain "field_leg" names.
by_teams_flat = by_teams.copy()
by_teams_flat.columns = [f'{field}_{leg}' for field, leg in by_teams_flat.columns]
# Display only: by_teams_flat itself keeps team_id as its index.
by_teams_flat.reset_index()


Unnamed: 0,team_id,team_1,team_2,team_3,team_4,team_5,team_6,team_7,team_base_name_1,team_base_name_2,...,ind_mean_5,ind_mean_6,ind_mean_7,ind_median_1,ind_median_2,ind_median_3,ind_median_4,ind_median_5,ind_median_6,ind_median_7
0,5,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK,Halden SK,...,62.097304,75.794950,85.633899,64.133501,75.142332,77.273814,59.881118,61.654866,75.521582,85.578863
1,11,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne,OK Linne,...,56.721972,67.114796,92.568771,68.047577,75.630890,74.017768,51.993243,55.593390,66.933450,92.321787
2,14,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima,Ikaalisten Nouseva-Voima,...,62.576465,66.628415,102.489651,70.737369,77.901033,84.792953,54.158064,62.080109,66.525176,101.902216
3,17,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving,IL Tyrving,...,57.333536,67.761076,84.976476,65.716365,72.341834,72.698226,53.732789,57.079184,67.619799,84.945242
4,18,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK,Nydalens SK,...,50.992857,65.046605,94.260852,69.885691,77.440836,76.428838,55.973405,50.808000,64.810800,93.949004
5,26,NTNUI 1,NTNUI 1,NTNUI 1,NTNUI 1,NTNUI 1,NTNUI 1,NTNUI 1,NTNUI,NTNUI,...,61.617233,72.004349,101.795724,68.011089,76.987816,80.864879,66.259730,61.393158,71.633572,101.693527
6,28,Angelniemen Ankkuri 1,Angelniemen Ankkuri 1,Angelniemen Ankkuri 1,Angelniemen Ankkuri 1,Angelniemen Ankkuri 1,Angelniemen Ankkuri 1,Angelniemen Ankkuri 1,Angelniemen Ankkuri,Angelniemen Ankkuri,...,58.784104,67.740436,108.455636,85.074000,81.385672,79.706644,55.722948,58.474682,67.436225,108.060920
7,32,OK Orion 1,OK Orion 1,OK Orion 1,OK Orion 1,OK Orion 1,OK Orion 1,OK Orion 1,OK Orion,OK Orion,...,58.861257,76.690217,97.081819,78.620226,74.145082,81.015163,58.979693,58.646700,76.410671,96.672657
8,35,OLV Baselland 1,OLV Baselland 1,OLV Baselland 1,OLV Baselland 1,OLV Baselland 1,OLV Baselland 1,OLV Baselland 1,OLV Baselland,OLV Baselland,...,57.324702,61.586819,86.896183,62.876000,76.362300,83.038686,53.400600,57.115500,61.538667,86.667308
9,37,Järla Orientering 1,Järla Orientering 1,Järla Orientering 1,Järla Orientering 1,Järla Orientering 1,Järla Orientering 1,Järla Orientering 1,Järla Orientering,Järla Orientering,...,58.862402,78.894655,95.667554,69.616274,69.794621,84.531260,57.710983,58.602492,78.501248,95.300935


In [641]:
# Join per-runner fields and simulated team results side by side on team_id,
# keeping only teams present in both.
estimates = pd.concat([by_teams_flat, relay_estimates], join='inner', axis="columns")

In [642]:
# Convert elapsed minutes into wall-clock timestamps counted from the start time.
start_timestamp = pd.Timestamp(year=2018, month=6, day=16, hour=23)

for leg in range(1, 8):
    for stat in ("median", "start95", "end95"):
        elapsed_seconds = estimates[f"fin_{stat}_{leg}"] * 60
        estimates[f"fint_{stat}_{leg}"] = pd.to_datetime(
            elapsed_seconds, unit="s", origin=start_timestamp
        )


In [643]:
# Order teams by their estimated median total time (after leg 7) and save as TSV.
estimates = estimates.sort_values(by="fin_median_7")

team_estimates_path = 'data/team_estimates_ju2018.tsv'
estimates.to_csv(team_estimates_path, sep="\t")

In [644]:
# Ten teams with the smallest estimated median total time.
estimates.loc[:, ["team_1", "fin_median_7"]].head(10)

Unnamed: 0_level_0,team_1,fin_median_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1
17,IL Tyrving 1,475.009267
35,OLV Baselland 1,481.967757
11,OK Linne 1,486.455304
18,Nydalens SK 1,490.930802
5,Halden SK 1,500.660801
37,Järla Orientering 1,515.993759
14,Ikaalisten Nouseva-Voima 1,520.603481
150,IL Tyrving 2,526.079246
32,OK Orion 1,526.680712
26,NTNUI 1,529.109118


In [645]:
# Teams whose name contains any of these fragments (joined into one regex alternation).
followed_team_pattern = "|".join(["Reak", "Puskasil", "Rastihaukat"])
teams_to_follow = estimates[estimates['team_1'].str.contains(followed_team_pattern)]
teams_to_follow[["team_1", "fin_median_7", "fin_start95_7", "fin_end95_7"]]

Unnamed: 0_level_0,team_1,fin_median_7,fin_start95_7,fin_end95_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
429,Reaktor Innovations 1,744.993832,697.861608,797.586145
568,Rastihaukat 1,752.618464,698.750796,810.092776
1270,Reaktor Innovations 2,883.602878,796.114177,990.572382
1246,Rastihaukat 2,909.593489,831.641398,995.214894
1131,Puskasilimät OK 1,912.664073,854.324202,975.026698


In [646]:
# Reshape the wide per-team table (one "<field>_<leg>" column per leg) into one
# row per (team_id, leg).
# Stub names are taken from the leg-1 columns. The match must be on the "_1"
# suffix: a substring test would also pick up any column that merely contains
# "_1" (for example "_10") and produce a wrong stub.
leg_1_suffix = "_1"
leg_1_cols = [c for c in estimates.columns.values if c.endswith(leg_1_suffix)]
column_base_names = [c[:-len(leg_1_suffix)] for c in leg_1_cols]
runner_estimates = pd.wide_to_long(estimates.reset_index(), stubnames=column_base_names, i="team_id", j="leg", sep="_").sort_values(by=['team_id', 'leg'])
runner_estimates = runner_estimates.drop(['team_base_name', 'estimated_log_means', 'estimated_log_stdevs'], axis=1)
runner_estimates.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,team,leg_dist,name,history_log_means,history_log_stdevs,log_means,log_stdevs,ind_95_start,ind_95_end,ind_mean,ind_median,fin_start95,fin_end95,fin_median,fin_mean,fin_sum_log_mean,fin_sum_log_std,fint_median,fint_start95,fint_end95
team_id,leg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,1,Halden SK 1,11.0,Emil Wingstedt,1.763072,0.025436,1.763072,0.025436,61.014607,67.411824,64.154251,64.133501,60.941195,67.423464,64.143575,64.160326,4.161056,0.025649,2018-06-17 00:04:08.614484,2018-06-17 00:00:56.471699,2018-06-17 00:07:25.407826
5,2,Halden SK 1,11.9,Fredrik Eliasson,1.842846,0.093588,1.842846,0.093588,62.549244,90.270797,75.472132,75.142332,126.383686,154.778261,139.428258,139.714078,4.938247,0.051939,2018-06-17 01:19:25.695451,2018-06-17 01:06:23.021169,2018-06-17 01:34:46.695672
5,3,Halden SK 1,12.8,Thomas Natvig Årstad,1.79791,0.1036,1.79791,0.1036,63.0736,94.671024,77.689619,77.273814,197.083336,239.665396,216.943106,217.339309,5.38019,0.050357,2018-06-17 02:36:56.586359,2018-06-17 02:17:05.000161,2018-06-17 02:59:39.923789
5,4,Halden SK 1,8.7,Alexander Chepelin,1.929038,0.069687,1.929038,0.069687,52.236299,68.64476,60.026693,59.881118,255.179316,301.078958,277.093401,277.365197,5.624437,0.042362,2018-06-17 03:37:05.604081,2018-06-17 03:15:10.758942,2018-06-17 04:01:04.737494
5,5,Halden SK 1,8.7,Riccardo Scalet,1.958229,0.119586,1.958229,0.119586,48.772555,77.939788,62.097304,61.654866,312.999904,368.391323,339.404082,339.598066,5.826917,0.041117,2018-06-17 04:39:24.244937,2018-06-17 04:12:59.994236,2018-06-17 05:08:23.479403


In [647]:
# Save the per-runner estimates as TSV.
runner_estimates_path = 'data/runner_estimates_ju2018.tsv'
runner_estimates.to_csv(runner_estimates_path, sep="\t")

In [648]:
# Individual parameters for runners of teams whose name contains "Reak".
is_reaktor_team = runner_estimates['team'].str.contains("Reak")
runner_estimates.loc[is_reaktor_team, ["name", "log_means", "log_stdevs", "ind_median"]]


Unnamed: 0_level_0,Unnamed: 1_level_0,name,log_means,log_stdevs,ind_median
team_id,leg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
429,1,Mikko Peltonen,2.184951,0.081299,97.792383
429,2,Pasi Huhtiniemi,2.222239,0.072482,109.812846
429,3,Joni Freeman,2.130879,0.113846,107.805019
429,4,Tuomas Kareinen,2.29324,0.16007,86.19078
429,5,Janne Vaittinen,2.324384,0.039861,88.917301
429,6,Olavi Kanerva,2.128667,0.092121,90.759473
429,7,Jyri Kytömäki,2.355319,0.048494,161.284847
1270,1,Antti-Ville Jokela,2.363116,0.10623,116.864
1270,2,Karri-Pekka Laakso,2.414677,0.081841,133.115223
1270,3,Oskari Pirttikoski,2.637533,0.194072,178.927107


In [649]:
# Printable table: indexed by team_id, rounded to two decimals, clock times as HH:MM.
for_print = runner_estimates.reset_index().set_index('team_id').round(2)

for time_column in ("fint_median", "fint_start95", "fint_end95"):
    for_print[time_column] = for_print[time_column].dt.strftime("%H:%M")

print_columns = [
    'team',
    'leg',
    'name',
    'ind_median',
    'ind_95_start',
    'ind_95_end',
    'fin_median',
    'fint_median',
    'fint_start95',
    'fint_end95',
]
for_print = for_print[print_columns]
for_print.head()

Unnamed: 0_level_0,team,leg,name,ind_median,ind_95_start,ind_95_end,fin_median,fint_median,fint_start95,fint_end95
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
5,Halden SK 1,1,Emil Wingstedt,64.13,61.01,67.41,64.14,00:04,00:00,00:07
5,Halden SK 1,2,Fredrik Eliasson,75.14,62.55,90.27,139.43,01:19,01:06,01:34
5,Halden SK 1,3,Thomas Natvig Årstad,77.27,63.07,94.67,216.94,02:36,02:17,02:59
5,Halden SK 1,4,Alexander Chepelin,59.88,52.24,68.64,277.09,03:37,03:15,04:01
5,Halden SK 1,5,Riccardo Scalet,61.65,48.77,77.94,339.4,04:39,04:12,05:08


In [650]:
# Save the printable table as TSV.
for_print_path = 'for_print_ju2018.tsv'
for_print.to_csv(for_print_path, sep="\t")

In [651]:
# Printable rows for teams whose name contains any of these fragments.
printed_team_pattern = "|".join(["Reak", "Puskasil", "Rastihaukat 2"])
for_print[for_print['team'].str.contains(printed_team_pattern)]


Unnamed: 0_level_0,team,leg,name,ind_median,ind_95_start,ind_95_end,fin_median,fint_median,fint_start95,fint_end95
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
429,Reaktor Innovations 1,1,Mikko Peltonen,97.79,83.39,114.68,97.81,00:37,00:23,00:54
429,Reaktor Innovations 1,2,Pasi Huhtiniemi,109.81,95.27,126.58,208.09,02:28,02:07,02:51
429,Reaktor Innovations 1,3,Joni Freeman,107.81,86.24,134.75,316.24,04:16,03:45,04:51
429,Reaktor Innovations 1,4,Tuomas Kareinen,86.19,62.98,117.95,403.21,05:43,05:03,06:30
429,Reaktor Innovations 1,5,Janne Vaittinen,88.92,82.24,96.14,492.37,07:12,06:31,08:00
429,Reaktor Innovations 1,6,Olavi Kanerva,90.76,75.77,108.72,583.55,08:43,07:59,09:33
429,Reaktor Innovations 1,7,Jyri Kytömäki,161.28,146.66,177.37,744.99,11:24,10:37,12:17
1131,Puskasilimät OK 1,1,Tuomas Ketonen,124.33,101.43,152.41,123.9,01:03,00:41,01:32
1131,Puskasilimät OK 1,2,Petri Miettinen,131.06,122.12,140.64,254.97,03:14,02:50,03:44
1131,Puskasilimät OK 1,3,Jaakko Havola,146.75,135.82,158.57,401.72,05:41,05:14,06:13


In [652]:
# Subset of columns exported for the web view, one row per (team, leg).
web_columns = [
    'team_id',
    'leg',
    'team',
    'name',
    'fin_sum_log_mean',
    'fin_sum_log_std',
    'fin_median',
    'fint_median',
]
for_web = runner_estimates.reset_index()[web_columns]
for_web

Unnamed: 0,team_id,leg,team,name,fin_sum_log_mean,fin_sum_log_std,fin_median,fint_median
0,5,1,Halden SK 1,Emil Wingstedt,4.161056,0.025649,64.143575,2018-06-17 00:04:08.614484
1,5,2,Halden SK 1,Fredrik Eliasson,4.938247,0.051939,139.428258,2018-06-17 01:19:25.695451
2,5,3,Halden SK 1,Thomas Natvig Årstad,5.380190,0.050357,216.943106,2018-06-17 02:36:56.586359
3,5,4,Halden SK 1,Alexander Chepelin,5.624437,0.042362,277.093401,2018-06-17 03:37:05.604081
4,5,5,Halden SK 1,Riccardo Scalet,5.826917,0.041117,339.404082,2018-06-17 04:39:24.244937
5,5,6,Halden SK 1,Andreas Rüedlinger,6.028734,0.036754,415.099063,2018-06-17 05:55:05.943754
6,5,7,Halden SK 1,Magne Dähli,6.216316,0.031030,500.660801,2018-06-17 07:20:39.648049
7,11,1,OK Linne 1,Vincent Coupat,4.219444,0.088019,67.939650,2018-06-17 00:07:56.378995
8,11,2,OK Linne 1,Jakob Andersson,4.969686,0.067782,143.957988,2018-06-17 01:23:57.479287
9,11,3,OK Linne 1,Rassmus Andersson,5.385705,0.053707,218.240969,2018-06-17 02:38:14.458168


In [653]:
# Save the web table as a JSON list of records.
for_web_path = 'web-lib/for_web_ju2018.json'
for_web.to_json(for_web_path, orient="records")