In [690]:
import pandas as pd
import numpy as np
from scipy.stats import lognorm
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_absolute_error

In [691]:
# Read the tab-separated pace file; `history` starts out as the same frame as `df_all`.
in_file_name = 'data/grouped_paces_ju.tsv'
df_all = pd.read_csv(in_file_name, sep="\t")
history = df_all

In [692]:
# Read the 2018 running order and keep only rows that carry a finite team_id.
order18 = pd.read_csv('data/running_order_j2018_ju.tsv', sep="\t")
has_team_id = np.isfinite(order18["team_id"])
order18 = order18[has_team_id]

In [693]:
# Per-row mean and standard deviation of the log paces over the six pace columns.
pace_columns = [f"pace_{run}" for run in range(1, 7)]
paces = history[pace_columns]
logs = np.log(paces)
# nan-aware reductions: missing pace columns are ignored instead of poisoning the row.
means = np.nanmean(logs, axis=1)
stdevs = np.nanstd(logs, axis=1)
history = history.assign(log_means=means).assign(log_stdevs=stdevs)

  after removing the cwd from sys.path.
  keepdims=keepdims)


In [694]:
# Estimate values for all, but only use them if no history is available.
# Two linear regressions are fitted on rows that have a finite pace_2:
# mean_team_id -> log_means and mean_team_id -> log_stdevs.
with_history = history[np.isfinite(history.pace_2)]
x = with_history.mean_team_id.values.reshape(-1, 1)

# Column vector of the 2018 team ids, used as the prediction input for both models.
team_ids_2018 = order18.team_id.values.reshape(-1, 1)

log_means = with_history.log_means.values.reshape(-1, 1)
log_means_model = linear_model.LinearRegression().fit(x, log_means)

estimated_log_means = log_means_model.predict(team_ids_2018)
order18 = order18.assign(estimated_log_means=estimated_log_means)

log_stdevs = with_history.log_stdevs.values.reshape(-1, 1)
log_stdevs_model = linear_model.LinearRegression().fit(x, log_stdevs)

estimated_log_stdevs = log_stdevs_model.predict(team_ids_2018)
order18 = order18.assign(estimated_log_stdevs=estimated_log_stdevs)
order18.head()

Unnamed: 0,team_id,team,team_base_name,leg,leg_dist,name,estimated_log_means,estimated_log_stdevs
4,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,1,11.0,Otto Simosas,1.944005,0.084975
5,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,2,11.9,Elias Kuukka,1.944005,0.084975
6,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,3,12.8,Johan Runesson,1.944005,0.084975
7,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,4,8.7,Anton Kuukka,1.944005,0.084975
8,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,5,8.7,Severi Kymäläinen,1.944005,0.084975


In [695]:
# Combine history with 2018 runners
# Placeholder returned when no history row matches: both statistics are 0.
no_history_row = pd.DataFrame([[0,0]], columns=["log_means", "log_stdevs"])
def get_history_row(running_order_row):
    """Find the history row for one runner of the running order.

    The lower-cased runner name is matched against `history['name']`, either
    exactly or as "<name>:" inside the history name. If that does not give
    exactly one row, candidates are narrowed to rows whose `teams` value
    contains the upper-cased `team_base_name`.

    Parameters
    ----------
    running_order_row : row with "name" and "team_base_name" entries.

    Returns
    -------
    pandas.DataFrame
        A one-row frame: the matching history row, `no_history_row` when
        nothing matches, or (after printing diagnostics) the candidate with
        the highest `num_runs` when several rows still match.
    """
    name = running_order_row["name"].lower()

    by_name = history[history['name'] == name]
    # NOTE(review): this is a substring match, so "<name>:" also matches longer
    # names that merely end with this name -- confirm that is intended.
    by_name_and_colon = history[history['name'].str.contains(name + ":", regex=False)]

    # pd.concat instead of DataFrame.append: append was removed in pandas 2.0,
    # concat yields the same rows in the same order on every version.
    runners = pd.concat([by_name, by_name_and_colon])
    if len(runners) == 1:
        return runners

    # Zero or several candidates: disambiguate by team name.
    team_name = running_order_row["team_base_name"].upper()
    runners = runners[runners['teams'].str.contains(team_name, regex=False)]
    if len(runners) == 1:
        return runners
    if len(runners) == 0:
        return no_history_row

    # Still ambiguous: report it and fall back to the most frequently seen candidate.
    print(f"name '{name}' team_name '{team_name}'")
    print(f"by_name {len(by_name)} by_name_and_colon {len(by_name_and_colon)} runners {len(runners)}")
    print(f"Duplicate runner {runners}")
    return runners.sort_values("num_runs", ascending = False).head(1)

def get_estimate_params(running_order_row):
    """Return the runner's historical log-pace statistics as a two-entry Series.

    Looks up the history row with `get_history_row` and takes the first value
    of its `log_means` and `log_stdevs` columns.
    """
    matched = get_history_row(running_order_row)
    return pd.Series({
        "history_log_means": matched.log_means.values[0],
        "history_log_stdevs": matched.log_stdevs.values[0],
    })

# Debug aid: uncomment to restrict the run to a few teams.
#order18 = order18[order18['team'].str.contains("Reak") | order18['team'].str.contains("Puskasil") | order18['team'].str.contains("Rastihaukat")]
# Look up history statistics for every runner and attach them as two new columns.
estimate_params = order18.apply(get_estimate_params, axis=1)
order18 = (
    order18
    .assign(history_log_means=estimate_params.history_log_means)
    .assign(history_log_stdevs=estimate_params.history_log_stdevs)
)

name 'petri laaksonen' team_name 'UNO SK'
by_name 0 by_name_and_colon 6 runners 2
Duplicate runner       mean_team_id            teams                             name  \
3977         611.0  UNO SK-HARRASTE  petri laaksonen:UNO SK-HARRASTE   
3979         698.0           UNO SK           petri laaksonen:UNO SK   

      num_runs  num_valid_times  mean_pace  stdev  pace_1  pace_2  pace_3  \
3977         1                1      8.749    0.0   8.749     NaN     NaN   
3979         1                1      7.124    0.0   7.124     NaN     NaN   

      pace_4  pace_5  pace_6  log_means  log_stdevs  
3977     NaN     NaN     NaN   2.168939         0.0  
3979     NaN     NaN     NaN   1.963469         0.0  
name 'arto laaksonen' team_name 'UNO SK'
by_name 0 by_name_and_colon 6 runners 2
Duplicate runner       mean_team_id            teams                            name  num_runs  \
3984         611.0  UNO SK-HARRASTE  arto laaksonen:UNO SK-HARRASTE         1   
3985         698.0           U

In [696]:
# Use a runner's own history when it is usable (finite and > 0); otherwise fall
# back to the regression estimate.
# `&` binds tighter than `>`, so each comparison needs its own parentheses:
# without them the expression is parsed as `(isfinite(x) & x) > 0`, which mixes a
# boolean mask with the float column instead of testing `x > 0`.
has_history_mean = np.isfinite(order18["history_log_means"]) & (order18["history_log_means"] > 0)
has_history_stdev = np.isfinite(order18["history_log_stdevs"]) & (order18["history_log_stdevs"] > 0)
order18['log_means'] = np.where(has_history_mean, order18["history_log_means"], order18["estimated_log_means"])
order18['log_stdevs'] = np.where(has_history_stdev, order18["history_log_stdevs"], order18["estimated_log_stdevs"])
order18.head()


Unnamed: 0,team_id,team,team_base_name,leg,leg_dist,name,estimated_log_means,estimated_log_stdevs,history_log_means,history_log_stdevs,log_means,log_stdevs
4,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,1,11.0,Otto Simosas,1.944005,0.084975,1.817424,0.101691,1.817424,0.101691
5,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,2,11.9,Elias Kuukka,1.944005,0.084975,1.819386,0.111737,1.819386,0.111737
6,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,3,12.8,Johan Runesson,1.944005,0.084975,1.775426,0.086651,1.775426,0.086651
7,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,4,8.7,Anton Kuukka,1.944005,0.084975,1.844396,0.114598,1.844396,0.114598
8,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö,5,8.7,Severi Kymäläinen,1.944005,0.084975,1.813504,0.048913,1.813504,0.048913


In [697]:
# Calculate personal estimates
# scipy's lognorm parameterisation: s = sigma and scale = exp(mu).
# Note: despite its name, `log_means` below holds exp(mu), i.e. the scale argument.
log_stdevs = order18['log_stdevs']
log_means = np.exp(order18['log_means'])

shape_and_scale = dict(s = log_stdevs, scale = log_means)
intervals95 = lognorm.interval(0.95, **shape_and_scale)
means = lognorm.mean(**shape_and_scale)
medians = lognorm.median(**shape_and_scale)

In [698]:
# Scale the per-runner pace statistics by the leg distance and store them as columns.
order18 = (
    order18
    .assign(ind_95_start = np.asarray(intervals95[0] * order18.leg_dist))
    .assign(ind_95_end = np.asarray(intervals95[1] * order18.leg_dist))
    .assign(ind_mean = np.asarray(means * order18.leg_dist))
    .assign(ind_median = np.asarray(medians * order18.leg_dist))
)

In [699]:
# One row per team; every field becomes a group of columns keyed by leg.
indexed_by_team_and_leg = order18.set_index(["team_id", "leg"])
by_teams = indexed_by_team_and_leg.unstack(level="leg")
by_teams.head()

Unnamed: 0_level_0,team,team,team,team,team,team,team,team_base_name,team_base_name,team_base_name,...,ind_mean,ind_mean,ind_mean,ind_median,ind_median,ind_median,ind_median,ind_median,ind_median,ind_median
leg,1,2,3,4,5,6,7,1,2,3,...,5,6,7,1,2,3,4,5,6,7
team_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
3.0,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö,Tampereen Pyrintö,Tampereen Pyrintö,...,53.411364,61.058061,87.339752,67.715767,73.400057,75.555759,55.021214,53.34751,61.026089,87.281419
5.0,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK,Halden SK,Halden SK,...,62.097304,75.79495,85.633899,64.133501,75.142332,77.273814,59.881118,61.654866,75.521582,85.578863
6.0,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat,Helsingin Suunnistajat,Helsingin Suunnistajat,...,56.917205,73.143192,95.454671,71.06753,81.893692,79.259454,56.622836,56.567705,72.479759,95.225971
8.0,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät,Hiidenkiertäjät,Hiidenkiertäjät,...,55.11734,67.175768,89.509771,64.842688,81.656738,74.557289,54.048333,55.029472,66.579191,89.286199
11.0,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne,OK Linne,OK Linne,...,56.721972,67.114796,92.568771,68.047577,75.63089,74.017768,51.993243,55.59339,66.93345,92.321787


In [700]:
# remove teams missing some runners: keep a team only if log_means is finite for all 7 legs
print(len(by_teams))
leg_is_finite = [np.isfinite(by_teams.log_means[leg_no]) for leg_no in range(1, 8)]
all_legs_finite = np.logical_and.reduce(leg_is_finite)
by_teams = by_teams[all_legs_finite]
print(len(by_teams))

1479
1461


In [701]:
def simulate_relay_estimates(row, n_samples=10000, random_state=None):
    """Monte Carlo estimate of a team's cumulative time after each of the 7 legs.

    The sum of log-normal variables has no closed-form distribution, so every
    leg is sampled `n_samples` times, the samples are accumulated leg by leg
    (leg 1, leg 1 + leg 2, ...) and statistics are taken from those sums.

    Parameters
    ----------
    row : pandas.Series
        One team, indexed by (field, leg) with legs 1..7. Reads the fields
        "log_means", "log_stdevs" and "leg_dist"; "name" is read only for the
        diagnostic print when a log mean is NaN.
    n_samples : int, default 10000
        Number of simulated runs per leg.
    random_state : None, int or numpy.random.RandomState, default None
        Forwarded to `lognorm.rvs`. None keeps the previous behaviour (global
        numpy random state); an int seeds one generator shared by all legs.

    Returns
    -------
    pandas.Series
        For each leg 1..7: fin_start95_{leg} and fin_end95_{leg} (2.5 % and
        97.5 % quantiles), fin_median_{leg}, fin_mean_{leg}, and
        fin_sum_log_mean_{leg} / fin_sum_log_std_{leg} (mean and population
        standard deviation of the log of the cumulative sums).
    """
    legs = range(1, 8)

    if isinstance(random_state, (int, np.integer)):
        # Build one generator up front: passing the same integer seed to every
        # rvs call would give all legs identical random draws.
        random_state = np.random.RandomState(random_state)

    samples = pd.DataFrame()
    for leg in legs:
        if np.isnan(row["log_means"][leg]):
            # Diagnostic only: the simulation continues and yields NaN for this team.
            print(row["log_means"])
            print(row["name"])
        samples[leg] = row["leg_dist"][leg] * lognorm.rvs(
            s = row["log_stdevs"][leg],
            scale = np.exp(row["log_means"][leg]),
            size = n_samples,
            random_state = random_state,
        )

    # Column k holds leg_1 + ... + leg_k for every simulated run.
    samples_sums = samples.cumsum(axis=1)

    start95 = samples_sums.quantile(0.025)
    end95 = samples_sums.quantile(0.975)
    medians = samples_sums.median()
    means = samples_sums.mean()

    # Explicit column-wise reductions: np.mean/np.std on a DataFrame forward
    # axis=None to pandas, whose meaning is not stable across pandas versions.
    # ddof=0 matches what np.std computed.
    sum_logs = np.log(samples_sums)
    sum_log_means = sum_logs.mean(axis=0)
    sum_log_stds = sum_logs.std(axis=0, ddof=0)

    # Output order: all legs of one statistic, then the next statistic.
    statistics = [
        ("fin_start95", start95),
        ("fin_end95", end95),
        ("fin_median", medians),
        ("fin_mean", means),
        ("fin_sum_log_mean", sum_log_means),
        ("fin_sum_log_std", sum_log_stds),
    ]
    new_cols = {
        f"{prefix}_{leg}": per_leg.loc[leg]
        for prefix, per_leg in statistics
        for leg in legs
    }
    return pd.Series(new_cols)

# Run the simulation once per team row.
relay_estimates = by_teams.apply(simulate_relay_estimates, axis="columns")
relay_estimates.head()

Unnamed: 0_level_0,fin_start95_1,fin_start95_2,fin_start95_3,fin_start95_4,fin_start95_5,fin_start95_6,fin_start95_7,fin_end95_1,fin_end95_2,fin_end95_3,...,fin_sum_log_mean_5,fin_sum_log_mean_6,fin_sum_log_mean_7,fin_sum_log_std_1,fin_sum_log_std_2,fin_sum_log_std_3,fin_sum_log_std_4,fin_sum_log_std_5,fin_sum_log_std_6,fin_sum_log_std_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3.0,55.383496,122.216315,193.874995,246.71145,299.415646,360.284725,447.200087,83.3787,164.703838,243.982948,...,5.787746,5.959401,6.162848,0.102922,0.07664,0.058421,0.051696,0.043975,0.037385,0.031237
5.0,61.06448,126.448663,197.152214,255.525615,313.570274,386.487927,471.679024,67.382527,154.635834,239.777047,...,5.82576,6.027526,6.215378,0.025041,0.051559,0.049808,0.042074,0.040717,0.036758,0.031075
6.0,51.953436,125.127763,202.842856,257.505371,312.348141,380.230955,473.922255,97.424926,188.947945,269.898284,...,5.849441,6.040683,6.245587,0.160246,0.104389,0.072535,0.0618,0.05472,0.051104,0.043759
8.0,52.224874,126.323775,199.552166,252.927025,307.213167,369.429379,456.329614,80.828058,171.22223,246.413473,...,5.802014,5.986357,6.18931,0.110984,0.078002,0.05418,0.044694,0.038469,0.038986,0.034012
11.0,57.221458,126.330617,196.937265,248.236204,297.49646,363.318761,452.599555,80.800633,164.02715,242.034563,...,5.789502,5.976417,6.187323,0.087599,0.067063,0.053081,0.043632,0.04935,0.042708,0.037329


In [702]:
# Number of teams that received a simulated estimate.
relay_estimates.shape[0]
#relay_estimates[["fin_sum_log_mean_1", "fin_sum_log_std_1", "fin_sum_log_mean_2", "fin_sum_log_std_2", "fin_sum_log_mean_3", "fin_sum_log_std_3"]]

1461

In [703]:
# Flatten the troublesome multi-index to field_{leg} etc...
by_teams_flat = by_teams.copy()
by_teams_flat.columns = ["{}_{}".format(field, leg_no) for field, leg_no in by_teams_flat.columns]
# Display only: the result is not assigned, so by_teams_flat keeps team_id as its index.
by_teams_flat.reset_index()


Unnamed: 0,team_id,team_1,team_2,team_3,team_4,team_5,team_6,team_7,team_base_name_1,team_base_name_2,...,ind_mean_5,ind_mean_6,ind_mean_7,ind_median_1,ind_median_2,ind_median_3,ind_median_4,ind_median_5,ind_median_6,ind_median_7
0,3.0,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö 1,Tampereen Pyrintö,Tampereen Pyrintö,...,53.411364,61.058061,87.339752,67.715767,73.400057,75.555759,55.021214,53.347510,61.026089,87.281419
1,5.0,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK 1,Halden SK,Halden SK,...,62.097304,75.794950,85.633899,64.133501,75.142332,77.273814,59.881118,61.654866,75.521582,85.578863
2,6.0,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat 1,Helsingin Suunnistajat,Helsingin Suunnistajat,...,56.917205,73.143192,95.454671,71.067530,81.893692,79.259454,56.622836,56.567705,72.479759,95.225971
3,8.0,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät 1,Hiidenkiertäjät,Hiidenkiertäjät,...,55.117340,67.175768,89.509771,64.842688,81.656738,74.557289,54.048333,55.029472,66.579191,89.286199
4,11.0,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne 1,OK Linne,OK Linne,...,56.721972,67.114796,92.568771,68.047577,75.630890,74.017768,51.993243,55.593390,66.933450,92.321787
5,13.0,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot 1,Vehkalahden Veikot,Vehkalahden Veikot,...,55.101333,72.915405,95.417328,71.504227,74.174520,85.982990,57.192691,55.006088,72.486686,94.986823
6,14.0,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima 1,Ikaalisten Nouseva-Voima,Ikaalisten Nouseva-Voima,...,62.576465,66.628415,102.489651,70.737369,77.901033,84.792953,54.158064,62.080109,66.525176,101.902216
7,15.0,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä 1,Vaajakosken Terä,Vaajakosken Terä,...,58.325239,67.159586,95.230899,65.156383,69.936300,79.595441,51.979770,57.600834,66.807875,95.037990
8,17.0,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving 1,IL Tyrving,IL Tyrving,...,57.333536,67.761076,84.976476,65.716365,72.341834,72.698226,53.732789,57.079184,67.619799,84.945242
9,18.0,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK 1,Nydalens SK,Nydalens SK,...,50.992857,65.046605,94.260852,69.885691,77.440836,76.428838,55.973405,50.808000,64.810800,93.949004


In [704]:
# Put the flattened team data and the simulated estimates side by side, matching on the index.
estimates = pd.concat([by_teams_flat, relay_estimates], axis="columns", join='inner')

In [705]:
# Convert minutes to date and times: each fin_* value is turned into seconds
# and added to the start timestamp.
start_timestamp = pd.Timestamp(year = 2018, month = 6, day = 16, hour = 23)

for leg in range(1,8):
    for stat in ("median", "start95", "end95"):
        elapsed_seconds = estimates[f"fin_{stat}_{leg}"] * 60
        estimates[f"fint_{stat}_{leg}"] = pd.to_datetime(elapsed_seconds, unit = "s", origin = start_timestamp)


In [706]:
# Sort teams by estimated total time (median cumulative time after leg 7), then save.
estimates = estimates.sort_values(by="fin_median_7")

estimates.to_csv(path_or_buf='data/team_estimates_ju2018.tsv', sep="\t")

In [707]:
# First ten teams in the current sort order.
estimates.loc[:, ["team_1", "fin_median_7"]].head(10)

Unnamed: 0_level_0,team_1,fin_median_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1
3.0,Tampereen Pyrintö 1,474.479075
17.0,IL Tyrving 1,474.901934
35.0,OLV Baselland 1,482.126532
11.0,OK Linne 1,486.080085
8.0,Hiidenkiertäjät 1,487.445455
15.0,Vaajakosken Terä 1,488.083652
18.0,Nydalens SK 1,491.140926
29.0,Rajamäen Rykmentti 1,492.398137
5.0,Halden SK 1,500.03498
27.0,OK Trian 1,503.784524


In [708]:
# Teams of interest, selected by substring of the team name.
followed_name_parts = ["Reak", "Puskasil", "Rastihaukat"]
is_followed = estimates['team_1'].str.contains(followed_name_parts[0])
for name_part in followed_name_parts[1:]:
    is_followed = is_followed | estimates['team_1'].str.contains(name_part)
teams_to_follow = estimates[is_followed]
teams_to_follow[["team_1", "fin_median_7", "fin_start95_7", "fin_end95_7"]]

Unnamed: 0_level_0,team_1,fin_median_7,fin_start95_7,fin_end95_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
429.0,Reaktor Innovations 1,744.378617,698.683599,796.622805
568.0,Rastihaukat 1,751.558274,698.4424,809.801107
1270.0,Reaktor Innovations 2,883.551235,793.614558,988.338459
1246.0,Rastihaukat 2,909.76943,833.081557,994.333183
1131.0,Puskasilimät OK 1,914.191071,856.234733,976.539486


In [709]:
# Reshape from one row per team to one row per (team_id, leg).
# The stub names are derived from the leg-1 columns. Only a trailing "_1" counts:
# the suffix is removed with a fixed-length slice, so a column that merely
# contains "_1" somewhere else would otherwise produce a wrong stub name.
leg_1_cols = [c for c in estimates.columns.values if c.endswith("_1")]
column_base_names = [c[:-len("_1")] for c in leg_1_cols]
runner_estimates = pd.wide_to_long(estimates.reset_index(), stubnames=column_base_names, i ="team_id", j="leg", sep = "_").sort_values(by=['team_id', 'leg'])
runner_estimates = runner_estimates.drop(['team_base_name', 'estimated_log_means', 'estimated_log_stdevs'], axis=1)
runner_estimates.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,team,leg_dist,name,history_log_means,history_log_stdevs,log_means,log_stdevs,ind_95_start,ind_95_end,ind_mean,ind_median,fin_start95,fin_end95,fin_median,fin_mean,fin_sum_log_mean,fin_sum_log_std,fint_median,fint_start95,fint_end95
team_id,leg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3.0,1,Tampereen Pyrintö 1,11.0,Otto Simosas,1.817424,0.101691,1.817424,0.101691,55.479226,82.651207,68.066799,67.715767,55.383496,83.3787,67.809902,68.138156,4.216241,0.102922,2018-06-17 00:07:48.594122,2018-06-16 23:55:23.009760,2018-06-17 00:23:22.721970
3.0,2,Tampereen Pyrintö 1,11.9,Elias Kuukka,1.819386,0.111737,1.819386,0.111737,58.963824,91.370742,73.859696,73.400057,122.216315,164.703838,141.436151,141.974458,4.952709,0.07664,2018-06-17 01:21:26.169085,2018-06-17 01:02:12.978916,2018-06-17 01:44:42.230302
3.0,3,Tampereen Pyrintö 1,12.8,Johan Runesson,1.775426,0.086651,1.775426,0.086651,63.754404,89.541622,75.839943,75.555759,193.874995,243.982948,217.409832,217.871802,5.3822,0.058421,2018-06-17 02:37:24.589939,2018-06-17 02:13:52.499687,2018-06-17 03:03:58.976885
3.0,4,Tampereen Pyrintö 1,8.7,Anton Kuukka,1.844396,0.114598,1.844396,0.114598,43.952596,68.877252,55.383689,55.021214,246.71145,301.510271,272.557978,273.177262,5.608784,0.051696,2018-06-17 03:32:33.478685,2018-06-17 03:06:42.687000,2018-06-17 04:01:30.616288
3.0,5,Tampereen Pyrintö 1,8.7,Severi Kymäläinen,1.813504,0.048913,1.813504,0.048913,48.470747,58.714937,53.411364,53.34751,299.415646,355.29943,326.009013,326.592663,5.787746,0.043975,2018-06-17 04:26:00.540767,2018-06-17 03:59:24.938789,2018-06-17 04:55:17.965786


In [710]:
# Save the per-runner table as a tab-separated file.
runner_estimates.to_csv(path_or_buf='data/runner_estimates_ju2018.tsv', sep="\t")

In [711]:
# Runners of the teams whose name contains "Reak".
is_reaktor_team = runner_estimates['team'].str.contains("Reak")
runner_estimates.loc[is_reaktor_team, ["name", "log_means", "log_stdevs", "ind_median"]]


Unnamed: 0_level_0,Unnamed: 1_level_0,name,log_means,log_stdevs,ind_median
team_id,leg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
429.0,1,Mikko Peltonen,2.184951,0.081299,97.792383
429.0,2,Pasi Huhtiniemi,2.222239,0.072482,109.812846
429.0,3,Joni Freeman,2.130879,0.113846,107.805019
429.0,4,Tuomas Kareinen,2.29324,0.16007,86.19078
429.0,5,Janne Vaittinen,2.324384,0.039861,88.917301
429.0,6,Olavi Kanerva,2.128667,0.092121,90.759473
429.0,7,Jyri Kytömäki,2.355319,0.048494,161.284847
1270.0,1,Antti-Ville Jokela,2.363116,0.10623,116.864
1270.0,2,Karri-Pekka Laakso,2.414677,0.081841,133.115223
1270.0,3,Oskari Pirttikoski,2.637533,0.194072,178.927107


In [712]:
# Printable table: indexed by team_id, numbers rounded to 2 decimals,
# timestamps shown as HH:MM, and only the columns needed on paper.
for_print = (
    runner_estimates.copy()
    .reset_index()
    .set_index('team_id')
    .round(2)
)
for clock_column in ("fint_median", "fint_start95", "fint_end95"):
    for_print[clock_column] = for_print[clock_column].dt.strftime("%H:%M")

print_columns = [
    'team',
    'leg',
    'name',
    'ind_median',
    'ind_95_start',
    'ind_95_end',
    'fin_median',
    'fint_median',
    'fint_start95',
    'fint_end95',
]
for_print = for_print[print_columns]
for_print.head()

Unnamed: 0_level_0,team,leg,name,ind_median,ind_95_start,ind_95_end,fin_median,fint_median,fint_start95,fint_end95
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3.0,Tampereen Pyrintö 1,1,Otto Simosas,67.72,55.48,82.65,67.81,00:07,23:55,00:23
3.0,Tampereen Pyrintö 1,2,Elias Kuukka,73.4,58.96,91.37,141.44,01:21,01:02,01:44
3.0,Tampereen Pyrintö 1,3,Johan Runesson,75.56,63.75,89.54,217.41,02:37,02:13,03:03
3.0,Tampereen Pyrintö 1,4,Anton Kuukka,55.02,43.95,68.88,272.56,03:32,03:06,04:01
3.0,Tampereen Pyrintö 1,5,Severi Kymäläinen,53.35,48.47,58.71,326.01,04:26,03:59,04:55


In [713]:
# Save the printable table as a tab-separated file.
for_print.to_csv(path_or_buf='for_print_ju2018.tsv', sep="\t")

In [714]:
# Printable rows for the followed teams, selected by substring of the team name.
printed_name_parts = ["Reak", "Puskasil", "Rastihaukat 2"]
is_printed_team = for_print['team'].str.contains(printed_name_parts[0])
for printed_part in printed_name_parts[1:]:
    is_printed_team = is_printed_team | for_print['team'].str.contains(printed_part)
for_print[is_printed_team]


Unnamed: 0_level_0,team,leg,name,ind_median,ind_95_start,ind_95_end,fin_median,fint_median,fint_start95,fint_end95
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
429.0,Reaktor Innovations 1,1,Mikko Peltonen,97.79,83.39,114.68,97.68,00:37,00:23,00:54
429.0,Reaktor Innovations 1,2,Pasi Huhtiniemi,109.81,95.27,126.58,207.89,02:27,02:06,02:51
429.0,Reaktor Innovations 1,3,Joni Freeman,107.81,86.24,134.75,315.84,04:15,03:45,04:51
429.0,Reaktor Innovations 1,4,Tuomas Kareinen,86.19,62.98,117.95,402.76,05:42,05:04,06:29
429.0,Reaktor Innovations 1,5,Janne Vaittinen,88.92,82.24,96.14,491.9,07:11,06:32,07:59
429.0,Reaktor Innovations 1,6,Olavi Kanerva,90.76,75.77,108.72,582.91,08:42,08:00,09:32
429.0,Reaktor Innovations 1,7,Jyri Kytömäki,161.28,146.66,177.37,744.38,11:24,10:38,12:16
1131.0,Puskasilimät OK 1,1,Tuomas Ketonen,124.33,101.43,152.41,124.68,01:04,00:42,01:32
1131.0,Puskasilimät OK 1,2,Petri Miettinen,131.06,122.12,140.64,256.0,03:16,02:51,03:45
1131.0,Puskasilimät OK 1,3,Jaakko Havola,146.75,135.82,158.57,402.98,05:42,05:15,06:14


In [715]:
# Subset of the per-runner table that is exported for the web page.
web_columns = [
    'team_id',
    'leg',
    'team',
    'name',
    'fin_sum_log_mean',
    'fin_sum_log_std',
    'fin_median',
    'fint_median',
]
for_web = runner_estimates.copy().reset_index()[web_columns]
for_web

Unnamed: 0,team_id,leg,team,name,fin_sum_log_mean,fin_sum_log_std,fin_median,fint_median
0,3.0,1,Tampereen Pyrintö 1,Otto Simosas,4.216241,0.102922,67.809902,2018-06-17 00:07:48.594122000
1,3.0,2,Tampereen Pyrintö 1,Elias Kuukka,4.952709,0.076640,141.436151,2018-06-17 01:21:26.169085000
2,3.0,3,Tampereen Pyrintö 1,Johan Runesson,5.382200,0.058421,217.409832,2018-06-17 02:37:24.589939000
3,3.0,4,Tampereen Pyrintö 1,Anton Kuukka,5.608784,0.051696,272.557978,2018-06-17 03:32:33.478685000
4,3.0,5,Tampereen Pyrintö 1,Severi Kymäläinen,5.787746,0.043975,326.009013,2018-06-17 04:26:00.540767000
5,3.0,6,Tampereen Pyrintö 1,Aleksi Niemi,5.959401,0.037385,387.030723,2018-06-17 05:27:01.843399000
6,3.0,7,Tampereen Pyrintö 1,Florian Howald,6.162848,0.031237,474.479075,2018-06-17 06:54:28.744524000
7,5.0,1,Halden SK 1,Emil Wingstedt,4.161035,0.025041,64.131176,2018-06-17 00:04:07.870590000
8,5.0,2,Halden SK 1,Fredrik Eliasson,4.936547,0.051559,139.154392,2018-06-17 01:19:09.263498999
9,5.0,3,Halden SK 1,Thomas Natvig Årstad,5.379455,0.049808,216.663619,2018-06-17 02:36:39.817118000


In [716]:
# Write the web table as a JSON array with one object per row.
for_web.to_json(path_or_buf='web-lib/for_web_ju2018.json', orient="records")