In [575]:
import pandas as pd
import numpy as np
from scipy.stats import lognorm
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_absolute_error

In [576]:
# Load the historical pace table from a tab-separated file.
# df_all and history refer to the same loaded frame at this point.
in_file_name = 'data/grouped_paces_ju.tsv'
history = df_all = pd.read_csv(in_file_name, sep="\t")

In [577]:
# 2018 running order, read from a tab-separated file.
order18 = pd.read_csv(
    'data/running_order_j2018_ju.tsv',
    sep="\t",
)

In [578]:
# Mean and standard deviation of the log paces for every history row.
# The pandas reductions skip NaN (missing paces) like np.nanmean / np.nanstd do,
# but they do not emit a RuntimeWarning for rows where every pace is missing;
# such rows simply get NaN.
pace_columns = ["pace_1", "pace_2", "pace_3", "pace_4", "pace_5", "pace_6"]
paces = history[pace_columns]
logs = np.log(paces)
means = logs.mean(axis=1).to_numpy()
# ddof=0 keeps the population standard deviation that np.nanstd computes by default.
stdevs = logs.std(axis=1, ddof=0).to_numpy()
history = history.assign(log_means=means, log_stdevs=stdevs)

  after removing the cwd from sys.path.
  keepdims=keepdims)


In [579]:
# Estimate values for all, but only use them if no history is available.
# Both models are fitted on history rows that have a finite pace_2 and use
# mean_team_id as the single feature; predictions are made from the 2018 team_id.
with_history = history[np.isfinite(history.pace_2)]
x = with_history.mean_team_id.values.reshape(-1, 1)
team_ids_18 = order18.team_id.values.reshape(-1, 1)

# Linear model for the log mean.
log_means = with_history.log_means.values.reshape(-1, 1)
log_means_model = linear_model.LinearRegression().fit(x, log_means)
estimated_log_means = log_means_model.predict(team_ids_18)
order18 = order18.assign(estimated_log_means=estimated_log_means)

# Linear model for the log standard deviation.
log_stdevs = with_history.log_stdevs.values.reshape(-1, 1)
log_stdevs_model = linear_model.LinearRegression().fit(x, log_stdevs)
estimated_log_stdevs = log_stdevs_model.predict(team_ids_18)
order18 = order18.assign(estimated_log_stdevs=estimated_log_stdevs)

order18.head()

Unnamed: 0,team_id,team,team_base_name,leg,leg_dist,name,estimated_log_means,estimated_log_stdevs
0,5,Halden SK 1,Halden SK,1,11.0,Emil Wingstedt,1.944872,0.085008
1,5,Halden SK 1,Halden SK,2,11.9,Fredrik Eliasson,1.944872,0.085008
2,5,Halden SK 1,Halden SK,3,12.8,Thomas Natvig Årstad,1.944872,0.085008
3,5,Halden SK 1,Halden SK,4,8.7,Alexander Chepelin,1.944872,0.085008
4,5,Halden SK 1,Halden SK,5,8.7,Riccardo Scalet,1.944872,0.085008


In [580]:
# Combine history with 2018 runners
# Placeholder returned when no history row matches. Its zeros are rejected by
# the "> 0" check that later chooses between history and estimated values.
no_history_row = pd.DataFrame([[0,0]], columns=["log_means", "log_stdevs"])

def get_history_row(running_order_row, history_df=None):
    """Find the history row that matches one running-order entry.

    Candidates are the rows whose ``name`` equals the lower-cased runner name
    plus the rows whose ``name`` contains ``"<name>:"``. A single candidate is
    returned as is. Otherwise the candidates are narrowed to rows whose
    ``teams`` contains the upper-cased ``team_base_name``; if several remain,
    the one with the largest ``num_runs`` is returned after printing a
    diagnostic.

    Parameters
    ----------
    running_order_row : mapping or pandas.Series
        Must provide ``"name"`` and ``"team_base_name"``.
    history_df : pandas.DataFrame, optional
        Table to search. Defaults to the module-level ``history`` frame.

    Returns
    -------
    pandas.DataFrame
        A one-row frame with the match, or ``no_history_row`` when nothing
        matches or the runner name is missing.
    """
    if history_df is None:
        history_df = history

    raw_name = running_order_row["name"]
    if not isinstance(raw_name, str):
        # A missing name (e.g. NaN) cannot be matched against the history.
        return no_history_row
    name = raw_name.lower()

    history_names = history_df['name']
    by_name = history_df[history_names == name]
    # na=False keeps the mask boolean when some history names are missing.
    by_name_and_colon = history_df[history_names.str.contains(name + ":", regex=False, na=False)]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    runners = pd.concat([by_name, by_name_and_colon])
    if len(runners) == 1:
        return runners

    team_name = running_order_row["team_base_name"].upper()
    runners = runners[runners['teams'].str.contains(team_name, regex=False, na=False)]
    if len(runners) == 1:
        return runners
    if len(runners) == 0:
        return no_history_row

    print(f"name '{name}' team_name '{team_name}'")
    print(f"by_name {len(by_name)} by_name_and_colon {len(by_name_and_colon)} runners {len(runners)}")
    print(f"Duplicate runner {runners}")
    return runners.sort_values("num_runs", ascending = False).head(1)

def get_estimate_params(running_order_row):
    """Return the history-based log parameters for one running-order row.

    The runner is looked up with get_history_row and the values of the first
    returned row are packed into a Series with the keys
    ``history_log_means`` and ``history_log_stdevs``.
    """
    matched = get_history_row(running_order_row)
    return pd.Series({
        "history_log_means": matched["log_means"].iloc[0],
        "history_log_stdevs": matched["log_stdevs"].iloc[0],
    })

# Keep only the followed teams, then look up the history of each of their runners.
team_names = order18['team']
followed_teams = (
    team_names.str.contains("Reak")
    | team_names.str.contains("Puskasil")
    | team_names.str.contains("Rastihaukat")
)
order18 = order18[followed_teams]
estimate_params = order18.apply(get_estimate_params, axis=1)
order18 = order18.assign(
    history_log_means=estimate_params.history_log_means,
    history_log_stdevs=estimate_params.history_log_stdevs,
)

In [581]:
# Use the runner's own history when it is finite and positive; otherwise fall
# back to the regression estimate (this also rejects the 0 placeholder that is
# returned for runners without history).
# The comparison must be parenthesised: `&` binds tighter than `>`, so
# `isfinite(x) & x > 0` would be evaluated as `(isfinite(x) & x) > 0`.
has_history_mean = np.isfinite(order18["history_log_means"]) & (order18["history_log_means"] > 0)
has_history_stdev = np.isfinite(order18["history_log_stdevs"]) & (order18["history_log_stdevs"] > 0)
order18['log_means'] = np.where(has_history_mean, order18["history_log_means"], order18["estimated_log_means"])
order18['log_stdevs'] = np.where(has_history_stdev, order18["history_log_stdevs"], order18["estimated_log_stdevs"])
order18.head()


Unnamed: 0,team_id,team,team_base_name,leg,leg_dist,name,estimated_log_means,estimated_log_stdevs,history_log_means,history_log_stdevs,log_means,log_stdevs
1429,429,Reaktor Innovations 1,Reaktor Innovations,1,11.0,Mikko Peltonen,2.128667,0.092121,2.184951,0.081299,2.184951,0.081299
1430,429,Reaktor Innovations 1,Reaktor Innovations,2,11.9,Pasi Huhtiniemi,2.128667,0.092121,2.222239,0.072482,2.222239,0.072482
1431,429,Reaktor Innovations 1,Reaktor Innovations,3,12.8,Joni Freeman,2.128667,0.092121,2.130879,0.113846,2.130879,0.113846
1432,429,Reaktor Innovations 1,Reaktor Innovations,4,8.7,Tuomas Kareinen,2.128667,0.092121,2.29324,0.16007,2.29324,0.16007
1433,429,Reaktor Innovations 1,Reaktor Innovations,5,8.7,Janne Vaittinen,2.128667,0.092121,2.324384,0.039861,2.324384,0.039861


In [582]:
# Calculate personal estimates.
# scipy's lognorm takes s = sigma and scale = exp(mu).
# Note: despite its name, log_means holds exp(mu), i.e. the scale parameter.
log_means = np.exp(order18['log_means'])
log_stdevs = order18['log_stdevs']

# Freeze the distribution once and query it for each statistic.
pace_dist = lognorm(s = log_stdevs, scale = log_means)
intervals95 = pace_dist.interval(0.95)
means = pace_dist.mean()
medians = pace_dist.median()

In [583]:
# Multiply each runner's pace statistics by the leg distance to get
# the individual leg estimates.
leg_dist = order18.leg_dist
order18 = order18.assign(
    ind_95_start=(intervals95[0] * leg_dist).values,
    ind_95_end=(intervals95[1] * leg_dist).values,
    ind_mean=(means * leg_dist).values,
    ind_median=(medians * leg_dist).values,
)

In [584]:
# One row per team_id; the columns become a (field, leg) MultiIndex.
by_teams = order18.set_index(["team_id", "leg"]).unstack(level="leg")
by_teams.head()

Unnamed: 0_level_0,team,team,team,team,team,team,team,team_base_name,team_base_name,team_base_name,...,ind_mean,ind_mean,ind_mean,ind_median,ind_median,ind_median,ind_median,ind_median,ind_median,ind_median
leg,1,2,3,4,5,6,7,1,2,3,...,5,6,7,1,2,3,4,5,6,7
team_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
429,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations,Reaktor Innovations,Reaktor Innovations,...,88.987969,91.145398,161.474599,97.792383,109.812846,107.805019,86.19078,88.917301,90.759473,161.284847
568,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat,Rastihaukat,Rastihaukat,...,91.516128,98.451282,137.171722,98.429519,103.747618,136.125039,84.974872,90.809052,98.061808,136.561204
1131,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK,Puskasilimät OK,Puskasilimät OK,...,99.652408,123.706438,176.548436,124.333,131.055553,146.752693,110.577,99.115993,123.040544,175.5981
1246,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat,Rastihaukat,Rastihaukat,...,108.639768,130.055791,155.801222,119.13,113.319264,157.014619,122.436943,108.272688,129.329552,154.826596
1270,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations,Reaktor Innovations,Reaktor Innovations,...,105.867314,121.916293,157.389252,116.864,133.115223,178.927107,67.174301,105.271651,119.808344,156.5037


In [585]:
# remove teams missing some runners:
# keep a team only if log_means is finite for every one of legs 1..7
print(len(by_teams))
all_legs_known = np.isfinite(by_teams.log_means[list(range(1, 8))]).all(axis=1)
by_teams = by_teams[all_legs_known]
print(len(by_teams))

5
5


In [586]:
# The sum of log-normal variables has no closed-form distribution,
# so we simulate n_samples runs for each runner, accumulate them leg by leg
# and then do statistics on the simulated cumulative results.
def simulate_relay_estimates(row, n_samples=10000, n_legs=7, random_state=None):
    """Simulate cumulative relay times for one team.

    For every leg ``1..n_legs`` a log-normal value is drawn with
    ``s = row["log_stdevs"][leg]`` and ``scale = exp(row["log_means"][leg])``
    and multiplied by ``row["leg_dist"][leg]``. The per-leg values are summed
    cumulatively (leg 1, leg 1 + leg 2, ...) and summarised per leg.

    Parameters
    ----------
    row : pandas.Series
        One team, indexed by (field, leg); must contain the fields
        ``log_means``, ``log_stdevs`` and ``leg_dist`` (``name`` is only read
        for the diagnostic print).
    n_samples : int, default 10000
        Number of simulated runs per leg.
    n_legs : int, default 7
        Number of legs; legs are labelled ``1..n_legs``.
    random_state : optional
        Seed or generator accepted by ``numpy.random.default_rng``. With the
        default ``None`` scipy draws from NumPy's global random state.

    Returns
    -------
    pandas.Series
        For each leg: ``fin_start95_<leg>`` / ``fin_end95_<leg>`` (2.5 % and
        97.5 % quantiles), ``fin_median_<leg>``, ``fin_mean_<leg>`` and the
        mean / population std of the log of the cumulative sums
        (``fin_sum_log_mean_<leg>``, ``fin_sum_log_std_<leg>``).
    """
    legs = list(range(1, n_legs + 1))
    log_means = np.array([row["log_means"][leg] for leg in legs], dtype=float)
    log_stdevs = np.array([row["log_stdevs"][leg] for leg in legs], dtype=float)
    leg_dists = np.array([row["leg_dist"][leg] for leg in legs], dtype=float)

    if np.isnan(log_means).any():
        # Diagnostic only: show which team has a runner without parameters.
        print(row["log_means"])
        print(row["name"])

    # One shared generator for all legs, so a seed does not make the legs identical.
    rng = None if random_state is None else np.random.default_rng(random_state)

    # Draw all legs at once; the per-leg parameters broadcast over the sample axis.
    paces = lognorm.rvs(
        s=log_stdevs,
        scale=np.exp(log_means),
        size=(n_samples, n_legs),
        random_state=rng,
    )
    # Running totals: column k holds leg_1 + ... + leg_k for every simulated run.
    samples_sums = pd.DataFrame(np.cumsum(paces * leg_dists, axis=1), columns=legs)

    start95 = samples_sums.quantile(0.025)
    end95 = samples_sums.quantile(0.975)
    medians = samples_sums.median()
    means = samples_sums.mean()

    # Explicit column-wise reductions: np.mean / np.std on a DataFrame forward
    # axis=None to pandas, and what that means depends on the pandas version.
    sum_logs = np.log(samples_sums)
    sum_log_means = sum_logs.mean()
    sum_log_stds = sum_logs.std(ddof=0)  # population std, as np.std computes

    stat_groups = (
        ("fin_start95", start95),
        ("fin_end95", end95),
        ("fin_median", medians),
        ("fin_mean", means),
        ("fin_sum_log_mean", sum_log_means),
        ("fin_sum_log_std", sum_log_stds),
    )
    new_cols = {
        f"{prefix}_{leg}": stats[leg]
        for prefix, stats in stat_groups
        for leg in legs
    }
    return pd.Series(new_cols)

# Seed NumPy's global random state, which scipy's rvs draws from when no
# random_state is passed, so the simulated estimates are reproducible between runs.
np.random.seed(2018)
relay_estimates = by_teams.apply(simulate_relay_estimates, axis=1)
relay_estimates.head()

Unnamed: 0_level_0,fin_start95_1,fin_start95_2,fin_start95_3,fin_start95_4,fin_start95_5,fin_start95_6,fin_start95_7,fin_end95_1,fin_end95_2,fin_end95_3,...,fin_sum_log_mean_5,fin_sum_log_mean_6,fin_sum_log_mean_7,fin_sum_log_std_1,fin_sum_log_std_2,fin_sum_log_std_3,fin_sum_log_std_4,fin_sum_log_std_5,fin_sum_log_std_6,fin_sum_log_std_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
429,83.214404,187.291538,285.380493,363.046091,451.179125,539.092769,699.130783,114.544494,231.640467,351.608122,...,6.199739,6.369545,6.613788,0.080875,0.054378,0.053298,0.054527,0.045293,0.040714,0.033445
568,76.088157,173.767651,305.31462,385.531561,470.925189,566.597727,698.340527,127.175162,237.214442,377.657338,...,6.246266,6.421004,6.622326,0.131458,0.079218,0.054168,0.049279,0.045813,0.040841,0.037227
1131,101.202365,230.331667,375.079865,478.658217,573.146848,690.029985,855.153383,152.399926,285.059076,433.975657,...,6.418954,6.602783,6.817426,0.104495,0.054053,0.037215,0.036752,0.035067,0.033784,0.033635
1246,96.427791,194.559695,339.342371,453.851352,559.569968,685.552521,832.626982,145.965754,280.505056,451.214474,...,6.435864,6.625427,6.813158,0.105945,0.093316,0.07281,0.064424,0.054843,0.048719,0.044573
1270,95.06562,219.732802,364.354938,431.051024,533.693209,644.418758,794.175908,143.682996,285.172332,517.512144,...,6.405913,6.589603,6.785672,0.105774,0.066534,0.089358,0.077983,0.0671,0.064332,0.05614


In [602]:
# Show the fitted log parameters of the cumulative times for the first three legs.
# (The former bare `len(relay_estimates)` line was removed: its value was discarded.)
log_param_cols = [
    f"fin_sum_log_{stat}_{leg}"
    for leg in (1, 2, 3)
    for stat in ("mean", "std")
]
relay_estimates[log_param_cols]

Unnamed: 0_level_0,fin_sum_log_mean_1,fin_sum_log_std_1,fin_sum_log_mean_2,fin_sum_log_std_2,fin_sum_log_mean_3,fin_sum_log_std_3
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
429,4.583128,0.080875,5.337685,0.054378,5.756673,0.053298
568,4.587335,0.131458,5.311839,0.079218,5.82668,0.054168
1131,4.822963,0.104495,5.544263,0.054053,5.998371,0.037215
1246,4.777691,0.105945,5.452037,0.093316,5.969621,0.07281
1270,4.759976,0.105774,5.523052,0.066534,6.067807,0.089358


In [588]:
# Flatten the troublesome (field, leg) multi-index into plain "field_leg" column names.
by_teams_flat = by_teams.copy()
by_teams_flat.columns = [f'{field}_{leg}' for field, leg in by_teams_flat.columns]
# Display only: the result is not assigned, so by_teams_flat keeps its index.
by_teams_flat.reset_index()


Unnamed: 0,team_id,team_1,team_2,team_3,team_4,team_5,team_6,team_7,team_base_name_1,team_base_name_2,...,ind_mean_5,ind_mean_6,ind_mean_7,ind_median_1,ind_median_2,ind_median_3,ind_median_4,ind_median_5,ind_median_6,ind_median_7
0,429,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations 1,Reaktor Innovations,Reaktor Innovations,...,88.987969,91.145398,161.474599,97.792383,109.812846,107.805019,86.19078,88.917301,90.759473,161.284847
1,568,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat 1,Rastihaukat,Rastihaukat,...,91.516128,98.451282,137.171722,98.429519,103.747618,136.125039,84.974872,90.809052,98.061808,136.561204
2,1131,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK 1,Puskasilimät OK,Puskasilimät OK,...,99.652408,123.706438,176.548436,124.333,131.055553,146.752693,110.577,99.115993,123.040544,175.5981
3,1246,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat 2,Rastihaukat,Rastihaukat,...,108.639768,130.055791,155.801222,119.13,113.319264,157.014619,122.436943,108.272688,129.329552,154.826596
4,1270,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations 2,Reaktor Innovations,Reaktor Innovations,...,105.867314,121.916293,157.389252,116.864,133.115223,178.927107,67.174301,105.271651,119.808344,156.5037


In [589]:
# Put the flattened per-leg columns and the simulated relay estimates side by side,
# keeping only index values present in both frames.
estimates = pd.concat(
    [by_teams_flat, relay_estimates],
    join='inner',
    axis=1,
)

In [590]:
# Convert minutes to date and times, counted from the start timestamp.
start_timestamp = pd.Timestamp(year = 2018, month = 6, day = 16, hour = 23)

for leg in range(1, 8):
    for stat in ("median", "start95", "end95"):
        minutes = estimates[f"fin_{stat}_{leg}"]
        estimates[f"fint_{stat}_{leg}"] = pd.to_datetime(minutes * 60, unit = "s", origin = start_timestamp)


In [591]:
# Sort teams by estimated total time (median cumulative time after leg 7),
# then write the table to a tab-separated file.
estimates = estimates.sort_values(by="fin_median_7")
estimates.to_csv('data/team_estimates_ju2018.tsv', sep="\t")

In [592]:
# First ten teams in the sorted order, with their estimated total time.
estimates.loc[:, ["team_1", "fin_median_7"]].head(10)

Unnamed: 0_level_0,team_1,fin_median_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1
429,Reaktor Innovations 1,745.187133
568,Rastihaukat 1,751.437971
1270,Reaktor Innovations 2,885.345873
1246,Rastihaukat 2,909.895931
1131,Puskasilimät OK 1,913.749581


In [593]:
# Select the followed teams and show their total-time estimate with its 95 % interval.
first_leg_team = estimates['team_1']
is_followed = (
    first_leg_team.str.contains("Reak")
    | first_leg_team.str.contains("Puskasil")
    | first_leg_team.str.contains("Rastihaukat")
)
teams_to_follow = estimates[is_followed]
teams_to_follow[["team_1", "fin_median_7", "fin_start95_7", "fin_end95_7"]]

Unnamed: 0_level_0,team_1,fin_median_7,fin_start95_7,fin_end95_7
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
429,Reaktor Innovations 1,745.187133,699.130783,796.86853
568,Rastihaukat 1,751.437971,698.340527,809.394366
1270,Reaktor Innovations 2,885.345873,794.175908,991.371112
1246,Rastihaukat 2,909.895931,832.626982,992.034955
1131,Puskasilimät OK 1,913.749581,855.153383,976.018056


In [594]:
# Reshape from one row per team (field_<leg> columns) to one row per (team_id, leg).
# The stub names are taken from the leg-1 columns. The suffix must be matched at
# the END of the name: a plain `"_1" in c` test would also match e.g. a "_10" suffix.
leg_suffix = "_1"
leg_1_cols = [c for c in estimates.columns.values if c.endswith(leg_suffix)]
column_base_names = [c[:-len(leg_suffix)] for c in leg_1_cols]
runner_estimates = pd.wide_to_long(
    estimates.reset_index(),
    stubnames=column_base_names,
    i="team_id",
    j="leg",
    sep="_",
).sort_values(by=['team_id', 'leg'])
runner_estimates = runner_estimates.drop(['team_base_name', 'estimated_log_means', 'estimated_log_stdevs'], axis=1)
runner_estimates.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,team,leg_dist,name,history_log_means,history_log_stdevs,log_means,log_stdevs,ind_95_start,ind_95_end,ind_mean,ind_median,fin_start95,fin_end95,fin_median,fin_mean,fin_sum_log_mean,fin_sum_log_std,fint_median,fint_start95,fint_end95
team_id,leg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
429,1,Reaktor Innovations 1,11.0,Mikko Peltonen,2.184951,0.081299,2.184951,0.081299,83.388003,114.684964,98.116094,97.792383,83.214404,114.544494,97.846379,98.140039,4.583128,0.080875,2018-06-17 00:37:50.782726,2018-06-17 00:23:12.864252,2018-06-17 00:54:32.669652
429,2,Reaktor Innovations 1,11.9,Pasi Huhtiniemi,2.222239,0.072482,2.222239,0.072482,95.270067,126.575551,110.101683,109.812846,187.291538,231.640467,207.917687,208.33845,5.337685,0.054378,2018-06-17 02:27:55.061244,2018-06-17 02:07:17.492305,2018-06-17 02:51:38.428008
429,3,Reaktor Innovations 1,12.8,Joni Freeman,2.130879,0.113846,2.130879,0.113846,86.24488,134.754922,108.505909,107.805019,285.380493,351.608122,316.098982,316.744045,5.756673,0.053298,2018-06-17 04:16:05.938895,2018-06-17 03:45:22.829551,2018-06-17 04:51:36.487322
429,4,Reaktor Innovations 1,8.7,Tuomas Kareinen,2.29324,0.16007,2.29324,0.16007,62.980977,117.953879,87.302084,86.19078,363.046091,449.447474,403.495193,404.161067,6.000325,0.054527,2018-06-17 05:43:29.711553,2018-06-17 05:03:02.765455,2018-06-17 06:29:26.848432
429,5,Reaktor Innovations 1,8.7,Janne Vaittinen,2.324384,0.039861,2.324384,0.039861,82.235006,96.142589,88.987969,88.917301,451.179125,539.245744,492.487243,493.126516,6.199739,0.045293,2018-06-17 07:12:29.234555,2018-06-17 06:31:10.747476,2018-06-17 07:59:14.744652


In [595]:
# Save the per-runner estimates as a tab-separated file.
runner_estimates.to_csv(
    'data/runner_estimates_ju2018.tsv',
    sep="\t",
)

In [596]:
# Parameters and individual median for runners whose team name contains "Reak".
is_reaktor = runner_estimates['team'].str.contains("Reak")
runner_estimates[is_reaktor][["name", "log_means", "log_stdevs", "ind_median"]]


Unnamed: 0_level_0,Unnamed: 1_level_0,name,log_means,log_stdevs,ind_median
team_id,leg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
429,1,Mikko Peltonen,2.184951,0.081299,97.792383
429,2,Pasi Huhtiniemi,2.222239,0.072482,109.812846
429,3,Joni Freeman,2.130879,0.113846,107.805019
429,4,Tuomas Kareinen,2.29324,0.16007,86.19078
429,5,Janne Vaittinen,2.324384,0.039861,88.917301
429,6,Olavi Kanerva,2.128667,0.092121,90.759473
429,7,Jyri Kytömäki,2.355319,0.048494,161.284847
1270,1,Antti-Ville Jokela,2.363116,0.10623,116.864
1270,2,Karri-Pekka Laakso,2.414677,0.081841,133.115223
1270,3,Oskari Pirttikoski,2.637533,0.194072,178.927107


In [603]:
# Compact table for printing: indexed by team_id, numbers rounded to two
# decimals and the finish timestamps formatted as HH:MM.
for_print = (
    runner_estimates
    .copy()
    .reset_index()
    .set_index('team_id')
    .round(2)
)
for time_col in ("fint_median", "fint_start95", "fint_end95"):
    for_print[time_col] = for_print[time_col].dt.strftime("%H:%M")

print_columns = [
    'team',
    'leg',
    'name',
    'ind_median',
    'ind_95_start',
    'ind_95_end',
    'fin_median',
    'fint_median',
    'fint_start95',
    'fint_end95',
]
for_print = for_print[print_columns]
for_print.head()

Unnamed: 0_level_0,team,leg,name,ind_median,ind_95_start,ind_95_end,fin_median,fint_median,fint_start95,fint_end95
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
429,Reaktor Innovations 1,1,Mikko Peltonen,97.79,83.39,114.68,97.85,00:37,00:23,00:54
429,Reaktor Innovations 1,2,Pasi Huhtiniemi,109.81,95.27,126.58,207.92,02:27,02:07,02:51
429,Reaktor Innovations 1,3,Joni Freeman,107.81,86.24,134.75,316.1,04:16,03:45,04:51
429,Reaktor Innovations 1,4,Tuomas Kareinen,86.19,62.98,117.95,403.5,05:43,05:03,06:29
429,Reaktor Innovations 1,5,Janne Vaittinen,88.92,82.24,96.14,492.49,07:12,06:31,07:59


In [598]:
# Save the printable table as a tab-separated file.
for_print.to_csv(
    'for_print_ju2018.tsv',
    sep="\t",
)

In [599]:
# Printable rows for the selected teams.
print_team = for_print['team']
show_rows = (
    print_team.str.contains("Reak")
    | print_team.str.contains("Puskasil")
    | print_team.str.contains("Rastihaukat 2")
)
for_print[show_rows]


Unnamed: 0_level_0,team,leg,name,ind_median,ind_95_start,ind_95_end,fin_median,fint_median,fint_start95,fint_end95
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
429,Reaktor Innovations 1,1,Mikko Peltonen,97.8,83.4,114.7,97.8,00:37,00:23,00:54
429,Reaktor Innovations 1,2,Pasi Huhtiniemi,109.8,95.3,126.6,207.9,02:27,02:07,02:51
429,Reaktor Innovations 1,3,Joni Freeman,107.8,86.2,134.8,316.1,04:16,03:45,04:51
429,Reaktor Innovations 1,4,Tuomas Kareinen,86.2,63.0,118.0,403.5,05:43,05:03,06:29
429,Reaktor Innovations 1,5,Janne Vaittinen,88.9,82.2,96.1,492.5,07:12,06:31,07:59
429,Reaktor Innovations 1,6,Olavi Kanerva,90.8,75.8,108.7,583.6,08:43,07:59,09:32
429,Reaktor Innovations 1,7,Jyri Kytömäki,161.3,146.7,177.4,745.2,11:25,10:39,12:16
1131,Puskasilimät OK 1,1,Tuomas Ketonen,124.3,101.4,152.4,124.4,01:04,00:41,01:32
1131,Puskasilimät OK 1,2,Petri Miettinen,131.1,122.1,140.6,255.6,03:15,02:50,03:45
1131,Puskasilimät OK 1,3,Jaakko Havola,146.8,135.8,158.6,402.7,05:42,05:15,06:13


In [625]:
# Columns exported for the web view: one record per (team_id, leg).
web_columns = [
    'team_id',
    'leg',
    'team',
    'name',
    'fin_sum_log_mean',
    'fin_sum_log_std',
    'fin_median',
    'fint_median',
]
for_web = runner_estimates.copy().reset_index()
for_web = for_web[web_columns]
for_web

Unnamed: 0,team_id,leg,team,name,fin_sum_log_mean,fin_sum_log_std,fin_median,fint_median
0,429,1,Reaktor Innovations 1,Mikko Peltonen,4.583128,0.080875,97.846379,2018-06-17 00:37:50.782726000
1,429,2,Reaktor Innovations 1,Pasi Huhtiniemi,5.337685,0.054378,207.917687,2018-06-17 02:27:55.061244000
2,429,3,Reaktor Innovations 1,Joni Freeman,5.756673,0.053298,316.098982,2018-06-17 04:16:05.938895000
3,429,4,Reaktor Innovations 1,Tuomas Kareinen,6.000325,0.054527,403.495193,2018-06-17 05:43:29.711553000
4,429,5,Reaktor Innovations 1,Janne Vaittinen,6.199739,0.045293,492.487243,2018-06-17 07:12:29.234555000
5,429,6,Reaktor Innovations 1,Olavi Kanerva,6.369545,0.040714,583.573908,2018-06-17 08:43:34.434460000
6,429,7,Reaktor Innovations 1,Jyri Kytömäki,6.613788,0.033445,745.187133,2018-06-17 11:25:11.227991000
7,568,1,Rastihaukat 1,Harri Kauppinen,4.587335,0.131458,98.191884,2018-06-17 00:38:11.513009999
8,568,2,Rastihaukat 1,Tuomas Helin,5.311839,0.079218,202.564634,2018-06-17 02:22:33.878041000
9,568,3,Rastihaukat 1,Teemu Siniranta,5.82668,0.054168,338.99426,2018-06-17 04:38:59.655618000


In [626]:
# Write the web table as a JSON array with one object per row.
for_web.to_json(
    'web-lib/for_web_ju2018.json',
    orient="records",
)