In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import logging
import shared
import os
import matplotlib.pyplot as plt

import time

# Select the race the notebook analyses. The values are placed in the process
# environment before any shared.* helper is called; presumably the helpers
# read them from there (TODO confirm against the shared module).
os.environ.update({'RACE_TYPE': "ju", 'FORECAST_YEAR': "2024"})
race_type = shared.race_type()

# Wall-clock timestamp taken when this cell runs.
startTime = time.time()

# Default figure size for every seaborn/matplotlib figure.
sns.set(rc={"figure.figsize": (16, 9)})

# Read the size back from rcParams so that plots taking height/aspect
# (e.g. lmplot) instead of figsize can be given matching dimensions.
figure_size = plt.rcParams['figure.figsize']
global_fig_width, global_fig_height = figure_size
global_aspect_ratio = global_fig_width / global_fig_height

In [None]:
# Tab-separated ideal-pace table; the file name is keyed by the race type.
ideal_paces_path = f'Jukola-terrain/ideal-paces-{race_type}.tsv'
cleaned = pd.read_csv(ideal_paces_path, sep='\t')

In [None]:
# Display the table newest year first, legs ascending within each year.
# The sorted result is only shown; `cleaned` itself is not reassigned.
cleaned.sort_values(
    by=["year", "leg"],
    ascending=[False, True],
)

In [None]:
# Ideal pace per year, one colour per leg.
plot = sns.scatterplot(
    data=cleaned,
    x="year",
    y="ideal_pace",
    hue="leg",
    palette="bright",
)



In [None]:

# Distribution of ideal pace across legs, one box per year.
ax = sns.boxplot(
    data=cleaned,
    x="year",
    y="ideal_pace",
)

In [None]:
# Terrain coefficient per year; the dashed line marks a coefficient of 1
# and is drawn behind the boxes (negative zorder).
sns.boxplot(
    data=cleaned,
    x="year",
    y="terrain_coefficient",
)
plt.axhline(y=1, color='b', linestyle='--', zorder=-1)

In [None]:

# Distribution of ideal pace across years, one box per leg.
ax = sns.boxplot(
    data=cleaned,
    x="leg",
    y="ideal_pace",
)

In [None]:
# Distribution of the `marking` column, one box per year.
ax = sns.boxplot(
    data=cleaned,
    x="year",
    y="marking",
)

In [None]:
# Distribution of the `vertical` column, one box per year.
ax = sns.boxplot(
    data=cleaned,
    x="year",
    y="vertical",
)

In [None]:
# Ideal pace against vertical per km, restricted to rows where `vertical`
# is present; one colour per year.
has_vertical = cleaned["vertical"].notna()
plot = sns.scatterplot(
    data=cleaned[has_vertical],
    x="vertical_per_km",
    y="ideal_pace",
    hue="year",
    palette="bright",
)


In [None]:
# Display the whole `cleaned` ideal-pace table as the cell output.
cleaned

In [None]:
# Mean ideal pace per leg, indexed by leg.
leg_means = cleaned.groupby("leg")[["ideal_pace"]].mean()
leg_means

In [None]:
# Terrain coefficient per year, one colour per leg.
ax = sns.scatterplot(
    data=cleaned,
    x="year",
    y="terrain_coefficient",
    hue="leg",
    palette="bright",
)

In [None]:
# Distribution of terrain coefficient across years, one box per leg.
ax = sns.boxplot(
    data=cleaned,
    x="leg",
    y="terrain_coefficient",
)

In [None]:
# To get all years use next year
# (presumably refers to the FORECAST_YEAR setting behind shared.race_id_str()
# -- TODO confirm).
# Load the per-run results, keep only rows that have a pace, and discard the
# raw `name` column.
runs_path = f'data/long_runs_and_running_order_{shared.race_id_str()}.tsv'
runs = (
    pd.read_csv(runs_path, sep='\t')
    .dropna(subset=['pace'])
    .drop(columns=['name'])
)
runs

In [None]:
# Mean pace of every runner over all of their runs, indexed by unique_name.
runner_means = runs.groupby("unique_name")[["pace"]].mean()
runner_means


In [None]:
def percentile(n):
    """Return an aggregation callable computing the n-th percentile.

    Parameters
    ----------
    n : int or float
        Percentile to compute, on the 0-100 scale used by ``np.percentile``.

    Returns
    -------
    callable
        A one-argument function that returns ``np.percentile(values, n)``.
        Its ``__name__`` is ``"p_<n>"`` so that each percentile passed to a
        pandas ``agg`` call gets its own distinct column label.
    """
    def nth_percentile(values):
        return np.percentile(values, n)

    nth_percentile.__name__ = f"p_{n}"
    return nth_percentile

# Per-year summary of actual paces: count, extremes, mean/median and a
# ladder of percentiles, rounded to two decimals.
pace_aggregations = [
    "count", "min",
    percentile(0.1), percentile(1), percentile(5), percentile(25),
    "mean", "median",
    percentile(75), percentile(95), percentile(99),
    "max",
]
actual_stats_df = runs.groupby("year").agg({"pace": pace_aggregations}).round(2)
actual_stats_df

In [None]:
# Attach each runner's mean pace to every one of their runs.
# Series.map looks the mean up by unique_name and returns a Series aligned
# with `runs`, so no positional `.values` assignment is needed, and a name
# missing from `runner_means` yields NaN instead of raising KeyError.
runs["pace_mean"] = runs["unique_name"].map(runner_means["pace"])

# Pace of this run relative to the runner's own mean pace.
runs["personal_coefficient"] = runs["pace"] / runs["pace_mean"]

# True where the run's pace is below the runner's own mean pace.
runs["below_personal_coefficient"] = runs["personal_coefficient"] < 1
runs.head()

In [None]:
# Per-year summary: mean number of runs, share of runs below the runner's
# own mean pace, and the distribution of the personal coefficient.
coefficient_aggregations = [
    "count", "min",
    percentile(1), percentile(5), percentile(25),
    "mean", "median",
    percentile(75), percentile(95), percentile(99),
    "max",
]
yearly_aggregations = {
    "num_runs": "mean",
    "below_personal_coefficient": "mean",
    "personal_coefficient": coefficient_aggregations,
}
runs.groupby("year").agg(yearly_aggregations).round(2)

In [None]:
# NOTE: this is an alias of `runs`, not a copy, so the two columns added
# below also appear on `runs`.
runs_for_plotting = runs
runs_for_plotting['series'] = 'Actual results'
runs_for_plotting['coefficient'] = runs_for_plotting['personal_coefficient']

# Give the ideal-pace table the same column names as the run data so the two
# can be stacked into one frame.
cleaned['series'] = 'Ideal paces'
cleaned['pace'] = cleaned['ideal_pace']
cleaned['coefficient'] = cleaned['terrain_coefficient']

shared_columns = ['year', 'pace', 'coefficient', 'series']
df_combined = pd.concat(
    [runs_for_plotting[shared_columns], cleaned[shared_columns]],
    ignore_index=True,
)

# Actual versus ideal pace per year, outliers hidden.
ax = sns.boxplot(
    data=df_combined,
    x="year",
    y="pace",
    hue='series',
    showfliers=False,
)

In [None]:
# Years to include in the density plot; None keeps every year.
# The original cell computed a year-filtered frame and then immediately
# overwrote it with the unfiltered data, so the effective behaviour (all
# years) is kept here as the default.
# Example subset: [2004, 2010, 2013, 2014, 2019, 2021, 2022, 2023]
years_to_plot = None

if years_to_plot is None:
    selected_years_runs = runs_for_plotting
else:
    selected_years_runs = runs_for_plotting[runs_for_plotting["year"].isin(years_to_plot)]

# Kernel density estimate of pace, one curve per year.
sns.displot(x="pace", hue="year", kind="kde", height=6, aspect=1.7, palette="bright", data=selected_years_runs)

In [None]:
# Actual personal coefficients versus ideal terrain coefficients per year,
# outliers hidden; the dashed line marks a coefficient of 1.
ax = sns.boxplot(
    data=df_combined,
    x="year",
    y="coefficient",
    hue='series',
    showfliers=False,
)
plt.axhline(y=1, color='b', linestyle='--', zorder=-1)
# Title (Finnish): roughly "Actual runners' mean-pace coefficients and course
# setters' ideal-pace coefficients", followed by the upper-cased race type.
ax.set_title(f"Toteutuneet juoksijoiden keskinopeuskertoimet ja ratamestareiden ideaalinopeuskertoimet {os.environ['RACE_TYPE'].upper()}")

In [None]:
# Columns copied from the ideal-pace table onto every run of the same
# (year, leg).
terrain_columns = ["terrain_coefficient", "marking", "vertical", "vertical_per_km", "ideal_pace"]

# Drop any copies left by a previous execution of this cell before merging.
# Without this, re-running the cell would produce `_x`/`_y` suffixed
# duplicates and the `terrain_coefficient` lookup below would fail.
# errors="ignore" makes the drop a no-op on the first run.
runs = pd.merge(
    runs.drop(columns=terrain_columns, errors="ignore"),
    cleaned[["year", "leg"] + terrain_columns],
    how="left",
    on=["year", "leg"],
)

# Difference between the runner's personal coefficient and the leg's terrain
# coefficient. The column name (including its spelling) is referenced by a
# later cell, so it is kept as is.
runs["personal_terain_diff"] = runs["personal_coefficient"] - runs["terrain_coefficient"]
runs.round(4)

In [None]:
# Linear fit of personal coefficient on terrain coefficient over all runs.
# Points are nearly transparent and slightly jittered along x.
ax = sns.lmplot(
    data=runs,
    x="terrain_coefficient",
    y="personal_coefficient",
    height=8,
    aspect=1.7,
    ci=50,
    x_jitter=0.0004,
    scatter_kws={'alpha': 0.07},
)
ax.set(ylim=(0.45, 1.8))

In [None]:
# Residuals of the linear fit above, with a LOWESS curve through them.
ax = sns.residplot(
    data=runs,
    x="terrain_coefficient",
    y="personal_coefficient",
    lowess=True,
    scatter_kws={'alpha': 0.07},
)

In [None]:
# Same regression as above, fitted separately for each leg.
ax = sns.lmplot(
    data=runs,
    x="terrain_coefficient",
    y="personal_coefficient",
    hue="leg",
    height=8,
    aspect=1.7,
    ci=50,
    x_jitter=0.0003,
    scatter_kws={'alpha': 0.07},
)
ax.set(ylim=(0.6, 1.6))

In [None]:
# Same regression as above, fitted separately for each year.
ax = sns.lmplot(
    data=runs,
    x="terrain_coefficient",
    y="personal_coefficient",
    hue="year",
    height=8,
    aspect=1.7,
    ci=50,
    x_jitter=0.0003,
    scatter_kws={'alpha': 0.07},
)
ax.set(ylim=(0.6, 1.6))

In [None]:
# Linear fit of personal coefficient on the `marking` column.
ax = sns.lmplot(
    data=runs,
    x="marking",
    y="personal_coefficient",
    height=8,
    aspect=1.7,
    ci=50,
    x_jitter=3,
    scatter_kws={'alpha': 0.07},
)
ax.set(ylim=(0.6, 1.6))

In [None]:
# Linear fit of personal coefficient on the `vertical` column.
ax = sns.lmplot(
    data=runs,
    x="vertical",
    y="personal_coefficient",
    height=8,
    aspect=1.7,
    ci=50,
    x_jitter=1,
    scatter_kws={'alpha': 0.07},
)
ax.set(ylim=(0.6, 1.6))

In [None]:
# Linear fit of personal coefficient on vertical per km.
ax = sns.lmplot(
    data=runs,
    x="vertical_per_km",
    y="personal_coefficient",
    height=8,
    aspect=1.7,
    ci=50,
    x_jitter=0.1,
    scatter_kws={'alpha': 0.07},
)
ax.set(ylim=(0.6, 1.6))

In [None]:
# Personal-minus-terrain coefficient difference per year. Outliers are hidden
# from the plot rather than filtered out of the data.
ax = sns.boxplot(
    data=runs,
    x="year",
    y="personal_terain_diff",
    showfliers=False,
)



In [None]:
# One regression panel per leg, stacked as rows.
ax = sns.lmplot(
    data=runs,
    x="terrain_coefficient",
    y="personal_coefficient",
    row="leg",
)

In [None]:
#ax = sns.lmplot(x="terrain_coefficient", y="personal_coefficient",row="year", data=runs)

In [None]:
#
#runs["log_team_id"]= np.log(runs["team_id"])
#ax = sns.lmplot(x="log_team_id", y="personal_coefficient", data=runs)

In [None]:
# Runner to inspect, chosen by race type (falls back to the non-"ju" default).
# Other "ju" runners tried earlier: topi anjala, topi penttinen, jeppe koivula,
# juha meronen, oskari pirttikoski, joni freeman, jukka-pekka seppänen.
default_runner_by_race = {"ju": "janne ala-äijälä"}
runner_name = default_runner_by_race.get(race_type, "nelly carlsson")

# The raw `name` column is dropped when `runs` is loaded, so filtering on it
# raises KeyError on a fresh run. Use it only if present, otherwise match on
# `unique_name`.
# NOTE(review): assumes `unique_name` uses the same lower-case spelling as
# runner_name -- confirm against the data file.
name_column = "name" if "name" in runs.columns else "unique_name"
runner = runs[runs[name_column] == runner_name]

# Actual pace against ideal pace for this runner, sized like the global
# figure settings.
ax = sns.lmplot(x="ideal_pace", y="pace", data=runner, height=global_fig_height, aspect=global_aspect_ratio)
plt.title(runner_name)

# Label every point with its year and leg.
for index, row in runner.iterrows():
    plt.annotate(
        text=f"{row['year']} leg {row['leg']}",  # label text
        xy=(row['ideal_pace'], row['pace']),  # data point being annotated
        xytext=(5, 5),  # label offset from the point
        textcoords='offset points',  # xytext is measured in points, relative to xy
        ha='right',  # horizontal alignment of the text
        va='bottom',  # vertical alignment of the text
    )

In [None]:
# Show the selected runner's rows with numeric columns rounded to 3 decimals.
runner.round(3)