In [1]:
import pandas as pd
import os
import glob
import numpy as np
from datetime import datetime

In [2]:
path = os.getcwd()
csv_files = glob.glob(os.path.join(path, "..", "HR Data", "*.csv"))

In [11]:
hr_list = [pd.read_csv(f).drop(["Start time", "Date", "Duration", "Average heart rate (bpm)", "Calories"], axis=1) for f
           in csv_files]

for df in hr_list:
    df["Player Number"] = int(df["Player Number"][0])

hr_dataframe = pd.concat(hr_list)
hr_dataframe["Time"] = hr_dataframe["Time"].apply(lambda d: datetime.strptime(d, "%H:%M:%S"))
hr_dataframe.rename(columns={'Player Number': 'player'}, inplace=True)

In [12]:
def hr_stats(hri):
    return pd.Series({"hr_min": hri["HR (bpm)"].min(),
                      "hr_max": hri["HR (bpm)"].max(),
                      "hr_mean": hri["HR (bpm)"].mean(),
                      "hr_std": hri["HR (bpm)"].std(),
                      "hr_peak": hri["HR (bpm)"][hri["HR (bpm)"] >= 0.9 * hri["HR (bpm)"].max()].count() / hri[
                          "HR (bpm)"].count(),
                      "hr_bottom": hri["HR (bpm)"][hri["HR (bpm)"] <= 1.1 * hri["HR (bpm)"].min()].count() / hri[
                          "HR (bpm)"].count()})


In [13]:
hr_player_summary = hr_dataframe.groupby(["player"]).apply(hr_stats)
hr_player_summary.sort_values(by="player").to_csv(os.path.join(path, "..", "Processed", "hr_summary.csv"))

In [14]:
runs = pd.read_csv(os.path.join(path, "..", "Processed", "run_stats.csv"))

runs["run_start"] = runs["run_start"].apply(lambda d: datetime.strptime(d, "%M:%S:%f"))
runs["run_end"] = runs["run_end"].apply(lambda d: datetime.strptime(d, "%M:%S:%f"))

In [36]:

def calculate_run(hri, runs):
    hri["attempt"] = -1
    player_runs = runs[["attempt", "run_start", "run_end"]][runs["player"] == hri["player"]]
    if player_runs["run_start"].min() <= hri["Time"] <= player_runs["run_end"].max():
        for index, row in player_runs.iterrows():
            if row["run_start"] <= hri["Time"] <= row["run_end"]:
                hri["attempt"] = row["attempt"]
                break
    return hri


runs_hr = hr_dataframe.apply(lambda hri: calculate_run(hri, runs), axis=1)

In [42]:
runs_hr.set_index(["player", "attempt"], inplace=True)
runs_hr.sort_values(by=["player", "attempt","time"]).to_csv(os.path.join(path, "..", "Processed", "runs_hr.csv"))

KeyError: "None of ['player', 'attempt'] are in the columns"

In [43]:
def run_hr_stats(hri, all_player_summary):
    player_number = hri.index[0][0]
    player_summary = all_player_summary.loc[[player_number]]
    hr_min = player_summary["hr_min"].min()
    hr_max = player_summary["hr_max"].max()
    return pd.Series({"hr_min": hri["HR (bpm)"].min(),
                      "hr_max": hri["HR (bpm)"].max(),
                      "hr_mean": hri["HR (bpm)"].mean(),
                      "hr_std": hri["HR (bpm)"].std(),
                      "hr_peak": hri["HR (bpm)"][hri["HR (bpm)"] >= 0.9 * hr_max].count() / hri[
                          "HR (bpm)"].count(),
                      "hr_bottom": hri["HR (bpm)"][hri["HR (bpm)"] <= 1.1 * hr_min].count() / hri[
                          "HR (bpm)"].count()})


hr_runs_summary = runs_hr.groupby(["player", "attempt"]).apply(lambda hri: run_hr_stats(hri, hr_player_summary))
hr_runs_summary.to_csv(os.path.join(path, "..", "Processed", "runs_hr_summary.csv"))