In [42]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")
import time

Now I look at the workout data, which contains the intensity score and the time spent in each heart rate zone when exercising.

First, I take the file workouts.csv, drop the two subjects whose data I won't use, and save the result. I then fill in the missing days manually, just like I did when analyzing the sleep data, before loading the CSV again. The missing days here mean that a subject did not record an activity during a given day, whether by intentionally taking a day off from exercise, losing the WHOOP strap, or not wearing the WHOOP strap. 

In [43]:
# One-time preprocessing, kept commented out for provenance: drop subjects
# 2509 and 2461 from the raw export and write the intermediate file.
#workouts = pd.read_csv('workouts.csv')
#workouts = workouts[workouts.user_id!=2509]
#workouts = workouts[workouts.user_id!=2461]
#workouts.to_csv('workouts_temp.csv')

# workouts_temp.csv was then edited by hand to add a row for every missing
# user/day. Re-running the commented-out lines above would overwrite that
# manual work.

# Load the hand-completed file and preview it.
workouts = pd.read_csv('workouts_temp.csv')
workouts.head(30)

Unnamed: 0,user_id,start,end,date_md,sport_id,name,intensity_score,z1,z2,z3,z4,z5
0,828,9/15/2015 20:08,9/15/2015 22:56,09/15/15,35.0,Track & Field,20.508564,1066.0,1810.0,1432.0,4668.0,679.0
1,828,,,09/16/15,,,,,,,,
2,828,,,09/17/15,,,,,,,,
3,828,,,09/18/15,,,,,,,,
4,828,9/19/2015 12:46,9/19/2015 14:09,09/19/15,35.0,Track & Field,16.823285,506.0,601.0,1861.0,696.0,22.0
5,828,9/20/2015 13:15,9/20/2015 16:00,09/20/15,35.0,Track & Field,20.506562,929.0,1302.0,1329.0,5353.0,321.0
6,828,9/21/2015 18:55,9/21/2015 18:58,09/21/15,1.0,Cycling,4.278678,8.0,170.0,5.0,0.0,0.0
7,828,9/21/2015 19:19,9/21/2015 21:12,09/21/15,35.0,Track & Field,19.176787,655.0,1757.0,1372.0,2781.0,28.0
8,828,9/22/2015 20:31,9/22/2015 22:00,09/22/15,35.0,Track & Field,18.825382,767.0,992.0,1291.0,806.0,1323.0
9,828,9/23/2015 19:31,9/23/2015 20:21,09/23/15,35.0,Track & Field,16.790126,126.0,499.0,703.0,1452.0,89.0


The calculation for WHOOP's intensity score was updated, so I added those in here. 

In [44]:
# One-time preprocessing, kept commented out for provenance: order the updated
# scores by user and date (highest new score first within a day), drop the
# activity_id column and the two excluded subjects, then write the
# intermediate file.
#updated_intensity_scores = pd.read_csv('updated_intensity_scores.csv')
#updated_intensity_scores=updated_intensity_scores.sort(['user_id', 'date_md', 'new_intensity_score'], 
#                                                       ascending=[True, True, False]).reset_index(drop=True)
#updated_intensity_scores=updated_intensity_scores.drop(['activity_id'], axis=1)
#updated_intensity_scores=updated_intensity_scores[updated_intensity_scores['user_id']!=2509]
#updated_intensity_scores=updated_intensity_scores[updated_intensity_scores['user_id']!=2461]
#updated_intensity_scores.to_csv('updated_intensity_scores_temp.csv')

# The missing dates were then filled in by hand in
# updated_intensity_scores_temp.csv, which is the file loaded here.
updated_intensity_scores = pd.read_csv('updated_intensity_scores_temp.csv')
# axis=1 places the two frames side by side, pairing rows by index label.
# NOTE(review): this assumes both hand-edited files list the same workouts in
# the same row order - confirm, since nothing in this cell checks it.
hold = pd.concat([workouts, updated_intensity_scores], axis=1)
# Transpose, drop duplicate rows, transpose back: this removes every column
# whose values exactly repeat an earlier column (presumably the columns the
# two files share, such as user_id).
fixed_workouts = hold.T.drop_duplicates().T
# Drop the remaining helper columns that are not needed downstream.
fixed_workouts = fixed_workouts.drop(['date_md2', 'old_intensity_score'], axis=1)
fixed_workouts.head(10)

Unnamed: 0,user_id,start,end,date_md,sport_id,name,intensity_score,z1,z2,z3,z4,z5,new_intensity_score
0,828,9/15/2015 20:08,9/15/2015 22:56,09/15/15,35.0,Track & Field,20.50856,1066.0,1810.0,1432.0,4668.0,679.0,20.5
1,828,,,09/16/15,,,,,,,,,
2,828,,,09/17/15,,,,,,,,,
3,828,,,09/18/15,,,,,,,,,
4,828,9/19/2015 12:46,9/19/2015 14:09,09/19/15,35.0,Track & Field,16.82329,506.0,601.0,1861.0,696.0,22.0,14.1
5,828,9/20/2015 13:15,9/20/2015 16:00,09/20/15,35.0,Track & Field,20.50656,929.0,1302.0,1329.0,5353.0,321.0,20.5
6,828,9/21/2015 18:55,9/21/2015 18:58,09/21/15,1.0,Cycling,4.278678,8.0,170.0,5.0,0.0,0.0,4.3
7,828,9/21/2015 19:19,9/21/2015 21:12,09/21/15,35.0,Track & Field,19.17679,655.0,1757.0,1372.0,2781.0,28.0,18.1
8,828,9/22/2015 20:31,9/22/2015 22:00,09/22/15,35.0,Track & Field,18.82538,767.0,992.0,1291.0,806.0,1323.0,18.2
9,828,9/23/2015 19:31,9/23/2015 20:21,09/23/15,35.0,Track & Field,16.79013,126.0,499.0,703.0,1452.0,89.0,4.1


Now I'd like to sum up all of the heart rate zone data for each day - in other words, I only want one user-date row with all summed heart rate zone data. Intensity scores are not summed up to come up with the total intensity score for the day, so I drop those here. 

In [45]:
# Collapse the workouts to one row per (user_id, date_md) by summing z1..z5.

# The two intensity score columns cannot simply be added up across workouts,
# so they are removed here and handled separately.
# NOTE: this rebinds fixed_workouts, so running the cell a second time fails
# because the two columns are already gone.
fixed_workouts = fixed_workouts.drop(['intensity_score', 'new_intensity_score'], axis=1)

# NOTE: sum() is applied to every remaining column, so on days with several
# workouts the text columns (start, end, name) are concatenated and sport_id
# is added up; only the z1..z5 totals are meaningful afterwards.
workouts2 = (fixed_workouts
             .groupby(['user_id', 'date_md'])
             .sum()
             .reset_index()
             .sort(['user_id', 'date_md'], ascending=[True, True]))

# Days without a recorded workout have no zone data: count them as zero
for zone_column in ['z1', 'z2', 'z3', 'z4', 'z5']:
    workouts2[zone_column].fillna(0, inplace=True)

In [46]:
workouts2

Unnamed: 0,user_id,date_md,start,end,sport_id,name,z1,z2,z3,z4,z5
0,828,09/15/15,9/15/2015 20:08,9/15/2015 22:56,35,Track & Field,1066,1810,1432,4668,679
1,828,09/16/15,,,,,0,0,0,0,0
2,828,09/17/15,,,,,0,0,0,0,0
3,828,09/18/15,,,,,0,0,0,0,0
4,828,09/19/15,9/19/2015 12:46,9/19/2015 14:09,35,Track & Field,506,601,1861,696,22
5,828,09/20/15,9/20/2015 13:15,9/20/2015 16:00,35,Track & Field,929,1302,1329,5353,321
6,828,09/21/15,9/21/2015 18:559/21/2015 19:19,9/21/2015 18:589/21/2015 21:12,36,CyclingTrack & Field,663,1927,1377,2781,28
7,828,09/22/15,9/22/2015 20:31,9/22/2015 22:00,35,Track & Field,767,992,1291,806,1323
8,828,09/23/15,9/23/2015 19:319/23/2015 20:47,9/23/2015 20:219/23/2015 20:58,79,Track & FieldYoga,268,512,703,1452,89
9,828,09/24/15,,,,,0,0,0,0,0


This function takes the date of each user-day row and converts 23:59 on that date into epoch time in milliseconds, creating a new variable. I then apply the function to the workouts dataframe. This is almost identical to the function used in sleeps_analysis.ipynb, except that rather than using the end of the user's sleep, it uses the calendar date at 23:59 to calculate the epoch time.

In [47]:
def epoch_end_convert(df):
    '''
    returns the end-of-day epoch timestamp, in milliseconds, for one row
    inputs:
        df = a single row (or any mapping) with a 'date_md' string in
             mm/dd/yy format; this is what DataFrame.apply(..., axis=1)
             passes in, not a whole dataframe
    returns:
        integer milliseconds for 23:59 on that date, as interpreted by
        time.mktime in the machine's local timezone, shifted back by a
        fixed 4400 seconds
    '''
    # NOTE(review): the reason for the 4400 second shift is not documented
    # in this notebook - confirm it before changing the value.
    day_end = time.strptime(df['date_md'] + ' 23:59', '%m/%d/%y %H:%M')
    local_seconds = int(time.mktime(day_end))
    return (local_seconds - 4400) * 1000

In [48]:
# Add the end-of-day epoch (milliseconds) to every user/day row.
workouts2['end_epoch'] = workouts2.apply(epoch_end_convert, axis=1)

In [49]:
workouts2.head()

Unnamed: 0,user_id,date_md,start,end,sport_id,name,z1,z2,z3,z4,z5,end_epoch
0,828,09/15/15,9/15/2015 20:08,9/15/2015 22:56,35.0,Track & Field,1066,1810,1432,4668,679,1442371540000
1,828,09/16/15,,,,,0,0,0,0,0,1442457940000
2,828,09/17/15,,,,,0,0,0,0,0,1442544340000
3,828,09/18/15,,,,,0,0,0,0,0,1442630740000
4,828,09/19/15,9/19/2015 12:46,9/19/2015 14:09,35.0,Track & Field,506,601,1861,696,22,1442717140000


# Adding in Time Periods (For Either Bikes or Races)

Like with the sleep data, I add in the bike and race period for each day, using the epoch dates .csv file

In [50]:
date_markers = pd.read_csv('epoch_dates.csv')

In [51]:
date_markers.head()

Unnamed: 0,ET_Date,date_start_epoch,race_period,bike_period,race_period_start,race_period_end,bike_period_start,bike_period_end
0,9/15/2015 0:00,1442289600000,1,1,1,0,1,0
1,9/16/2015 0:00,1442376000000,1,1,0,0,0,0
2,9/17/2015 0:00,1442462400000,1,1,0,0,0,0
3,9/18/2015 0:00,1442548800000,1,1,0,0,0,0
4,9/19/2015 0:00,1442635200000,1,1,0,0,0,0


Creating the race_start_dict like in the sleeps analysis

In [52]:
# Keep only the rows that mark the first day of a race period.
# drop=True stops reset_index() from inserting an extra 'index' column, so
# date_markers keeps the same columns no matter how often this cell is run.
date_markers = date_markers[date_markers['race_period_start'] > 0].reset_index(drop=True)

# dictionary of race period to start epoch (milliseconds).
# The columns are looked up by name rather than by position, so the mapping
# cannot silently change when a column is added or the cell is re-run.
# tolist() yields plain Python integers for both keys and values.
race_start_dict = dict(zip(date_markers['race_period'].tolist(),
                           date_markers['date_start_epoch'].tolist()))

# The per-user bike boundaries are built inside add_periods (version="bike").


A unique list of subject IDs

In [53]:
# Distinct subject IDs found in the workout data; set() leaves the order arbitrary.
users = list(set(workouts2.user_id.values))

In [54]:
race_start_dict

{0L: 1448164800000L,
 1L: 1442289600000L,
 2L: 1443240000000L,
 3L: 1445054400000L,
 4L: 1446264000000L,
 5L: 1447473600000L}

The functions add_race, add_bike, and add_periods are all the same as in the sleeps analysis. They just add race and bike periods to a dataframe, given the epoch dates for each day.

In [55]:
def add_race(df):
    '''
    labels one row with the race period that its end_epoch falls into
    inputs:
        df = a single row exposing an end_epoch attribute (epoch milliseconds)
    returns:
        1 to 5 for the matching race period, 0 when end_epoch lies outside
        all of them
    reads the module-level race_start_dict (race period -> start epoch)
    '''
    stamp = df.end_epoch
    # Periods 1-4 run from their own start up to, but excluding, the start
    # of the following period.
    for period in (1, 2, 3, 4):
        if (stamp >= race_start_dict[period]) and (stamp < race_start_dict[period + 1]):
            return period
    # Period 5 is closed by a hard-coded end epoch instead of a dictionary entry.
    if (stamp >= race_start_dict[5]) and (stamp < 1448164800000):
        return 5
    return 0


In [56]:
def add_bike(df, bike_start_dict):
    '''
    labels one row with the bike period that its end_epoch falls into
    inputs:
        df = a single row exposing user_id and end_epoch attributes
        bike_start_dict = dictionary keyed by (user_id, bike period 1-8)
                          holding the epoch boundary that closes that period
    returns:
        1 to 9 for the matching bike period; None when the row's user_id is
        not in the module-level users list or no comparison succeeds
    both ends of every range are inclusive, so an end_epoch equal to a
    boundary is given the earlier period
    '''
    for user in users:
        if not (df.user_id == user):
            continue
        stamp = df.end_epoch
        # Everything up to the first boundary belongs to period 1
        if stamp <= bike_start_dict[user, 1]:
            return 1
        # Periods 2-8 sit between two consecutive boundaries
        for period in range(2, 9):
            if (stamp <= bike_start_dict[user, period]) and (stamp >= bike_start_dict[user, period - 1]):
                return period
        # Anything from the last boundary onwards is period 9
        if stamp >= bike_start_dict[user, 8]:
            return 9
                

In [57]:
def add_periods(date_markers, df, buildup_days=0, version=None):
    '''
    tags every row of df with its race or bike period and counts the days
    back from the end of that period
    inputs:
        date_markers = accepted so existing calls keep working; it is not
                       read here, because add_race takes its boundaries from
                       the module-level race_start_dict
        df = dataframe with user_id and end_epoch columns. NOTE: the
             race_period / bike_period column is added to this dataframe in
             place, so the caller's frame is modified
        buildup_days = when non-zero, only rows whose buildup_days value is
                       strictly below this number are returned
        version = "race" or "bike"
    returns:
        a new dataframe sorted by user_id, period and descending end_epoch,
        with an added buildup_days column (0 = latest day of the period)
    raises:
        ValueError when version is neither "race" nor "bike"
    '''
    if version == "race":
        period_column = 'race_period'
        # Label each race period as 1, 2, 3, 4, or 5 (0 = outside every period)
        df[period_column] = df.apply(add_race, axis=1)

    elif version == "bike":
        period_column = 'bike_period'
        bike_dates = pd.read_csv('huxc_bike_distances.csv')
        # 86400000 ms = one day
        bike_dates['next_day'] = bike_dates['date_start_epoch'] + 86400000
        # key is (column 0, column 1) of bike_dates, used as (user_id, bike period);
        # value is column 5.
        # NOTE(review): position 5 is presumably the next_day column added
        # above, which holds only if the CSV has exactly five columns - confirm.
        bike_start_dict = {}
        for row in bike_dates.values:
            bike_start_dict[row[0], row[1]] = row[5]
        # A user with no boundary for a period borrows the one recorded for user 2469
        for user in users:
            for i in range(1, 10):
                if np.isnan(bike_start_dict[user, i]):
                    bike_start_dict[user, i] = bike_start_dict[2469, i]
        df[period_column] = df.apply(add_bike, axis=1, args=(bike_start_dict,))

    else:
        # Fail with a clear message instead of crashing later on an unset variable
        raise ValueError("Enter Either 'race' or 'bike'!")

    # add in buildup days: with the latest day first inside each
    # (user, period) group, cumcount() numbers the rows 0, 1, 2, ...
    df = df.sort(['user_id', period_column, 'end_epoch'], ascending=[True, True, False]).reset_index(drop=True)
    period_groups = df.groupby(['user_id', period_column]).cumcount()
    build_days = pd.DataFrame(dict(buildup_days = period_groups))
    return_df = df.join(build_days, how="left")

    if buildup_days != 0:
        return_df = return_df[return_df['buildup_days'] < buildup_days]

    return return_df

In [58]:
# Preview of add_periods on the race version: keep the rows with
# buildup_days below 14, restore user/date order, drop the rows outside
# every race period, and show the result.
test = add_periods(date_markers=date_markers, df=workouts2, buildup_days=14, version="race")
# ascending needs real booleans: a string such as 'False' is truthy and would
# still sort ascending. date_md is text in mm/dd/yy form, so this ordering is
# chronological only while all dates fall in the same year.
test = test.sort(['user_id', 'date_md'], ascending=[True, True]).reset_index(drop=True)
test = test[test['race_period']!=0]
test = test.drop(['sport_id'], axis=1)
test.head(25)

Unnamed: 0,user_id,date_md,start,end,name,z1,z2,z3,z4,z5,end_epoch,race_period,buildup_days
0,828,09/15/15,9/15/2015 20:08,9/15/2015 22:56,Track & Field,1066,1810,1432,4668,679,1442371540000,1,10
1,828,09/16/15,,,,0,0,0,0,0,1442457940000,1,9
2,828,09/17/15,,,,0,0,0,0,0,1442544340000,1,8
3,828,09/18/15,,,,0,0,0,0,0,1442630740000,1,7
4,828,09/19/15,9/19/2015 12:46,9/19/2015 14:09,Track & Field,506,601,1861,696,22,1442717140000,1,6
5,828,09/20/15,9/20/2015 13:15,9/20/2015 16:00,Track & Field,929,1302,1329,5353,321,1442803540000,1,5
6,828,09/21/15,9/21/2015 18:559/21/2015 19:19,9/21/2015 18:589/21/2015 21:12,CyclingTrack & Field,663,1927,1377,2781,28,1442889940000,1,4
7,828,09/22/15,9/22/2015 20:31,9/22/2015 22:00,Track & Field,767,992,1291,806,1323,1442976340000,1,3
8,828,09/23/15,9/23/2015 19:319/23/2015 20:47,9/23/2015 20:219/23/2015 20:58,Track & FieldYoga,268,512,703,1452,89,1443062740000,1,2
9,828,09/24/15,,,,0,0,0,0,0,1443149140000,1,1


# Aggregating Weighted Rolling Average of Variables Leading up To Races/Bikes

I go through almost the same process as before to create a weighted rolling average of a variable. The only difference is that I do NOT include race day here - including the heart rate zone data on race day in the weighted average isn't necessary - I care more about how hard someone has worked the days before the race, not on race day. 

In [59]:
def filter_nans(x, y):
    '''
    drops every position at which either x or y holds NaN
    inputs:
        x, y = sequences of numbers that are paired up position by position
               (as with zip, extra items in the longer one are ignored)
    returns:
        two lists, the kept x values and the kept y values, in their
        original order
    '''
    # Build the surviving pairs as a real list. A lazy filter()/zip() object
    # can be walked only once, which would leave the second result empty on
    # Python 3.
    kept = [(a, b) for a, b in zip(x, y) if not np.isnan(a) and not np.isnan(b)]
    return [a for a, _ in kept], [b for _, b in kept]

In [60]:
def get_item(df_column_tuple):
    '''
    returns the first two entries of a tuple as a pair
    inputs:
        df_column_tuple = any indexable value with at least two entries
    '''
    first = df_column_tuple[0]
    second = df_column_tuple[1]
    return first, second

In [66]:
def weighted_rolling_average(data):
    '''
    linearly weighted average of every value in data except the last one
    the value at position i gets weight i+1, so later entries count more;
    NaN values are skipped together with their weights
    inputs:
        data = values indexable by 0 .. len(data)-1 (a list, or a Series
               with a default integer index)
    returns:
        the weighted average as a float, or NaN when nothing is left to
        average (fewer than two entries, or every usable entry is NaN)
    '''
    weighted_sum = 0.0
    weight_total = 0
    # The only difference from sleeps analysis - the range stops at n-1
    # instead of n, which leaves the final entry out.
    for i in range(0, len(data) - 1):
        weight = i + 1
        term = weight * data[i]
        # must take into account nan: drop the value and its weight together
        if np.isnan(term):
            continue
        weighted_sum += term
        weight_total += weight
    # Weights start at 1, so a zero total means no value was usable
    if weight_total == 0:
        return float('nan')
    # float() keeps this a true division even for integer data on Python 2
    return weighted_sum / float(weight_total)

In [62]:
def weighted_rolling_average_df(df, activity_type, variable):
    '''
    builds one row per (user, period) holding the weighted rolling average
    of variable
    inputs:
        df = dataframe with user_id, <activity_type>_period and variable columns
        activity_type = 'bike' (periods 1-9) or 'race' (periods 1-5)
        variable = name of the column to average
    returns:
        dataframe with the columns user_id, <activity_type>_period and
        variable, sorted by user and then period
    uses the module-level users list
    '''
    # Any other activity_type leaves the list of periods empty
    if activity_type == 'bike':
        period_column, periods = 'bike_period', [1, 2, 3, 4, 5, 6, 7, 8, 9]
    elif activity_type == 'race':
        period_column, periods = 'race_period', [1, 2, 3, 4, 5]
    else:
        period_column, periods = None, []

    # key is (user, period, variable), value is the weighted average
    weighted_avg_dict = {}
    if periods:
        for u in users:
            user_df = df[df['user_id'] == u]
            for period in periods:
                user_period_df = user_df[user_df[period_column] == period]
                series = user_period_df[variable].reset_index(drop=True)
                weighted_avg_dict[u, period, variable] = weighted_rolling_average(series)

    # Turn the dictionary into a frame, then split the key back into columns
    agg_df = pd.DataFrame(weighted_avg_dict.items(), columns=['temp', variable+'_temp'])
    agg_df['user_id'], agg_df[activity_type+'_period'] = zip(*agg_df['temp'].map(get_item))
    agg_df[variable] = agg_df[variable+'_temp']
    agg_df = agg_df.sort(['user_id', activity_type+'_period'], ascending = [True, True]).reset_index(drop=True)
    return agg_df.drop(['temp', variable+'_temp'], axis=1)

In [63]:
def make_intensity_df(df_to_calculate, activity_type):
    '''
    assembles the modelling dataframe: weighted heart rate zone averages per
    user and period, user dummy columns, and the race or bike results
    inputs:
        df_to_calculate = dataframe accepted by weighted_rolling_average_df
        activity_type = 'race' or 'bike'
    returns:
        the merged dataframe; for any other activity_type a message string
        is returned instead
    reads huxc_race_times.csv or huxc_bike_distances.csv from the working
    directory
    '''
    if activity_type != 'bike' and activity_type != 'race':
        return 'Enter either "race" or "bike", please.'

    # One frame per heart rate zone, merged left to right on their shared columns
    zone_frames = [weighted_rolling_average_df(df_to_calculate, activity_type, zone)
                   for zone in ('z1', 'z2', 'z3', 'z4', 'z5')]
    merged_df = zone_frames[0]
    for zone_frame in zone_frames[1:]:
        merged_df = pd.merge(merged_df, zone_frame, how='left')

    # One indicator column per user; user_828 is dropped
    # (presumably as the reference category - confirm)
    dummies = pd.get_dummies(merged_df['user_id'], prefix='user')
    final_df = pd.concat([merged_df, dummies], axis=1).drop(['user_828'], axis=1)

    if activity_type == 'race':
        results = pd.read_csv('huxc_race_times.csv')
        for excluded_user in (2461, 2509):
            results = results[results['user_id'] != excluded_user]
        final_race_df = final_df.merge(results, how='left')
        # User 2439 is removed from the race frame only
        return final_race_df[final_race_df['user_id'] != 2439]

    results = pd.read_csv('huxc_bike_distances.csv')
    for excluded_user in (2461, 2509):
        results = results[results['user_id'] != excluded_user]
    results = results.drop(['date_md', 'date_start_epoch'], axis=1)
    return final_df.merge(results, how='left')

### For Races

Just like in sleeps analysis, I create all of these dataframes for analysis

In [67]:
# Tag every user/day with its race period, keep the rows with buildup_days
# below 14, and drop the days that fall outside every race period.
test_race_temp = add_periods(date_markers=date_markers, df=workouts2, buildup_days=14, version="race")
# ascending needs real booleans: a string such as 'False' is truthy and would
# still sort ascending.
test_race_temp = test_race_temp.sort(['user_id', 'date_md'], ascending=[True, True]).reset_index(drop=True)
test_race_temp = test_race_temp[test_race_temp['race_period']!=0]
test_race_temp = test_race_temp.drop(['sport_id', 'start', 'end', 'name'], axis=1)

# Attach the race results, minus the columns not wanted in this file
race_times = pd.read_csv('huxc_race_times.csv')
race_times = race_times.drop(['seconds', 'pace_per_k', 'pace_time', 'FP_5K', 'Wisco_8K', 'Brown_8K', 'VCP_8K', 'FP_10K'], axis=1)
test_race = test_race_temp.merge(race_times, on=['user_id', 'race_period'],how='left')

test_race.to_csv('workout_race_df_pre_weighting.csv')

# Every row kept by the race preparation step (buildup_days below 14)
df = make_intensity_df(test_race, 'race')
df.to_csv('workout_final_race_df.csv')

# Shorter windows, smallest first. buildup_days <= N keeps the last day of
# the period (0) plus the N days before it.
test_race_twodays = test_race[test_race['buildup_days'] <= 2]
df_two_weighted = make_intensity_df(test_race_twodays, 'race')
df_two_weighted.to_csv('workout_final_race_df_2daysweighted.csv')

test_race_threedays = test_race[test_race['buildup_days'] <= 3]
df_three_weighted = make_intensity_df(test_race_threedays, 'race')
df_three_weighted.to_csv('workout_final_race_df_3daysweighted.csv')

test_race_fourdays = test_race[test_race['buildup_days'] <= 4]
df_four_weighted = make_intensity_df(test_race_fourdays, 'race')
df_four_weighted.to_csv('workout_final_race_df_4daysweighted.csv')

test_race_fivedays = test_race[test_race['buildup_days'] <= 5]
df_five_weighted = make_intensity_df(test_race_fivedays, 'race')
df_five_weighted.to_csv('workout_final_race_df_5daysweighted.csv')

test_race_sixdays = test_race[test_race['buildup_days'] <= 6]
df_six_weighted = make_intensity_df(test_race_sixdays, 'race')
df_six_weighted.to_csv('workout_final_race_df_6daysweighted.csv')

test_race_sevendays = test_race[test_race['buildup_days'] <= 7]
df_seven_weighted = make_intensity_df(test_race_sevendays, 'race')
df_seven_weighted.to_csv('workout_final_race_df_7daysweighted.csv')

test_race_tendays = test_race[test_race['buildup_days'] <= 10]
df_ten_weighted = make_intensity_df(test_race_tendays, 'race')
df_ten_weighted.to_csv('workout_final_race_df_10daysweighted.csv')

### Bike efforts

In [68]:
# Tag every user/day with its bike period and keep the rows with
# buildup_days below 14.
test_bike = add_periods(date_markers=date_markers, df=workouts2, buildup_days=14, version="bike")
# ascending needs real booleans: a string such as 'False' is truthy and would
# still sort ascending.
test_bike = test_bike.sort(['user_id', 'date_md'], ascending=[True, True]).reset_index(drop=True)
# NOTE: 'race_period' is present only because the earlier race call to
# add_periods added that column to workouts2 in place, so the race cells
# must have been run before this one.
test_bike = test_bike.drop(['sport_id', 'race_period', 'start', 'end', 'name'], axis=1)

test_bike.to_csv('workout_bike_df_pre_weighting.csv')

# Every row kept by the bike preparation step (buildup_days below 14)
df = make_intensity_df(test_bike, 'bike')
df.to_csv('workout_final_bike_df.csv')

# Shorter windows, smallest first. buildup_days <= N keeps the last day of
# the period (0) plus the N days before it.
test_bike_twodays = test_bike[test_bike['buildup_days'] <= 2]
df_two_weighted = make_intensity_df(test_bike_twodays, 'bike')
df_two_weighted.to_csv('workout_final_bike_df_2daysweighted.csv')

test_bike_threedays = test_bike[test_bike['buildup_days'] <= 3]
df_three_weighted = make_intensity_df(test_bike_threedays, 'bike')
df_three_weighted.to_csv('workout_final_bike_df_3daysweighted.csv')

test_bike_fourdays = test_bike[test_bike['buildup_days'] <= 4]
df_four_weighted = make_intensity_df(test_bike_fourdays, 'bike')
df_four_weighted.to_csv('workout_final_bike_df_4daysweighted.csv')

test_bike_fivedays = test_bike[test_bike['buildup_days'] <= 5]
df_five_weighted = make_intensity_df(test_bike_fivedays, 'bike')
df_five_weighted.to_csv('workout_final_bike_df_5daysweighted.csv')

test_bike_sixdays = test_bike[test_bike['buildup_days'] <= 6]
df_six_weighted = make_intensity_df(test_bike_sixdays, 'bike')
df_six_weighted.to_csv('workout_final_bike_df_6daysweighted.csv')

test_bike_sevendays = test_bike[test_bike['buildup_days'] <= 7]
df_seven_weighted = make_intensity_df(test_bike_sevendays, 'bike')
df_seven_weighted.to_csv('workout_final_bike_df_7daysweighted.csv')

# Dealing with new intensity scores and adding to final_race_df.csv

In [None]:
#use fixed_workouts to start with