In [1]:
# import relevant libraries
import pandas as pd
import numpy as np
import re
import ast
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# Loading Data

In [2]:
# load as csv
results= pd.read_csv('/kaggle/input/my-own-coordinate-data/segment_results_1.csv')
consistency = pd.read_csv('/kaggle/input/my-own-coordinate-data/consistency_metrics_1.csv')

# Obtain Benchmark Data

Data stem from: Older Adults: (Rikli & Jones, 1999b; N = 7,183; age range = 60-94 years),Normative data for males (n = 2,135, Normative data for females (n = 5,048)

First, obtain cut-off scores to determine whether a test taker is physically independent or not.

In [3]:
# set age ranges and cut-off scores for both men and women
ages = ['60 - 64', '65 - 69', '70 - 74', '75 - 79', '80 - 84', '85 - 89', '90 - 94']
threshold_male = [106, 101, 95, 88, 80, 71, 60]
threshold_female = [97, 93, 89, 84, 78, 70, 60]

# store eveything as a dataframe
physical_independence = pd.DataFrame({
    'age_range': ages,
    'threshold_male': threshold_male,
    'threshold_female': threshold_female
})

# and add start age per age-group for convenience
physical_independence['start_age'] = [60, 65, 70, 75, 80, 85, 90]

In [4]:
physical_independence

Unnamed: 0,age_range,threshold_male,threshold_female,start_age
0,60 - 64,106,97,60
1,65 - 69,101,93,65
2,70 - 74,95,89,70
3,75 - 79,88,84,75
4,80 - 84,80,78,80
5,85 - 89,71,70,85
6,90 - 94,60,60,90


In [5]:
# set age ranges, start age, the mean number of steps taken as well as standard deviation for both women and men
ages = ['60 - 64', '65 - 69', '70 - 74', '75 - 79', '80 - 84', '85 - 89', '90 - 94']
start_ages = [60, 65, 70, 75, 80, 85, 90]
steps_male = [101, 101, 95, 91, 87, 75, 69]
steps_female = [91, 90, 84, 84, 75, 70, 58]
sd_male = [21, 23, 23, 27, 24, 24, 26]
sd_female = [24, 26, 25, 24, 23, 22, 21]

# store everything as one dataframe
benchmark_data = pd.DataFrame({
    'steps_female':  steps_female,
    'sd_female': sd_female,
    'steps_male': steps_male,
    'sd_male': sd_male, 
    'start_age': start_ages,
    'age_range': ages
})

In [6]:
benchmark_data

Unnamed: 0,steps_female,sd_female,steps_male,sd_male,start_age,age_range
0,91,24,101,21,60,60 - 64
1,90,26,101,23,65,65 - 69
2,84,25,95,23,70,70 - 74
3,84,24,91,27,75,75 - 79
4,75,23,87,24,80,80 - 84
5,70,22,75,24,85,85 - 89
6,58,21,69,26,90,90 - 94


## Interpolation for Full Benchmark Dataset

Given the limitations in available data, the benchmark data will be inerpolated to make use of trends in  within genders and over age ranges. First, the threshold scores for physical independence are interpolated for older test takers who fall out of the age ranges reported by Rikli & Jones (1999b). Please note that all interpolated scores are based on age and differentiated by gender. Given the lack of personal information regarding physical status including ailments, illnesses and disabilities, the physical independence status must be taken with a grain of salt and mainly speaks about age as a factor, which decreases performance. Therefore, cutoff-scoers for younger test takers are not interpolated. 

In [7]:
# compute differences between consecutive age ranges to obtain downwards trend in the physical independence threshold
physical_independence['male_diff'] = physical_independence['threshold_male'].diff().fillna(0)
physical_independence['female_diff'] = physical_independence['threshold_female'].diff().fillna(0)

In [8]:
# obtain the 'trend' as mean decline for women and men
mean_male_diff = physical_independence['male_diff'].mean()
mean_female_diff = physical_independence['female_diff'].mean()

In [9]:
# set new age ranges for adults between the ages of 95 and 120 yrs
new_ages = np.arange(95, 125, 5)
new_age_ranges = [f'{age} - {age + 4}' for age in new_ages]

In [10]:
# select last known thresholds for both women and men
last_male_score = physical_independence['threshold_male'].iloc[-1]
last_female_score = physical_independence['threshold_female'].iloc[-1]

In [11]:
# and continue to predict the new thresholds for each new age range per gender based on last known threshold and mean decline
predicted_male = [round(last_male_score + (i * mean_male_diff)) for i in range(1, len(new_ages) + 1)]
predicted_female = [round(last_female_score + (i * mean_female_diff)) for i in range(1, len(new_ages) + 1)]

In [12]:
# store interpolated values in new df
interpolated_cutoffs = pd.DataFrame({
    'age_range': new_age_ranges,
    'start_age': new_ages,
    'threshold_male': predicted_male,
    'threshold_female': predicted_female
})

# and concat my interpolated data to physical independence data from Rikli & Jones (1999b)
full_cutoff_data = pd.concat([physical_independence[['age_range', 'start_age', 'threshold_male', 'threshold_female']], interpolated_cutoffs]).reset_index(drop=True)

print(full_cutoff_data)

    age_range  start_age  threshold_male  threshold_female
0     60 - 64         60             106                97
1     65 - 69         65             101                93
2     70 - 74         70              95                89
3     75 - 79         75              88                84
4     80 - 84         80              80                78
5     85 - 89         85              71                70
6     90 - 94         90              60                60
7     95 - 99         95              53                55
8   100 - 104        100              47                49
9   105 - 109        105              40                44
10  110 - 114        110              34                39
11  115 - 119        115              27                34
12  120 - 124        120              21                28


Interesting, due to the differences between thresholds for men being larger, their cut-off scores decrease more drastically than the threshold estimates for women. 

## And include younger Test Takers

Given the original purpose of a cut-off-score for test takers who are at least 60 years of age, I will retrain the same logic and apply the threshold of physical independence only to older test takers.

In [13]:
# filter physical independence data to include only people between 60 - 94 yrs of age (use original sample and not interpolated data!)
#physical_independence_filtered = physical_independence.loc[physical_independence['start_age'] <= 94]

# obtain differences for men and women
#physical_independence_filtered['male_diff'] = physical_independence_filtered['threshold_male'].diff().fillna(0)
#physical_independence_filtered['female_diff'] = physical_independence_filtered['threshold_female'].diff().fillna(0)

In [14]:
# compute (negative) mean difference
#mean_male_diff = -physical_independence_filtered['male_diff'].mean()
#mean_female_diff = -physical_independence_filtered['female_diff'].mean()

In [15]:
# set new age groups for test takers between 15 and 59 yrs of age
#new_ages = np.arange(15, 60, 5)
#new_age_ranges = [f'{age} - {age + 4}' for age in new_ages]

In [16]:
#first_male_score = physical_independence_filtered['threshold_male'].iloc[0]
#first_female_score = physical_independence_filtered['threshold_female'].iloc[0]

#predicted_male = [first_male_score + ((len(new_ages) - 1 - i) * mean_male_diff) for i in range(len(new_ages))]
#predicted_female = [first_female_score + ((len(new_ages) - 1 - i) * mean_female_diff) for i in range(len(new_ages))]

In [17]:
#interpolated_cutoffs_younger = pd.DataFrame({
#    'age_range': new_age_ranges,
#    'start_age': new_ages,
#    'threshold_male': predicted_male,
#    'threshold_female': predicted_female
#})

#full_cutoff_data = pd.concat([interpolated_cutoffs_younger, full_cutoff_data]).reset_index(drop=True)

In [18]:
#full_cutoff_data[['threshold_male', 'threshold_female']] = full_cutoff_data[['threshold_male', 'threshold_female']].round(0).astype(int)

#print(full_cutoff_data)

## Interpolating Benchmark Data

I will again interpolate missing data for age ranges 15 - 59 years of age and 95 to 120 years of age, as those were not present in the data presented by Rikli & Jones (1999b). This time, I will focus on the mean number of steps taken and the standard deviation of those, per age group and gender. 

In [19]:
# compute differences between consecutive age ranges to obtain downwards trend in the physical independence threshold and also st. deviations
benchmark_data['steps_male_diff'] = benchmark_data['steps_male'].diff().fillna(0)
benchmark_data['steps_female_diff'] = benchmark_data['steps_female'].diff().fillna(0)
benchmark_data['sd_male_diff'] = benchmark_data['sd_male'].diff().fillna(0)
benchmark_data['sd_female_diff'] = benchmark_data['sd_female'].diff().fillna(0)

In [20]:
# compute means of steps taken and sd
mean_steps_male_diff = benchmark_data['steps_male_diff'].mean()
mean_steps_female_diff = benchmark_data['steps_female_diff'].mean()
mean_sd_male_diff = benchmark_data['sd_male_diff'].mean()
mean_sd_female_diff = benchmark_data['sd_female_diff'].mean()

In [21]:
# set new age ranges for adults between the ages of 95 and 120 yrs
new_ages = np.arange(95, 125, 5)
new_age_ranges = [f'{age} - {age + 4}' for age in new_ages]

In [22]:
# select last known mean nr of steps and sd for both women and men
last_steps_male = benchmark_data['steps_male'].iloc[-1]
last_steps_female = benchmark_data['steps_female'].iloc[-1]
last_sd_male = benchmark_data['sd_male'].iloc[-1]
last_sd_female = benchmark_data['sd_female'].iloc[-1]

In [23]:
# and predict mean number of steps and sd per new age range
predicted_steps_male = [last_steps_male + (i * mean_steps_male_diff) for i in range(1, len(new_ages) + 1)]
predicted_steps_female = [last_steps_female + (i * mean_steps_female_diff) for i in range(1, len(new_ages) + 1)]
predicted_sd_male = [last_sd_male + (i * mean_sd_male_diff) for i in range(1, len(new_ages) + 1)]
predicted_sd_female = [last_sd_female + (i * mean_sd_female_diff) for i in range(1, len(new_ages) + 1)]

In [24]:
# store all values in new df
interpolated_benchmarks = pd.DataFrame({
    'age_range': new_age_ranges,
    'start_age': new_ages,
    'steps_male': predicted_steps_male,
    'sd_male': predicted_sd_male,
    'steps_female': predicted_steps_female,
    'sd_female': predicted_sd_female
})

# and concat interpolated data to benchmark data
full_benchmark_data = pd.concat([benchmark_data[['steps_male', 'sd_male', 'steps_female', 'sd_female', 'start_age', 'age_range']], interpolated_benchmarks]).reset_index(drop=True)

In [25]:
# round all values to full integers to stay consistent with benchmark data
full_benchmark_data[['steps_male', 'sd_male', 'steps_female', 'sd_female']] = full_benchmark_data[['steps_male', 'sd_male', 'steps_female', 'sd_female']].round(0).astype(int)

full_benchmark_data

Unnamed: 0,steps_male,sd_male,steps_female,sd_female,start_age,age_range
0,101,21,91,24,60,60 - 64
1,101,23,90,26,65,65 - 69
2,95,23,84,25,70,70 - 74
3,91,27,84,24,75,75 - 79
4,87,24,75,23,80,80 - 84
5,75,24,70,22,85,85 - 89
6,69,26,58,21,90,90 - 94
7,64,27,53,21,95,95 - 99
8,60,27,49,20,100,100 - 104
9,55,28,44,20,105,105 - 109


# Including Younger Test Takesr:

In [26]:
# filter physical independence data to include only people between 60 - 94 yrs of age (use original sample and not interpolated data!)
benchmark_data_filtered = benchmark_data.loc[benchmark_data['start_age'] <= 90]
benchmark_data['steps_male_diff'] = benchmark_data_filtered['steps_male'].diff().fillna(0)
benchmark_data['steps_female_diff'] = benchmark_data_filtered['steps_female'].diff().fillna(0)

In [27]:
# obtain reverse mean difference due to interpolation towards younger test takers now! Mean nr. of steps should be increasing
mean_steps_male_diff = -benchmark_data['steps_male_diff'].mean()
mean_steps_female_diff = -benchmark_data['steps_female_diff'].mean()

In [28]:
# set new age ranges for 15-59
new_ages = np.arange(15, 60, 5)
new_age_ranges = [f'{age} - {age + 4}' for age in new_ages]

In [29]:
# and use first mean nr of steps and sd of benchmark data (age range: 60-65)
first_steps_male = benchmark_data['steps_male'].iloc[0]
first_steps_female = benchmark_data['steps_female'].iloc[0]
first_sd_male = benchmark_data['sd_male'].iloc[0]
first_sd_female = benchmark_data['sd_female'].iloc[0]

- NOTE: The mean-difference is multiplied with the factor 0.5, as it would otherwise become unrealistic. The differences between mean number of steps show a stronger decline between older age ranges than they do between younger age ranges!

In [30]:
# and predict mean nr of steps and sd for younger test takers by adding the mean-difference to last know score
predicted_steps_male = [first_steps_male + ((len(new_ages) - 1 - i) * mean_steps_male_diff * 0.5) for i in range(len(new_ages))]
predicted_steps_female = [first_steps_female + ((len(new_ages) - 1 - i) * mean_steps_female_diff * 0.5) for i in range(len(new_ages))]

In [31]:
# and do the same for the standard deviations
predicted_sd_male = [first_sd_male + ((len(new_ages) - 1 - i) * mean_sd_male_diff * 0.5) for i in range(len(new_ages))]
predicted_sd_female = [first_sd_female + ((len(new_ages) - 1 - i) * mean_sd_female_diff * 0.5) for i in range(len(new_ages))]

In [32]:
# store again as new df
interpolated_benchmarks = pd.DataFrame({
    'age_range': new_age_ranges,
    'start_age': new_ages,
    'steps_male': predicted_steps_male,
    'sd_male': predicted_sd_male,
    'steps_female': predicted_steps_female,
    'sd_female': predicted_sd_female
})

# and append predicted benchmark data to full benchmark data
fullest_benchmark_data = pd.concat([interpolated_benchmarks, full_benchmark_data]).reset_index(drop=True)

In [33]:
# round again to nearest integer for consistency
fullest_benchmark_data[['steps_male', 'sd_male', 'steps_female', 'sd_female']] = fullest_benchmark_data[['steps_male', 'sd_male', 'steps_female', 'sd_female']].round(0).astype(int)

print(fullest_benchmark_data)

    age_range  start_age  steps_male  sd_male  steps_female  sd_female
0     15 - 19         15         119       24           110         22
1     20 - 24         20         117       24           108         22
2     25 - 29         25         115       23           105         23
3     30 - 34         30         112       23           103         23
4     35 - 39         35         110       22           100         23
5     40 - 44         40         108       22            98         23
6     45 - 49         45         106       22            96         24
7     50 - 54         50         103       21            93         24
8     55 - 59         55         101       21            91         24
9     60 - 64         60         101       21            91         24
10    65 - 69         65         101       23            90         26
11    70 - 74         70          95       23            84         25
12    75 - 79         75          91       27            84         24
13    

And create a functioning copy:

In [34]:
benchmark_data = fullest_benchmark_data.copy()

# Scoring and Personal Performance

The scoring consists of the overall number of steps that were taken by the test taker. This is the most important metric. It will be used to compare the test taker's performance against the benchmark performance based on their age and gender. A benchmark comparison is reported. Finally, a detailed feedback to each segment is presented, which can be combined with a visual overview of the entire performance as presented in the analysis heuristic code.

In [35]:
def feedback_steps(res, cons):

    # obtain the final steps taken as a sum, the mean and sd over all four segemnts
    steps = np.array(res['num_steps'])
    mean_steps = cons['mean_steps'].iloc[0]
    sd_steps = cons['sd_steps'].iloc[0]
    
    # store nr of steps in feedback, enter segment info
    feedback = f"Total steps taken: {steps.sum()}.\n"
    feedback += f"Mean steps per segment: {mean_steps:.2f}, with a standard deviation of {sd_steps:.2f}.\n"

    # compute z-scores to check performance over segments
    z_scores = (steps - mean_steps) / sd_steps
    
    # and return feedback based on whether segment scores are off from mean by 2 standard deviations
    for i, z in enumerate(z_scores):
        if abs(z) >= 2:
            feedback += f"Segment S{i+1} is more than 2 standard deviations from the mean (z-score: {z:.2f}).\n"
        else:
            feedback += f"Segment S{i+1} is within normal range (z-score: {z:.2f}).\n"
    
    return feedback


In [36]:
feedback_stepping = feedback_steps(results, consistency)
print(feedback_stepping)

Total steps taken: 82.
Mean steps per segment: 20.50, with a standard deviation of 1.12.
Segment S1 is within normal range (z-score: -1.34).
Segment S2 is within normal range (z-score: 1.34).
Segment S3 is within normal range (z-score: -0.45).
Segment S4 is within normal range (z-score: 0.45).



In [37]:
def feedback_coordination(res, cons):
    
    # obtain sum of valid steps and the sum of steps under correct cross-body-coordination
    steps = np.array(res['num_steps'])
    coordination = np.array(res['correct_coordination'])
    
    # compute means and sd 
    mean_steps = cons['mean_steps'].iloc[0]
    std_steps = cons['sd_steps'].iloc[0]
    mean_coord = cons['mean_coordination'].iloc[0]
    std_coord = cons['sd_coordination'].iloc[0]
    
    # and store as additional feedback coordination
    feedback = f"Total correct coordination: {coordination.sum()}.\n"
    feedback += f"Mean coordination per segment: {mean_coord:.2f}, with a standard deviation of {std_coord:.2f}.\n"
    
    # compute z scores for both steps and cross-body-coordination
    z_scores_steps = (steps - mean_steps) / std_steps
    z_scores_coord = (coordination - mean_coord) / std_coord

    # and add to feedback if cross-body-coordination is consistent around mean or off by at least 2 sd
    for i, z in enumerate(z_scores_coord):
        if abs(z) >= 2:
            feedback += f"Segment S{i+1} coordination is more than 2 standard deviations from the mean (z-score: {z:.2f}).\n"
        else:
            feedback += f"Segment S{i+1} coordination is within normal range (z-score: {z:.2f}).\n"
    
    # compute correlation to evaluate similarity between the number of steps tkane and the number of steps under correct_coordination across segments
    correlation = np.corrcoef(steps, coordination)[0, 1]
    # and add overall correlation to feedback
    feedback += f"The correlation between steps and correct coordination is {correlation:.2f}.\n"
    
    # compute ratio of coordination to steps per segment
    ratios = coordination / steps
    
    # and add ratio as a more nuanced metric per segment to feedback
    for i, ratio in enumerate(ratios):
        feedback += f"The ratio between steps and coordination for segment S{i+1} is: {ratio:.2f}. \n"
    
    return feedback


In [38]:
feedback_coord = feedback_coordination(results, consistency)
print(feedback_coord)

Total correct coordination: 82.
Mean coordination per segment: 20.50, with a standard deviation of 1.12.
Segment S1 coordination is within normal range (z-score: -1.34).
Segment S2 coordination is within normal range (z-score: 1.34).
Segment S3 coordination is within normal range (z-score: -0.45).
Segment S4 coordination is within normal range (z-score: 0.45).
The correlation between steps and correct coordination is 1.00.
The ratio between steps and coordination for segment S1 is: 1.00. 
The ratio between steps and coordination for segment S2 is: 1.00. 
The ratio between steps and coordination for segment S3 is: 1.00. 
The ratio between steps and coordination for segment S4 is: 1.00. 



Clean up lists of time_between_peaksl/r and r/l_amplitudes to ascertain format, by cleaning any \n, not required white spaces and additional commas. Finally, convert from string to float.

In [39]:
def clean_string_array(arr):

    # replace \n with whitespace
    cleaned_arr = arr.replace('\n', ' ')
    
    # then clean of any unnecessary white space
    cleaned_arr = re.sub(r'\s+', ' ', cleaned_arr)
    
    # replace remaining white spaces with commas
    cleaned_arr = cleaned_arr.replace(' ', ',')
    
    # and remove any trailing commas, if present
    cleaned_arr = cleaned_arr.strip(',')
    
    return cleaned_arr


In [40]:
def clean_and_convert(arr):

    # execute cleaning 
    cleaned_arr = clean_string_array(arr)
    
    # and return as array with values of type float
    return np.array(ast.literal_eval(cleaned_arr), dtype=float)


In [41]:
def feedback_knee_height(data):

    # clean lists containing y-coordinates of knee peaks for right and left knee
    right_amplitudes = [clean_and_convert(x) for x in data['right_amplitudes']]
    left_amplitudes = [clean_and_convert(x) for x in data['left_amplitudes']]
    
    # store overall variation between legs/knees in arrays
    right_variation = np.array(data['right_variation'])
    left_variation = np.array(data['left_variation'])
    
    # compute mean and sd over right and left side
    avg_right = np.mean(np.concatenate(right_amplitudes))
    std_right = np.std(np.concatenate(right_amplitudes))
    avg_left = np.mean(np.concatenate(left_amplitudes))
    std_left = np.std(np.concatenate(left_amplitudes))
    
    # and add to feedack
    feedback = f"Overall average right knee height: {avg_right:.3f} (std: {std_right:.3f}).\n"
    feedback += f"Overall average left knee height: {avg_left:.3f} (std: {std_left:.3f}).\n"
    
    # further, check if sd per side is greater than 0.02: If yes, note this is sufficient variation in feedback
    if std_right < 0.02 and std_left < 0.02:
        feedback += "The variation in knee height was small, indicating consistent performance.\n"
    else:
        feedback += "There was noticeable variation in knee height. Try to maintain more consistent performance.\n"
    
    # and provide specific feedback per segment
    for i in range(len(data['right_amplitudes'])):
        right_amp_segment = right_amplitudes[i]
        left_amp_segment = left_amplitudes[i]
        var_right_segment = right_variation[i]
        var_left_segment = left_variation[i]
        
        avg_right_segment = np.mean(right_amp_segment)
        avg_left_segment = np.mean(left_amp_segment)

        feedback += f"\nSegment S{i+1}:\n"
        feedback += f"  Right knee average height: {avg_right_segment:.3f}, variation: {np.mean(var_right_segment):.3f}\n"
        feedback += f"  Left knee average height: {avg_left_segment:.3f}, variation: {np.mean(var_left_segment):.3f}\n"
        
        # apply same threshold of 0.02 here to allow if there was greater variation in one of the segments that might not show up in the overall average
        if np.mean(var_right_segment) < 0.02:
            feedback += "  The right knee showed little variation, indicating stable performance in this segment.\n"
        else:
            feedback += "  The right knee showed some variation in height. Try to maintain more stable movement.\n"

        if np.mean(var_left_segment) < 0.02:
            feedback += "  The left knee showed little variation, indicating stable performance in this segment.\n"
        else:
            feedback += "  The left knee showed some variation in height. Try to maintain more stable movement.\n"

    return feedback


In [42]:
feedback_knee = feedback_knee_height(results)
print(feedback_knee)

Overall average right knee height: 0.479 (std: 0.017).
Overall average left knee height: 0.480 (std: 0.018).
The variation in knee height was small, indicating consistent performance.

Segment S1:
  Right knee average height: 0.496, variation: 0.017
  Left knee average height: 0.491, variation: 0.022
  The right knee showed little variation, indicating stable performance in this segment.
  The left knee showed some variation in height. Try to maintain more stable movement.

Segment S2:
  Right knee average height: 0.484, variation: 0.010
  Left knee average height: 0.484, variation: 0.015
  The right knee showed little variation, indicating stable performance in this segment.
  The left knee showed little variation, indicating stable performance in this segment.

Segment S3:
  Right knee average height: 0.468, variation: 0.011
  Left knee average height: 0.470, variation: 0.015
  The right knee showed little variation, indicating stable performance in this segment.
  The left knee show

In [43]:
def feedback_time_between_peaks(data):

    # initialze empty feedback string, to prepare loop over segments 
    feedback = ""

    # and loop over all four segments:
    for i in range(4):
        # and clean and convert times_between_peaks for both right and left knee
        right_times = clean_and_convert(data['time_between_right_peaks'][i])
        left_times = clean_and_convert(data['time_between_left_peaks'][i])
        
        # compute mean and sd for right and left knee
        avg_right_time = np.mean(right_times)
        std_right_time = np.std(right_times)
        avg_left_time = np.mean(left_times)
        std_left_time = np.std(left_times)
        
        # and add information per segment to feedback 
        feedback += (f"Segment {i + 1}:\n"
                      f"  Right knee peak time: Avg = {avg_right_time:.2f} seconds (std: {std_right_time:.2f})\n"
                      f"  Left knee peak time: Avg = {avg_left_time:.2f} seconds (std: {std_left_time:.2f})\n")
        
        # if there was greater variation than 0.2 seconds, note that as sufficient variation in main feedback!
        if std_right_time > 0.2 or std_left_time > 0.2:
            feedback += "  The time between knee peaks varied significantly. Aim for a more consistent pace.\n"
        else:
            feedback += "  The time between knee peaks was consistent.\n"
    
    return feedback


In [44]:
feedback = feedback_time_between_peaks(results)
print(feedback)

Segment 1:
  Right knee peak time: Avg = 1.44 seconds (std: 0.04)
  Left knee peak time: Avg = 1.44 seconds (std: 0.05)
  The time between knee peaks was consistent.
Segment 2:
  Right knee peak time: Avg = 1.44 seconds (std: 0.04)
  Left knee peak time: Avg = 1.44 seconds (std: 0.05)
  The time between knee peaks was consistent.
Segment 3:
  Right knee peak time: Avg = 1.44 seconds (std: 0.04)
  Left knee peak time: Avg = 1.44 seconds (std: 0.05)
  The time between knee peaks was consistent.
Segment 4:
  Right knee peak time: Avg = 1.44 seconds (std: 0.04)
  Left knee peak time: Avg = 1.44 seconds (std: 0.05)
  The time between knee peaks was consistent.



In [45]:
def feedback_breaks(data):
    
    # store the number of breaks taken during the test as sum over all four segments
    num_breaks = np.sum(np.array(data['num_breaks']))
    
    # if the test taker took zero breaks, display:
    if num_breaks == 0:
        feedback = "No breaks were taken during the test. Your endurance is good! \n"
    # if one break was taken:
    elif num_breaks == 1:
        feedback = f"{num_breaks} break was taken during the test. Try to minimize breaks and aim for better endurance next time! \n"
    # if more than one break was taken:
    else:
        feedback = f"{num_breaks} break(s) were taken during the test. Try to minimize breaks and aim for better endurance next time! \n"
    
    return feedback

In [46]:
breaks_feedback = feedback_breaks(results)
print(breaks_feedback)

No breaks were taken during the test. Your endurance is good! 



And initialize one main function that runs all other feedback functions and stores the main feedback.

In [47]:
def generate_feedback(res, cons):

    # obtain feedback 
    feedback = "Feedback for 2-Minute Step Test Performance:\n\n"
    
    # for steps taken
    feedback += feedback_steps(res, cons)
    feedback += "\n"
    
    # and cross-body-coordination
    feedback += feedback_coordination(res, cons)
    feedback += "\n"
    
    # and the heights to which the knees were lifted
    feedback += feedback_knee_height(res)
    feedback += "\n"
    
    # and the average time that went by between knee peaks
    feedback += feedback_time_between_peaks(res)
    feedback += "\n"
    
    # and finally, add feedback about greaks that were taken
    feedback += feedback_breaks(res)
    
    return feedback

# Benchmark based Scoring with Personal Feedback

And finally, allow for benchmark comparison of test takers to their specific demographic group based on age and gender. 

In [48]:
# setup of helper function to get age range for benchmarks and place person in their age range
def get_age_range(age, benchmark_df):
    
    # use persons age to abtain closest match in the benchmark data
    closest_age = benchmark_df['start_age'].loc[benchmark_df['start_age'] <= age].max()
    
    # and return row of correctly identified benchmark data 
    return benchmark_df.loc[benchmark_df['start_age'] == closest_age].iloc[0]

Perform overall benchmark comparison after bechnmark 

In [49]:
def generate_benchmark_comparison(age, gender, benchmark_row, threshold=None):
    
    # check test taker's gender and assign column
    if gender.lower() == 'female':
        mean_steps = benchmark_row['steps_female']
    else:
        mean_steps = benchmark_row['steps_male']

    # and add benchmark-group information to feedback
    feedback = f"You are compared to {gender.capitalize()}s aged {benchmark_row['age_range']}. "
    feedback += f"The mean number of steps taken for your group is {mean_steps:.1f}."
    
    # also return matching cut-off score for physical independence in cases where test taker is at least 60 yrs of age
    if threshold is not None:
        feedback += f" The cutoff for physical independence for your group is {threshold} steps."
    
    return feedback

In [50]:
def scoring_2MS_benchmark(sum_steps, sum_coordination, age, gender):

    # use helper function to select correct age range from benchmark data
    benchmark_row = get_age_range(age, benchmark_data)
    
    # perform check for physical independence if test taker is at least 60 years old
    threshold = None
    if age >= 60:
        threshold_row = get_age_range(age, full_cutoff_data)
        threshold = threshold_row[f'threshold_{gender.lower()}']

    # obtain benchmark comparison feedback (includes physical independence threshold if applicable!)
    benchmark_feedback = generate_benchmark_comparison(age, gender, benchmark_row, threshold)
    
    # obtain personal feedback based on test performance over four segments
    personal_feedback = generate_feedback(results, consistency)
    
    # and present information an valid steps taken
    step_info = f"Congratulations, you have completed the 2 Minute Step Test and took {sum_steps} valid steps! \n"
    
    # return final step score, benchmark comparison, physical independence, and personal feedback
    return {
        'steps': step_info,
        'benchmark_comparison': benchmark_feedback,
        'physical_independence': "physically independent" if age >= 60 and sum_steps >= threshold else "not physically independent" if age >= 60 else None,
        'personal_feedback': personal_feedback
    }


In [51]:
# and test on exmample data
steps = sum(np.array(results['correct_coordination']))
coord = sum(np.array(results['correct_coordination']))

result = scoring_2MS_benchmark(sum_steps=steps, sum_coordination=coord, age=67, gender='female')
print(result)

{'steps': 'Congratulations, you have completed the 2 Minute Step Test and took 82 valid steps! \n', 'benchmark_comparison': 'You are compared to Females aged 65 - 69. The mean number of steps taken for your group is 90.0. The cutoff for physical independence for your group is 93 steps.', 'physical_independence': 'not physically independent', 'personal_feedback': 'Feedback for 2-Minute Step Test Performance:\n\nTotal steps taken: 82.\nMean steps per segment: 20.50, with a standard deviation of 1.12.\nSegment S1 is within normal range (z-score: -1.34).\nSegment S2 is within normal range (z-score: 1.34).\nSegment S3 is within normal range (z-score: -0.45).\nSegment S4 is within normal range (z-score: 0.45).\n\nTotal correct coordination: 82.\nMean coordination per segment: 20.50, with a standard deviation of 1.12.\nSegment S1 coordination is within normal range (z-score: -1.34).\nSegment S2 coordination is within normal range (z-score: 1.34).\nSegment S3 coordination is within normal rang