## Simulated Data

In [5]:
import numpy as np
import pandas as pd

# Set random seed
np.random.seed(42)

# Generate golfer data
n_golfers = 100000

ages = np.random.randint(20, 60, size=n_golfers)
heights = np.random.normal(5.9, 0.5, size=n_golfers)
weights = np.random.normal(170, 20, size=n_golfers)
lengths_of_time_playing = np.random.randint(1, ages - 7, size=n_golfers)

distance_driver = np.maximum(np.random.normal(250, 20, size=n_golfers) - 2 * ages, 0)
distance_3_wood = np.maximum(distance_driver - np.random.normal(10, 10, size=n_golfers), 0)
distance_5_wood = np.maximum(distance_3_wood - np.random.normal(10, 3, size=n_golfers), 0)
distance_3_iron = np.maximum(distance_5_wood - np.random.normal(10, 3, size=n_golfers), 0)
distance_4_iron = np.maximum(distance_3_iron - np.random.normal(10, 3, size=n_golfers), 0)
distance_5_iron = np.maximum(distance_4_iron - np.random.normal(10, 3, size=n_golfers), 0)
distance_6_iron = np.maximum(distance_5_iron - np.random.normal(10, 3, size=n_golfers), 0)
distance_7_iron = np.maximum(distance_6_iron - np.random.normal(10, 3, size=n_golfers), 0)
distance_8_iron = np.maximum(distance_7_iron - np.random.normal(10, 3, size=n_golfers), 0)
distance_9_iron = np.maximum(distance_8_iron - np.random.normal(10, 3, size=n_golfers), 0)
distance_pitching_wedge = np.maximum(distance_9_iron - np.random.normal(15, 5, size=n_golfers), 0)
distance_sand_wedge = np.maximum(distance_pitching_wedge - np.random.normal(10, 5, size=n_golfers), 0)
distance_lob_wedge = np.maximum(distance_sand_wedge - np.random.normal(5, 5, size=n_golfers), 0)


goals = np.random.randint(0, 41, size=n_golfers) + 60

# Add a column for average putts per hole with negative correlation to lengths_of_time_playing
avg_putts = np.random.normal(1.8, 0.2, size=n_golfers) + (lengths_of_time_playing - np.mean(lengths_of_time_playing)) * -0.05

# Correlate height and weight positively to distance
distance_driver += heights * 10 + weights * 0.05
distance_3_wood += heights * 8 + weights * 0.04
distance_5_wood += heights * 6 + weights * 0.03
distance_3_iron += heights * 4 + weights * 0.02
distance_4_iron += heights * 3 + weights * 0.01
distance_5_iron += heights * 2 + weights * 0.005
distance_6_iron += heights * 1 + weights * 0.002
distance_7_iron += heights * 0.5
distance_8_iron += heights * 0.25
distance_9_iron += heights * 0.1
distance_pitching_wedge += heights *.1
distance_sand_wedge += heights * 0.1 + weights * 0.01
distance_lob_wedge += heights * 0.05 + weights * 0.005


In [6]:
# Create dataframe
data = {'Age': ages, 
        'Height': heights, 
        'Weight': weights, 
        'Time Playing': lengths_of_time_playing, 
        'Distance Driver': distance_driver,
        'Distance 3 Wood': distance_3_wood,
        'Distance 5 Wood': distance_5_wood,
        'Distance 3 Iron': distance_3_iron,
        'Distance 4 Iron': distance_4_iron,
        'Distance 5 Iron': distance_5_iron,
        'Distance 6 Iron': distance_6_iron,
        'Distance 7 Iron': distance_7_iron,
        'Distance 8 Iron': distance_8_iron,
        'Distance 9 Iron': distance_9_iron,
        'Distance Pitching Wedge': distance_pitching_wedge,
        'Distance Sand Wedge': distance_sand_wedge,
        'Distance Lob Wedge': distance_lob_wedge,
        'Goal': goals,
        'Avg Putts per Hole': avg_putts}
df = pd.DataFrame(data)


In [7]:
df

Unnamed: 0,Age,Height,Weight,Time Playing,Distance Driver,Distance 3 Wood,Distance 5 Wood,Distance 3 Iron,Distance 4 Iron,Distance 5 Iron,Distance 6 Iron,Distance 7 Iron,Distance 8 Iron,Distance 9 Iron,Distance Pitching Wedge,Distance Sand Wedge,Distance Lob Wedge,Goal,Avg Putts per Hole
0,58,6.687990,160.746830,42,234.426952,190.992962,165.532527,141.704962,122.633840,104.215184,88.101773,77.917819,65.149705,53.396395,37.726757,34.948601,35.415814,62,0.407444
1,48,5.828622,135.025692,17,258.733523,232.279659,202.664571,178.538621,158.985108,141.224028,126.631893,111.462355,101.202095,87.850690,71.227607,59.436264,52.028218,70,1.896571
2,34,6.520150,144.098835,14,251.007119,219.016190,197.044330,168.678186,152.906984,134.392469,117.408614,106.693002,93.928560,82.416863,68.588771,55.002290,41.576148,76,2.109978
3,27,5.739278,175.348054,11,239.775532,226.872189,203.368335,172.986626,158.541549,140.155068,127.345407,114.337440,104.413071,95.464187,77.789901,68.212397,62.426381,64,2.110778
4,40,5.544829,146.845893,10,252.681618,233.462209,205.956724,182.783930,161.981747,149.521107,129.947437,117.870814,110.760787,100.681761,88.115339,75.516883,60.745214,99,2.055277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,53,6.422442,207.973114,25,204.847500,167.127719,141.194552,116.102912,99.923841,79.351905,64.638605,50.369211,35.997249,26.853344,7.777017,4.491485,1.360988,70,1.325357
99996,39,5.893752,183.253194,6,233.411683,206.393814,185.387445,160.171494,149.457637,136.104684,122.602983,108.359909,101.388469,90.897281,78.696370,65.271424,54.230460,97,2.425413
99997,54,4.727475,203.702021,29,169.237968,162.724023,137.565129,116.749210,104.200561,87.339713,70.595730,56.796969,43.172568,34.394290,23.793027,11.219554,4.332575,92,1.060342
99998,35,7.016599,206.697759,3,249.180614,210.358710,186.849516,163.242154,143.785677,126.662094,111.185878,99.897126,86.550598,74.910059,55.511270,38.564050,21.332489,63,2.663252


## User Inputs

In [8]:
import numpy as np

age = int(input("Enter your age: "))
height = float(input("Enter your height in feet: "))
weight = float(input("Enter your weight in pounds: "))
time_playing = int(input("Enter the number of years you've been playing: "))
driver_dist = float(input("Enter your distance with a driver in yards: "))
wood3_dist = float(input("Enter your distance with a 3 wood in yards: "))
wood5_dist = float(input("Enter your distance with a 5 wood in yards: "))
iron3_dist = float(input("Enter your distance with a 3 iron in yards: "))
iron4_dist = float(input("Enter your distance with a 4 iron in yards: "))
iron5_dist = float(input("Enter your distance with a 5 iron in yards: "))
iron6_dist = float(input("Enter your distance with a 6 iron in yards: "))
iron7_dist = float(input("Enter your distance with a 7 iron in yards: "))
iron8_dist = float(input("Enter your distance with a 8 iron in yards: "))
iron9_dist = float(input("Enter your distance with a 9 iron in yards: "))
pw_dist = float(input("Enter your distance with a pitching wedge in yards: "))
sw_dist = float(input("Enter your distance with a sand wedge in yards: "))
lw_dist = float(input("Enter your distance with a lob wedge in yards: "))
goal = int(input("Enter your golfing goal (score to break): "))
putts_per_hole = float(input("Enter your estimated average putts per hole: "))

user_stats = [age, height, weight, time_playing, driver_dist, wood3_dist, wood5_dist, iron3_dist, iron4_dist, iron5_dist, iron6_dist, iron7_dist, iron8_dist, iron9_dist, pw_dist, sw_dist, lw_dist, goal, putts_per_hole]


Enter your age: 21
Enter your height in feet: 6.4
Enter your weight in pounds: 227
Enter the number of years you've been playing: 1
Enter your distance with a driver in yards: 250
Enter your distance with a 3 wood in yards: 224
Enter your distance with a 5 wood in yards: 205
Enter your distance with a 3 iron in yards: 255
Enter your distance with a 4 iron in yards: 245
Enter your distance with a 5 iron in yards: 235
Enter your distance with a 6 iron in yards: 220
Enter your distance with a 7 iron in yards: 210
Enter your distance with a 8 iron in yards: 190
Enter your distance with a 9 iron in yards: 170
Enter your distance with a pitching wedge in yards: 165
Enter your distance with a sand wedge in yards: 130
Enter your distance with a lob wedge in yards: 100
Enter your golfing goal (score to break): 98
Enter your estimated average putts per hole: 2.5


## Percentiles

In [9]:
from scipy.stats import percentileofscore

cols_to_compare = ['Distance Driver', 'Distance 3 Wood', 'Distance 5 Wood', 'Distance 3 Iron', 'Distance 4 Iron', 'Distance 5 Iron', 'Distance 6 Iron', 'Distance 7 Iron', 'Distance 8 Iron', 'Distance 9 Iron', 'Distance Pitching Wedge', 'Distance Sand Wedge', 'Distance Lob Wedge']
a_stats = [driver_dist, wood3_dist, wood5_dist, iron3_dist, iron4_dist, iron5_dist, iron6_dist, iron7_dist, iron8_dist, iron9_dist, pw_dist, sw_dist, lw_dist]
p_list = []

for col, val in zip(cols_to_compare, a_stats):
    col_percentile = percentileofscore(df[col], val)
    print("Your provided input of {} yards for {} ranks in the {:.2f} percentile of our users of a similar age!".format(val, col, col_percentile))
    p_list.append(col_percentile)

Your provided input of 250.0 yards for Distance Driver ranks in the 63.59 percentile of our users!
Your provided input of 224.0 yards for Distance 3 Wood ranks in the 60.33 percentile of our users!
Your provided input of 205.0 yards for Distance 5 Wood ranks in the 65.20 percentile of our users!
Your provided input of 255.0 yards for Distance 3 Iron ranks in the 99.80 percentile of our users!
Your provided input of 245.0 yards for Distance 4 Iron ranks in the 99.92 percentile of our users!
Your provided input of 235.0 yards for Distance 5 Iron ranks in the 99.97 percentile of our users!
Your provided input of 220.0 yards for Distance 6 Iron ranks in the 99.96 percentile of our users!
Your provided input of 210.0 yards for Distance 7 Iron ranks in the 99.97 percentile of our users!
Your provided input of 190.0 yards for Distance 8 Iron ranks in the 99.93 percentile of our users!
Your provided input of 170.0 yards for Distance 9 Iron ranks in the 99.77 percentile of our users!
Your provi

## Insights

In [69]:
goal_putt = percentileofscore(df['Avg Putts per Hole'], putts_per_hole)
if putts_per_hole >= 2.5:
    print("Your average putts of {} indicates that you may three-putt just a bit more than normal. We've all been there, but consider spending some extra time on the putting green and checking out our coaching!".format(putts_per_hole))

Your average putts of 2.5 indicates that you may three-putt just a bit more than normal. We've all been there, but consider spending some extra time on the putting green and checking out our coaching!


In [14]:
long_ball = 0
medium_ball = 0
short_ball = 0
for val in p_list:
    if val >= .75:
        long_ball += 1
    if val > .25 and val <.75:
        medium_ball += 1
    if val <= .25:
        short_ball += 1

if long_ball > medium_ball and long_ball > short_ball:
    print("It looks like you have some great distance on the ball! Maybe you could benefit from our distance control programs, or perhaps fine tuning your accuracy or putting.")
if medium_ball > short_ball and medium_ball > long_ball:
    print("Your distances are just about average for our user base, and there is nothing wrong with that! If you want to add to your power, consider asking about tips to improve your striking or swing speed.")
if short_ball > medium_ball and short_ball > long_ball:      
    print("So you're not a power hitter, and that is perfectly okay. There are plenty of other ways to move the ball down the fairway. Consider working with us to improve your greens in regulation and club selection!")

It looks like you have some great distance on the ball! Maybe you could benefit from our distance control programs, or perhaps fine tuning your accuracy or putting.


#### Similar Age & Height

In [20]:
df_filtered = df.loc[(df['Age'] >= age - 5) & (df['Age'] <= age + 3)]
df_filtered = df.loc[(df['Height'] >= height - 0.3) & (df['Height'] <= height + 0.3)]
p_list2 = []

for col, val in zip(cols_to_compare, a_stats):
    col_percentile = percentileofscore(df_filtered[col], val)
    print("Your provided input of {} yards for {} ranks in the {:.2f} percentile of our users of a similar age!".format(val, col, col_percentile))
    p_list2.append(col_percentile)

Your provided input of 250.0 yards for Distance Driver ranks in the 58.56 percentile of our users of a similar age!
Your provided input of 224.0 yards for Distance 3 Wood ranks in the 56.46 percentile of our users of a similar age!
Your provided input of 205.0 yards for Distance 5 Wood ranks in the 62.43 percentile of our users of a similar age!
Your provided input of 255.0 yards for Distance 3 Iron ranks in the 99.77 percentile of our users of a similar age!
Your provided input of 245.0 yards for Distance 4 Iron ranks in the 99.91 percentile of our users of a similar age!
Your provided input of 235.0 yards for Distance 5 Iron ranks in the 99.96 percentile of our users of a similar age!
Your provided input of 220.0 yards for Distance 6 Iron ranks in the 99.97 percentile of our users of a similar age!
Your provided input of 210.0 yards for Distance 7 Iron ranks in the 99.98 percentile of our users of a similar age!
Your provided input of 190.0 yards for Distance 8 Iron ranks in the 99.9