In [1]:
# goal: using data provided by user to determine what shot to use

import pandas as pd
import numpy as np
import scipy

In [2]:
df = pd.read_csv('GGXY.csv')
display(df)

Unnamed: 0,Club,Club Speed,Ball Speed,Launch Angle,Spin Rate,Face Angle,Face to Path,Club Path,Attack Angle,Launch Direction,Carry
0,54 Wedge,81.6,96.4,27.7,9179,-5.7,-7.5,1.8,-1.2,-3.5,118.9
1,54 Wedge,84.9,93.7,33.4,6859,-,-,-,-,-0.5,113.9
2,54 Wedge,85,94.6,29.6,8638,-4.5,-6.7,2.2,-0.4,-2.6,115.9
3,54 Wedge,82.9,94.3,30.6,6042,-5.1,-8.3,3.2,0,-3.3,121.4
4,54 Wedge,85.1,98.0,27.7,8706,-,-,-,-,-4.3,120.9
...,...,...,...,...,...,...,...,...,...,...,...
306,7 Iron,66.6,96.7,18.1,4009,-9.6,-3,-6.6,-2.2,-9.0,132.8
307,7 Iron,64.9,91.2,18.3,3694,-4.4,-1.8,-2.6,-1,-4.1,121.5
308,7 Iron,68.3,91.4,16.6,3650,-6.3,-9.9,3.6,0,-4.4,116.2
309,7 Iron,-,57.3,1.9,1570,-,-,-,-,-6.4,6.7


In [3]:
# dictionary of club name and its loft angle
clubmeta = {'Driver':10.5, '3 Hybrid':20.5, '5 Hybrid':26, '3 Wood':15, '5 Wood':19, '4 Iron':24.5, 
            '5 Iron':27.5, '6 Iron':31, '7 Iron':34, '8 Iron':37, '9 Iron':41.5, '54 Wedge':54, 
            '56 Wedge':56, '58 Wedge':58, '60 Wedge':60}

In [4]:
clubcarry = {club: {None, None} for club in clubmeta} # dictionary of all the clubs, no carry yet
clubdata = df.groupby('Club')['Carry'].mean().to_dict() # avg of carry from data
clubcarry = {club: clubdata.get(club, clubcarry[club]) for club in clubcarry} # enter data
print(clubcarry)

{'Driver': {None}, '3 Hybrid': {None}, '5 Hybrid': {None}, '3 Wood': 149.86470588235292, '5 Wood': 246.29166666666666, '4 Iron': 130.10000000000002, '5 Iron': {None}, '6 Iron': {None}, '7 Iron': 111.73281250000001, '8 Iron': {None}, '9 Iron': {None}, '54 Wedge': 118.48181818181818, '56 Wedge': {None}, '58 Wedge': {None}, '60 Wedge': {None}}


In [5]:
def which_club(distance, clubcarry):
    clubs = list(clubcarry.keys())
    
    result_club, result_carry = min(clubcarry.items(), key=lambda x: abs(distance - x[1]))
    if result_carry == None:
        return result_club
    else:
        difference = abs(distance - result_carry)
        result_club_index = clubs.index(result_club)
    
        while (difference > (0.2 * result_carry) and result_club_index != 0):
            result_club = clubs[result_club_index - 1]
            difference = abs(distance - result_carry)
            result_club_index = clubs.index(result_club)
    
    return result_club

In [6]:
sample_data = {'Driver':250, '3 Hybrid':165, '5 Hybrid':150, '3 Wood':180, '5 Wood':175, '4 Iron':145, 
            '5 Iron':145, '6 Iron':130, '7 Iron':125, '8 Iron':115, '9 Iron':100, '54 Wedge':90, 
            '56 Wedge':70, '58 Wedge':55, '60 Wedge':50}

In [7]:
results = []
for distance in range(0, 401, 20):
    print(distance)
    club = which_club(distance, sample_data)
    results.append((distance, club))

# Create a DataFrame to store the results
df = pd.DataFrame(results, columns=['Yardage', 'Club to Use'])

# Output the DataFrame
display(df)

0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400


Unnamed: 0,Yardage,Club to Use
0,0,Driver
1,20,Driver
2,40,60 Wedge
3,60,58 Wedge
4,80,54 Wedge
5,100,9 Iron
6,120,7 Iron
7,140,4 Iron
8,160,3 Hybrid
9,180,3 Wood


In [None]:
def predict_carry(csv_path):
    import pandas as pd
    import numpy as np
    from sklearn.ensemble import RandomForestRegressor

    df = pd.read_csv(csv_path)
    df.replace("-", np.nan, inplace=True)
    df = df.apply(pd.to_numeric, errors='ignore')

    club_mapping = {
        'Driver': 1, 'b3 Wood': 2, '5 Wood': 3, '4 Hybrid': 4, '3 Iron': 5,
        '4 Iron': 6, '5 Iron': 7, '6 Iron': 8, '7 Iron': 9, '8 Iron': 10,
        '9 Iron': 11, 'PW': 12, '50 Wedge': 13, '52 Wedge': 14, '54 Wedge': 15,
        '56 Wedge': 16, '58 Wedge': 17, '60 Wedge': 18
    }

    df['Club'] = df['Club'].map(club_mapping)

    features = ['Club', 'Club Speed', 'Ball Speed', 'Launch Angle', 'Spin Rate']
    df_model = df.dropna(subset=['Carry', 'Club'] + features[1:])

    X = df_model[features]
    y = df_model['Carry']
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    # Get known and missing clubs
    known_clubs = set(df_model['Club'].unique())
    all_clubs = set(club_mapping.keys())
    missing_clubs = sorted(list(all_clubs - known_clubs))

    # Define club categories for smarter feature guessing
    def club_category(club):
        if 'Wedge' in club or club in ['PW']:
            return 'wedge'
        elif 'Wood' in club or club == 'Driver':
            return 'wood'
        elif 'Hybrid' in club:
            return 'hybrid'
        else:
            return 'iron'

    df_model['category'] = df_model['Club'].apply(club_category)

    # Predict for each missing club using category-wise averages
    rows = []
    for club in missing_clubs:
        category = club_category(club)
        group_avg = df_model[df_model['category'] == category][features[1:]].mean()
        group_avg['club_num'] = club_mapping[club]
        rows.append(group_avg)

    df_missing = pd.DataFrame(rows)
    df_missing['Club'] = missing_clubs
    df_missing['Predicted Carry'] = model.predict(df_missing[features])

    return df_missing[['Club', 'Predicted Carry']].sort_values('club_num')



In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

def predict_carrys(csv_path):
    # Load the CSV
    df = pd.read_csv(csv_path)
    
    # Replace dashes with NaN and convert numeric columns
    df.replace("-", np.nan, inplace=True)
    df = df.apply(pd.to_numeric, errors='ignore')

    # Define club name → numeric mapping
    club_mapping = {
        'Driver': 1, '3 Wood': 2, '5 Wood': 3, '4 Hybrid': 4, '3 Iron': 5,
        '4 Iron': 6, '5 Iron': 7, '6 Iron': 8, '7 Iron': 9, '8 Iron': 10,
        '9 Iron': 11, 'PW': 12, '50 Wedge': 13, '52 Wedge': 14, '54 Wedge': 15,
        '56 Wedge': 16, '58 Wedge': 17, '60 Wedge': 18
    }

    # Add numeric club column
    df['club_num'] = df['Club'].map(club_mapping)

    # Drop rows with missing target or key input
    if 'Carry' not in df.columns:
        raise ValueError("CSV must include a 'Carry' column.")
    
    # Define usable features (adapt as needed)
    features = ['club_num', 'Club Speed', 'Ball Speed', 'Launch Angle', 'Spin Rate']
    df_model = df.dropna(subset=['Carry', 'club_num'] + features[1:])

    # Train model
    X = df_model[features]
    y = df_model['Carry']
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    # Identify which clubs are missing
    known_clubs = set(df_model['Club'].unique())
    all_possible_clubs = set(club_mapping.keys())
    missing_clubs = sorted(list(all_possible_clubs - known_clubs))

    # Average feature values
    avg_vals = df_model[features[1:]].mean()
    rows = []

    for club in missing_clubs:
        row = avg_vals.copy()
        row['club_num'] = club_mapping[club]
        rows.append(row)

    # Build DataFrame for missing clubs
    df_missing = pd.DataFrame(rows)
    df_missing['Club'] = missing_clubs
    df_missing['Predicted Carry'] = model.predict(df_missing[features])

    return df_missing[['Club', 'Predicted Carry']]


In [12]:
results = predict_carrys("GGXY.csv")
display(results)


  df = df.apply(pd.to_numeric, errors='ignore')


Unnamed: 0,Club,Predicted Carry
0,3 Iron,144.508
1,5 Iron,144.508
2,50 Wedge,144.173
3,52 Wedge,144.173
4,56 Wedge,144.173
5,58 Wedge,144.173
6,6 Iron,144.536
7,60 Wedge,144.173
8,8 Iron,144.536
9,9 Iron,144.536
