<a href="https://colab.research.google.com/github/cderosia/pga/blob/main/PGA_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [51]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from google.colab import files
uploaded = files.upload()

Saving pga_data.csv to pga_data (4).csv


## 1.) Data Prep

a.) Handling Missing Values

In [52]:
data = pd.read_csv('pga_data.csv')
data['Win'].fillna(0, inplace=True)
data['Consecutive_Cuts_Made'].fillna(data['Consecutive_Cuts_Made'].mean(), inplace = True)
data['Drive Yards'].fillna(data['Drive Yards'].mean(), inplace = True)
data['Fairways Hit'].fillna(data['Fairways Hit'].mean(), inplace = True)
data['PUTTS/HOLE'].fillna(data['PUTTS/HOLE'].mean(), inplace = True)
data['Age'].fillna(data['Age'].mean(), inplace = True)
data['sg_putt'].fillna(0, inplace = True)
data['sg_arg'].fillna(0, inplace = True)
data['sg_app'].fillna(0, inplace = True)
data['sg_ott'].fillna(0, inplace = True)
data['sg_t2g'].fillna(0, inplace = True)
data['sg_total'].fillna(0, inplace = True)
data['sg_putt_prev'].fillna(0, inplace = True)
data['sg_arg_prev'].fillna(0, inplace = True)
data['sg_app_prev'].fillna(0, inplace = True)
data['sg_ott_prev'].fillna(0, inplace = True)
data['sg_t2g_prev'].fillna(0, inplace = True)
data['sg_total_prev'].fillna(0, inplace = True)




b.) Removing Undesired Features

In [53]:
cols_to_remove = ['player', 'Height cm', 'Weight lbs', 'DOB', 'player id', 'date',
       'tournament name', 'tournament id', 'season', 'final position', 'major',
       'made_cut', 'Score','Number of Rounds', 'Water', 'Bunkers', 'Slope', 'Length', 'Par', 'major_win']

data.drop(cols_to_remove, axis = 1, inplace = True)
data

Unnamed: 0,Age,Consecutive_Cuts_Made,sg_putt,sg_arg,sg_app,sg_ott,sg_t2g,sg_total,sg_putt_prev,sg_arg_prev,sg_app_prev,sg_ott_prev,sg_t2g_prev,sg_total_prev,Drive Yards,Fairways Hit,PUTTS/HOLE,Win
0,46.000000,1.844772,0.17,-0.93,0.65,-0.30,-0.57,-0.40,0.00,0.00,0.00,0.00,0.00,0.00,286.900000,65.600000,1.840000,0.0
1,47.000000,0.000000,1.06,-1.32,-1.07,-0.73,-3.12,-2.07,0.17,-0.93,0.65,-0.30,-0.57,-0.40,286.900000,65.600000,1.840000,0.0
2,47.000000,1.000000,-1.07,-0.48,0.71,-0.01,0.22,-0.85,1.06,-1.32,-1.07,-0.73,-3.12,-2.07,286.900000,65.600000,1.840000,0.0
3,47.000000,0.000000,-1.78,-0.18,-0.77,-0.52,-1.47,-3.25,-1.07,-0.48,0.71,-0.01,0.22,-0.85,286.900000,65.600000,1.840000,0.0
4,47.000000,0.000000,-2.30,-0.61,1.13,-0.10,0.42,-1.87,-1.78,-0.18,-0.77,-0.52,-1.47,-3.25,286.900000,65.600000,1.840000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14011,21.000000,6.000000,0.61,0.87,1.79,0.79,3.46,4.07,-0.42,0.34,0.08,0.82,1.23,0.82,314.200000,49.800000,1.730000,0.0
14012,22.000000,7.000000,1.00,0.41,1.53,0.31,2.24,3.24,0.61,0.87,1.79,0.79,3.46,4.07,314.200000,49.800000,1.730000,0.0
14013,22.000000,8.000000,-0.31,-1.91,-0.10,-1.10,-3.12,-3.42,1.00,0.41,1.53,0.31,2.24,3.24,314.200000,49.800000,1.730000,0.0
14014,22.000000,9.000000,0.41,-1.03,0.99,-0.86,-0.90,-0.49,-0.31,-1.91,-0.10,-1.10,-3.12,-3.42,314.200000,49.800000,1.730000,0.0


c.) Normalizing the Data

In [54]:
scaler = MinMaxScaler()

# normalized data frame
norm_data = pd.DataFrame(scaler.fit_transform(data), columns = data.columns)


norm_data

Unnamed: 0,Age,Consecutive_Cuts_Made,sg_putt,sg_arg,sg_app,sg_ott,sg_t2g,sg_total,sg_putt_prev,sg_arg_prev,sg_app_prev,sg_ott_prev,sg_t2g_prev,sg_total_prev,Drive Yards,Fairways Hit,PUTTS/HOLE,Win
0,0.613636,0.061492,0.604772,0.448183,0.686151,0.662195,0.582313,0.623891,0.587137,0.573351,0.627698,0.698780,0.621088,0.647546,0.437223,0.629073,0.592593,0.0
1,0.636364,0.000000,0.697095,0.395693,0.531475,0.609756,0.408844,0.525133,0.604772,0.448183,0.686151,0.662195,0.582313,0.623891,0.437223,0.629073,0.592593,0.0
2,0.636364,0.033333,0.476141,0.508748,0.691547,0.697561,0.636054,0.597280,0.697095,0.395693,0.531475,0.609756,0.408844,0.525133,0.437223,0.629073,0.592593,0.0
3,0.636364,0.000000,0.402490,0.549125,0.558453,0.635366,0.521088,0.455352,0.476141,0.508748,0.691547,0.697561,0.636054,0.597280,0.437223,0.629073,0.592593,0.0
4,0.636364,0.000000,0.348548,0.491252,0.729317,0.686585,0.649660,0.536960,0.402490,0.549125,0.558453,0.635366,0.521088,0.455352,0.437223,0.629073,0.592593,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14011,0.045455,0.200000,0.650415,0.690444,0.788669,0.795122,0.856463,0.888232,0.543568,0.619112,0.634892,0.798780,0.704762,0.696038,0.840473,0.233083,0.185185,0.0
14012,0.068182,0.233333,0.690871,0.628533,0.765288,0.736585,0.773469,0.839148,0.650415,0.690444,0.788669,0.795122,0.856463,0.888232,0.840473,0.233083,0.185185,0.0
14013,0.068182,0.266667,0.554979,0.316285,0.618705,0.564634,0.408844,0.445299,0.690871,0.628533,0.765288,0.736585,0.773469,0.839148,0.840473,0.233083,0.185185,0.0
14014,0.068182,0.300000,0.629668,0.434724,0.716727,0.593902,0.559864,0.618569,0.554979,0.316285,0.618705,0.564634,0.408844,0.445299,0.840473,0.233083,0.185185,0.0


d.) Train - Test Split


In [55]:
X = norm_data.drop(columns = ['Win'])
y = norm_data['Win']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33, stratify=y)


## 2.) Building the Neural Network


In [58]:
model = keras.Sequential([
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [59]:
model.compile(optimizer = 'Adam',
              loss = 'binary_crossentropy',
              metrics=['accuracy'])

In [60]:
model.fit(X_train, y_train, epochs=11, batch_size=32) # itrs = epochs * dataset_size / batch_size roughly

Epoch 1/11
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11


<keras.src.callbacks.History at 0x7f4e3221f280>

In [61]:
model.evaluate(X_test, y_test)



[0.045050013810396194, 0.9907275438308716]