# **📚 Imports**

In [1]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, RobustScaler
from sklearn.model_selection import train_test_split

import lazypredict
from lazypredict.Supervised import LazyRegressor

# **📂 Data Import**

In [2]:
# Read the golf CSV file into a pandas DataFrame
result_df = pd.read_csv(filepath_or_buffer="FE_golf.csv")

# **🍽️ Pre-Processing**

In [3]:
# Approach data: keep only the rows whose 'from_location_scorer' is not 'Green'.
# NOTE(review): this filtered frame is not read again in the visible cells —
# the split cell builds X / y from `result_df`. Confirm which frame is intended.
result_df_no_green = result_df.loc[result_df['from_location_scorer'].ne('Green')]

# Columns routed through the numeric branch of the preprocessor (order preserved).
numeric_features = [
    "ISS", "ISC", "SSI", "CSI",
    "shot",
    "distance_to_pin", "dist_from_edge", "dist_from_center", "pin_minus_edge",
    "Actual Yard", "yardage",
    "hole_completeness",
]

# Columns routed through the categorical (one-hot) branch (order preserved).
categorical_features = [
    "round", "par_value",
    "lie", "slope", "elevation",
    "non_putting_dist_from_center_bins",
    "non_putting_dist_from_edge_bins",
    "non_putting_distance_to_pin_bins",
    "around_the_green", "side_of_hole", "dog_legs",
    "HCP", "shot_type",
    "from_location_scorer", "from_location_laser",
]

# Numeric branch: a single RobustScaler step.
numeric_transformer = Pipeline([("scaler", RobustScaler())])

# Categorical branch: dense one-hot encoding with the first level of each column dropped.
# NOTE(review): with drop="first" and handle_unknown="ignore", a category unseen at
# fit time is encoded as all zeros, i.e. the same as the dropped first category —
# confirm that this is acceptable for the validation data.
categorical_transformer = Pipeline([
    ("onehot", OneHotEncoder(drop="first", handle_unknown="ignore", sparse_output=False)),
])

# Combine both branches; columns not listed in either feature list are not passed
# to any transformer here.
preprocessor = ColumnTransformer([
    ("num_transform", numeric_transformer, numeric_features),
    ("cat_transform", categorical_transformer, categorical_features),
])

# **🔪 Split**

In [4]:
# Column selections: features (categorical first, then numeric), target, strata.
feature_cols = categorical_features + numeric_features
target_cols = ['strokes_to_hole_out']
stratify_cols = ['non_putting_dist_from_center_bins','non_putting_distance_to_pin_bins','par_value']

# NOTE(review): everything below is sliced from `result_df`; the
# `result_df_no_green` frame built in the pre-processing cell is not used here.
# Confirm that training on the unfiltered data is intended.
X = result_df[feature_cols]
y = result_df[target_cols]

# Rows used to stratify the split (one row of strata values per sample).
stratify_data = result_df.loc[:, stratify_cols]

# 80/20 train/validation split with a fixed seed, stratified on `stratify_data`.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=42,
    stratify=stratify_data,
)

# Fit the preprocessor on the training split only, then reuse the fitted
# transform on the validation split.
X_train_transformed = preprocessor.fit_transform(X_train)
X_valid_transformed = preprocessor.transform(X_valid)

# **🤖 Lazy Predict**

In [5]:
# Configure LazyRegressor. ignore_warnings=False appears to be why per-model
# failures (e.g. QuantileRegressor in the recorded output) are printed — confirm
# against the lazypredict documentation.
reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)

# Arguments are passed in the order: train features, validation features,
# train target, validation target. The single-column target frames are
# flattened to 1-D arrays before being handed over.
models, predictions = reg.fit(
    X_train_transformed,
    X_valid_transformed,
    y_train.values.ravel(),
    y_valid.values.ravel(),
)

# Display the per-model results table as the cell output.
models

 76%|███████▌  | 32/42 [00:41<00:07,  1.38it/s]

QuantileRegressor model failed to execute
Solver interior-point is not anymore available in SciPy >= 1.11.0.


100%|██████████| 42/42 [01:02<00:00,  1.49s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001507 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2545
[LightGBM] [Info] Number of data points in the train set: 6528, number of used features: 69
[LightGBM] [Info] Start training from score 2.509191





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GradientBoostingRegressor,0.88,0.88,0.42,5.22
HistGradientBoostingRegressor,0.88,0.88,0.42,1.68
LGBMRegressor,0.88,0.88,0.42,0.23
RandomForestRegressor,0.87,0.88,0.43,16.87
SVR,0.86,0.87,0.44,3.32
BayesianRidge,0.86,0.87,0.44,0.05
RidgeCV,0.86,0.87,0.45,0.07
Ridge,0.86,0.87,0.45,0.04
NuSVR,0.86,0.87,0.45,5.21
BaggingRegressor,0.86,0.87,0.45,1.33
