# **📚 Imports & Setup**

In [1]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from category_encoders import HashingEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import PowerTransformer
from sklearn.model_selection import train_test_split

import lazypredict
from lazypredict.Supervised import LazyRegressor

# **📂 Data Import**

In [2]:
# Load FE_golf.csv into `result_df`; the path is relative, so the file must
# sit in the notebook's working directory.
result_df = pd.read_csv(filepath_or_buffer="FE_golf.csv")

# **🍽️ Pre-Processing**

In [3]:
# Columns routed to the numeric branch of the preprocessor.
numeric_features = [
    "ISS",
    "ISC",
    "SSI",
    "CSI",
    "shot",
    "distance_to_pin",
    "dist_from_edge",
    "dist_from_center",
    "pin_minus_edge",
    "Actual Yard",
    "yardage",
    "hole_completeness",
]

# Columns routed to the categorical branch of the preprocessor.
categorical_features = [
    "round",
    "par_value",
    "lie",
    "slope",
    "elevation",
    "non_putting_dist_from_center_bins",
    "non_putting_dist_from_edge_bins",
    "non_putting_distance_to_pin_bins",
    "around_the_green",
    "side_of_hole",
    "dog_legs",
    "HCP",
    "shot_type",
    "from_location_scorer",
    "from_location_laser",
]

# Numeric branch: a single PowerTransformer step with library defaults.
numeric_transformer = Pipeline([("PowerTransformer", PowerTransformer())])

# Categorical branch: a single HashingEncoder step with library defaults.
# NOTE(review): per the category_encoders docs, leaving `cols` unset encodes
# only string-typed columns, so any numerically typed column listed above
# (possibly round, par_value, HCP) would pass through unhashed — TODO confirm
# the dtypes in FE_golf.csv and pass `cols=` explicitly if hashing is intended.
categorical_transformer = Pipeline([("HashingEncoder", HashingEncoder())])

# Apply each branch to its own column group. No `remainder` is given, so the
# ColumnTransformer default applies to any column not listed in either group.
preprocessor = ColumnTransformer(
    [
        ("num_transform", numeric_transformer, numeric_features),
        ("cat_transform", categorical_transformer, categorical_features),
    ]
)

# **🔪 Split**

In [4]:
# Model inputs: the categorical columns followed by the numeric ones.
feature_cols = categorical_features + numeric_features
X = result_df[feature_cols]

# The target is selected with a one-element list, so `y` is a single-column
# DataFrame rather than a Series.
target_cols = ["strokes_to_hole_out"]
y = result_df[target_cols]

# Columns whose joint values drive the stratified split.
stratify_cols = ["non_putting_distance_to_pin_bins", "round", "par_value"]
stratify_data = result_df[stratify_cols]

# 80/20 train/validation split with a fixed seed for reproducibility.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=42,
    stratify=stratify_data,
)

# Fit the preprocessor on the training split only, then reuse the fitted
# transformer on the validation split.
X_train_transformed = preprocessor.fit_transform(X_train)
X_valid_transformed = preprocessor.transform(X_valid)

# **🤖 Lazy Predict**

In [5]:
# Build the LazyRegressor benchmark. ignore_warnings=False is kept as-is; with
# it, model failures appear in the cell output instead of being hidden.
reg = LazyRegressor(
    verbose=0,
    ignore_warnings=False,
    custom_metric=None,
)

# Arguments are passed positionally: training features, validation features,
# training target, validation target. The single-column target frames are
# flattened to 1-D arrays first.
models, predictions = reg.fit(
    X_train_transformed,
    X_valid_transformed,
    y_train.to_numpy().ravel(),
    y_valid.to_numpy().ravel(),
)

# A bare last expression renders the per-model results table.
models

 79%|███████▊  | 33/42 [00:32<00:05,  1.56it/s]

QuantileRegressor model failed to execute
Solver interior-point is not anymore available in SciPy >= 1.11.0.


100%|██████████| 42/42 [00:46<00:00,  1.10s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000410 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2445
[LightGBM] [Info] Number of data points in the train set: 6528, number of used features: 23
[LightGBM] [Info] Start training from score 2.504136





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GradientBoostingRegressor,0.88,0.88,0.41,3.9
LGBMRegressor,0.88,0.88,0.42,0.16
HistGradientBoostingRegressor,0.87,0.88,0.42,0.68
RandomForestRegressor,0.87,0.87,0.43,11.6
SVR,0.87,0.87,0.43,1.77
MLPRegressor,0.86,0.87,0.44,4.36
ExtraTreesRegressor,0.86,0.86,0.44,2.93
NuSVR,0.86,0.86,0.44,4.57
XGBRegressor,0.86,0.86,0.44,0.2
BaggingRegressor,0.86,0.86,0.45,1.2
