In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm

In [2]:
%matplotlib inline

# Import Excel project files

In [4]:
df = pd.read_csv("UG90 PROJECT.csv") #Create a data frame containing UG90 data called df

# Increasing the X and Y coordinates by 0.001
df_mod = df.copy()
df_mod['X'] = df['X'] + 0.001
df_mod['Y'] = df['Y'] + 0.001

In [5]:
df_lease = pd.read_excel('lease Boundary.xlsx')
df_measured = pd.read_excel('UG2 measured.xlsx')

# Test-Train Split

In [6]:
all_features = ['X','Y']
target = 'PGE'

In [7]:
X_train, X_test, y_train, y_test = train_test_split(df_mod[all_features], df_mod[target], test_size=0.2, random_state=99)

# Grid Search Models and parameters

In [8]:
# Define your models and parameter grids
models = {
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(),
    'SVR': SVR()
}

In [9]:
param_grids = {
    'Decision Tree': {
        'max_depth': [3, 30, 50],
        'min_samples_split': [2, 5, 10],
        'criterion': ['mse','friedman_mse','mae'],
        'min_samples_leaf':[1,5,10,15,20],
        'max_leaf_nodes':[10,50,100,200]
    },
    'Random Forest': {
        'n_estimators': [50, 100, 150, 200],  # Number of trees
        'max_depth': [3, 10, 20],
        'min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10],
        'criterion': ['mse','friedman_mse','mae']
    },
    
}

In [10]:
best_params = {}

# Loop through each model and perform GridSearchCV
for name, model in models.items():
    tqdm.write(f"Starting grid search for {name}...")

    # Use GridSearchCV
    grid_search = GridSearchCV(model, param_grids[name], cv=5, scoring='r2', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    
    # Get the best parameters
    best_params[name] = grid_search.best_params_
    tqdm.write(f"Best parameters for {name}: {best_params[name]}")
    tqdm.write(f"Completed grid search for {name}.\n")

# best_params now contains the best parameter set for each model

Starting grid search for Decision Tree...


1800 fits failed out of a total of 2700.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
98 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\kahnp\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\kahnp\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\kahnp\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\kahnp\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParam

Best parameters for Decision Tree: {'criterion': 'friedman_mse', 'max_depth': 50, 'max_leaf_nodes': 100, 'min_samples_leaf': 1, 'min_samples_split': 2}
Completed grid search for Decision Tree.

Starting grid search for Random Forest...


1080 fits failed out of a total of 1620.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
188 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\kahnp\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\kahnp\anaconda3\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "C:\Users\kahnp\anaconda3\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\kahnp\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidPara

Best parameters for Random Forest: {'criterion': 'friedman_mse', 'max_depth': 10, 'min_samples_split': 3, 'n_estimators': 100}
Completed grid search for Random Forest.

Starting grid search for SVR...


KeyError: 'SVR'