<a href="https://colab.research.google.com/github/cameron-larkin/Machine_learning_Chelsea/blob/main/Hyperparameters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt

# Read the Chelsea dataset; the relative path is resolved against the
# current working directory.
df = pd.read_csv('Chelsea.csv')

# Keep only the float64/int64 columns so that text and other non-numeric
# columns never reach the estimators.
df_numeric = df.select_dtypes(include=['int64', 'float64'])

# 'Value' is the regression target; every other numeric column is a feature.
y = df_numeric['Value']
X = df_numeric.drop(columns='Value')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Tuned estimators are collected here under the same display names
# that key `models`.
best_models = dict()

# Candidate regressors, keyed by display name. Iteration follows insertion
# order, so this order is also the order in which results are reported.
# Estimators that take a seed are pinned to random_state=42.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'SVM': SVR(),
    'Neural Network': MLPRegressor(random_state=42),
}

# Hyperparameter tuning and model evaluation.
#
# For every entry in `models`: run a 5-fold grid search on the training split,
# keep the refitted best estimator in `best_models`, and report RMSE and
# R-squared on the held-out test split.

# Search space per model, keyed by the same display names as `models`.
# Keeping the grids in one table (instead of an if/elif chain on the name)
# builds them once and makes it obvious which models are tuned.
param_grids = {
    # `copy_X` is deliberately not searched: it only controls whether the
    # estimator may overwrite its input in place, not what model is fitted.
    # Searching it doubled the work for identical scores and, for
    # copy_X=False, allowed the training data to be modified in place.
    'Linear Regression': {'fit_intercept': [True, False]},
    'Decision Tree': {
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
    },
    'Random Forest': {
        'n_estimators': [50, 100, 150],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
    },
    'SVM': {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']},
    'Neural Network': {
        'hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'alpha': [0.0001, 0.001, 0.01],
    },
}

# NOTE(review): features are passed to SVR and MLPRegressor unscaled. Both are
# sensitive to feature scale, which may explain poor test scores -- consider
# wrapping them in a StandardScaler pipeline. TODO confirm against the data.
for model_name, model in models.items():
    # A model without an entry falls back to an empty grid, i.e. it is
    # cross-validated with its current settings and no parameters are varied.
    param_grid = param_grids.get(model_name, {})

    # Scoring is negated MSE because GridSearchCV maximises its score.
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(X_train, y_train)

    # Display the best hyperparameters
    print(f"Best Hyperparameters for {model_name}:", grid_search.best_params_)

    # best_estimator_ is the winning configuration refitted on the whole
    # training split; keep it for later use and predict on the test split.
    best_model = grid_search.best_estimator_
    best_models[model_name] = best_model
    model_pred = best_model.predict(X_test)

    # Evaluate the tuned model on data it has not seen during the search.
    rmse = sqrt(mean_squared_error(y_test, model_pred))
    r_squared = r2_score(y_test, model_pred)
    print(f"{model_name} (Tuned) - RMSE: {rmse:.2f}, R-squared: {r_squared:.2f}")
    print()

Best Hyperparameters for Linear Regression: {'copy_X': True, 'fit_intercept': True}
Linear Regression (Tuned) - RMSE: 897174976.99, R-squared: -1214.49

Best Hyperparameters for Decision Tree: {'max_depth': None, 'min_samples_split': 10}
Decision Tree (Tuned) - RMSE: 34139877.71, R-squared: -0.76

Best Hyperparameters for Random Forest: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Random Forest (Tuned) - RMSE: 30849751.48, R-squared: -0.44

Best Hyperparameters for SVM: {'C': 10, 'kernel': 'linear'}
SVM (Tuned) - RMSE: 27128907.53, R-squared: -0.11





Best Hyperparameters for Neural Network: {'alpha': 0.01, 'hidden_layer_sizes': (50, 50)}
Neural Network (Tuned) - RMSE: 55008122.95, R-squared: -3.57



