# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import PolynomialFeatures

# Load the dataset
data = pd.read_csv('data_g.csv')  # Replace with your actual file path

# Predictor columns fed to the model; "UCS" is the regression target.
feature_columns = [
    'curing_days', 'cement', 'flyash', 'water',
    'sa', 'viscosity', 'max_airt', 'max_var',
]
X = data[feature_columns]
y = data["UCS"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Expand the inputs with degree-2 polynomial terms (no bias column).
# The transformer is fitted on the training split only and then applied
# to both splits, so nothing about the test rows influences the fit.
poly = PolynomialFeatures(degree=2, include_bias=False).fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

# Hyperparameter search space for the Random Forest.
# Each key is a RandomForestRegressor constructor argument and each list
# holds the candidate values RandomizedSearchCV samples from.
rf_param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    # 'auto' was removed from scikit-learn (deprecated in 1.1, gone in 1.3)
    # and makes every candidate that samples it fail validation. For a
    # regressor it meant "use all features", which is spelled 1.0 and is
    # accepted by old and new releases alike.
    'max_features': [1.0, 'sqrt'],
}

# Base estimator whose hyperparameters are tuned (seeded for reproducibility)
rf_model = RandomForestRegressor(random_state=42)

# Randomized search settings: 100 sampled candidates, each scored by
# negative mean squared error with cv=5, using every available core.
search_settings = dict(
    n_iter=100,
    scoring='neg_mean_squared_error',
    cv=5,
    random_state=42,
    n_jobs=-1,
)
rf_search = RandomizedSearchCV(rf_model, rf_param_dist, **search_settings)

# Run the search on the polynomial-expanded training data
rf_search.fit(X_train_poly, y_train)

# Keep the best estimator found by the search and report its parameters
best_rf_model = rf_search.best_estimator_
print(f'Best Random Forest Parameters: {rf_search.best_params_}')

# Evaluate the best Random Forest model on the held-out test split
rf_y_pred = best_rf_model.predict(X_test_poly)
rf_r2 = r2_score(y_test, rf_y_pred)
# Take the square root explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas sqrt(MSE) gives the same RMSE on every version.
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_y_pred))
rf_mae = mean_absolute_error(y_test, rf_y_pred)

# Report the three metrics to four decimal places
print(f'Random Forest R^2 Score: {rf_r2:.4f}')
print(f'Random Forest RMSE: {rf_rmse:.4f}')
print(f'Random Forest MAE: {rf_mae:.4f}')