In [246]:
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

In [247]:
# Read the records from the CSV file, then preview the leading rows to
# sanity-check how the columns were parsed.
csv_path = 'SGnKV.csv'
dataset = pd.read_csv(csv_path)
dataset.head(n=5)

Unnamed: 0,GRADATION,AGGREGATE SOURCE,BINDER TYPE,DESIGN AGGREGATE GRADATION,AV,COMPACTION,PERMANENT STRAIN (%),CTINDEX,Gmb,AV.1,VMA,VFB
0,19.0,2.5,350,5.58,7.243,35,2.362,47.675,2.353,7.243,18.011,59.787
1,19.0,2.5,350,5.58,5.982,50,1.976,50.025,2.375,5.982,16.897,64.597
2,19.0,2.5,350,5.58,4.003,75,1.644,41.935,2.425,4.003,15.148,73.576
3,19.0,2.5,350,5.58,3.449,90,1.161,34.315,2.439,3.449,14.658,76.475
4,19.0,2.5,350,5.58,3.262,110,0.958,37.28,2.442,3.262,14.493,77.492


In [248]:
# Predictors: the first five columns, selected by position.
X = dataset.iloc[:, :5]
# Target: the column at position 7 (its name is CTINDEX in this dataset).
y = dataset.iloc[:, 7]
# Stand-alone handles on each of the first four columns.
X0, X1, X2, X3 = (dataset.iloc[:, position] for position in range(4))

In [249]:
# Display the target series to confirm which column was selected.
y

0      47.675
1      50.025
2      41.935
3      34.315
4      37.280
        ...  
295    84.485
296    61.015
297    59.190
298    50.840
299    49.970
Name: CTINDEX, Length: 300, dtype: float64

In [250]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [251]:
# Build the three candidate regressors, each with the same fixed seed.
dt_model, gbr_model, rfr_model = (
    estimator_cls(random_state=42)
    for estimator_cls in (
        DecisionTreeRegressor,
        GradientBoostingRegressor,
        RandomForestRegressor,
    )
)

In [252]:

# Fit every candidate on the same training split.
for estimator in (dt_model, gbr_model):
    estimator.fit(X_train, y_train)
# The random forest is fitted last and left as the cell's final expression,
# so the notebook still echoes that fitted estimator.
rfr_model.fit(X_train, y_train)

In [253]:
# Predict the held-out targets with each fitted regressor.
dt_pred, gbr_pred, rfr_pred = (
    fitted.predict(X_test) for fitted in (dt_model, gbr_model, rfr_model)
)


In [254]:
# Scoring helper shared by all candidate models
def evaluate_model(y_test, y_pred, model_name):
    """Print and return the RMSE and R2 score for one set of predictions.

    Args:
        y_test: True target values.
        y_pred: Predicted values to compare against ``y_test``.
        model_name: Label printed in the report header.

    Returns:
        A ``(rmse, r2)`` tuple.
    """
    squared_error = mean_squared_error(y_test, y_pred)
    rmse = squared_error ** 0.5
    r2 = r2_score(y_test, y_pred)

    # Emit the four report lines in one call; the printed text is unchanged.
    report_lines = (
        f"{model_name} Performance:",
        f"RMSE: {rmse}",
        f"R2 Score: {r2}",
        '-' * 40,
    )
    print("\n".join(report_lines))
    return rmse, r2

In [255]:
# Score the three models in order; each call prints its report and yields (rmse, r2).
(dt_rmse, dt_r2), (gbr_rmse, gbr_r2), (rfr_rmse, rfr_r2) = (
    evaluate_model(y_test, predicted, label)
    for predicted, label in (
        (dt_pred, "Decision Tree"),
        (gbr_pred, "Gradient Boosting Regressor"),
        (rfr_pred, "Random Forest Regressor"),
    )
)


Decision Tree Performance:
RMSE: 10.25383206822373
R2 Score: 0.7407791272764326
----------------------------------------
Gradient Boosting Regressor Performance:
RMSE: 7.512184569697973
R2 Score: 0.8608670264854327
----------------------------------------
Random Forest Regressor Performance:
RMSE: 7.894552297209447
R2 Score: 0.8463429187197302
----------------------------------------


In [256]:
# Pick the winner by lowest RMSE on the test split (R2 is not used for selection).
# NOTE(review): the same test split is used both to choose the model and to
# report its score, so the reported RMSE is likely optimistic.
models_performance = dict(
    zip(
        ('Decision Tree', 'Gradient Boosting Regressor', 'Random Forest Regressor'),
        (dt_rmse, gbr_rmse, rfr_rmse),
    )
)
best_model_name = min(models_performance.items(), key=lambda entry: entry[1])[0]
print(f"The best model is: {best_model_name} with RMSE: {models_performance[best_model_name]}")


The best model is: Gradient Boosting Regressor with RMSE: 7.512184569697973


In [257]:
# Resolve the winning name to its fitted estimator object (nothing is written
# to disk here). Any name other than the two listed resolves to the random forest.
best_model = {
    "Decision Tree": dt_model,
    "Gradient Boosting Regressor": gbr_model,
}.get(best_model_name, rfr_model)


In [258]:
# Test-split predictions from the selected best model.
best_predictions = best_model.predict(X_test)

In [259]:
# A single sample to score: one row of five feature values.
# Presumably ordered like the columns of X — verify before relying on the result.
input_value = [[19, 17, 822, 5.58, 7.243]]

In [260]:
# Score the sample only when its width matches the training feature count.
if len(input_value[0]) != X_train.shape[1]:
    print(f"Error: Input must have {X_train.shape[1]} features. Given: {len(input_value[0])}")
else:
    # The models were fitted on a DataFrame, so scikit-learn recorded the
    # column names. Predicting on a bare list triggers its
    # "X does not have valid feature names" warning; labelling the sample with
    # the training columns avoids that without changing the predicted value.
    input_frame = pd.DataFrame(input_value, columns=X_train.columns)
    # Make a prediction using the best model
    prediction = best_model.predict(input_frame)
    print(f"Prediction for input {input_value}: {prediction[0]}")

Prediction for input [[19, 17, 822, 5.58, 7.243]]: 54.94186818356865




In [261]:
# Persist the selected model so it can be reloaded later without retraining.
# `pickle` is imported in the notebook's top import cell.
# NOTE: a pickled estimator should only be loaded from a trusted source, and
# may not load correctly under a different scikit-learn version.
with open('C.pkl', 'wb') as file:
    pickle.dump(best_model, file)


In [262]:
# # Load the saved model (only unpickle files that come from a trusted source)
# with open('C.pkl', 'rb') as file:
#     best_model = pickle.load(file)

# # Make predictions with the loaded model
# prediction = best_model.predict(input_value)
# print(f"Prediction for input {input_value}: {prediction[0]}")

