In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor

In [23]:
# ----- Load Dataset -----
# Read the concrete dataset from the working directory, then report its
# dimensions and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="concrete_data.csv")
print("Dataset Shape:", df.shape)
print(df.head(n=5))

Dataset Shape: (1030, 9)
   Cement  Blast Furnace Slag  Fly Ash  Water  Superplasticizer  \
0   540.0                 0.0      0.0  162.0               2.5   
1   540.0                 0.0      0.0  162.0               2.5   
2   332.5               142.5      0.0  228.0               0.0   
3   332.5               142.5      0.0  228.0               0.0   
4   198.6               132.4      0.0  192.0               0.0   

   Coarse Aggregate  Fine Aggregate  Age  Strength  
0            1040.0           676.0   28     79.99  
1            1055.0           676.0   28     61.89  
2             932.0           594.0  270     40.27  
3             932.0           594.0  365     41.05  
4             978.4           825.5  360     44.30  


In [24]:
# ----- Split Features and Target -----
# Selection is positional: the final column is the target, everything
# before it is a feature.
y = df.iloc[:, -1].to_numpy()   # last column is the strength
X = df.iloc[:, :-1].to_numpy()  # all columns except last

In [25]:
# ----- Split Train-Test -----
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
# ----- Custom Gradient Boosting Regressor -----
def gradient_boost(X, y, n_estimators=100, lr=0.1, max_depth=3, plot=False):
    """Fit a simple gradient-boosted ensemble of regression trees.

    Starting from an all-zero prediction, each round fits a
    ``DecisionTreeRegressor`` to the current residuals ``y - y_pred`` and adds
    ``lr`` times that tree's predictions to the running prediction. The
    training MSE is printed on rounds 1, 11, 21, ...

    Parameters
    ----------
    X : array-like
        Training features, passed unchanged to ``DecisionTreeRegressor.fit``.
    y : array-like
        Training targets.
    n_estimators : int, default=100
        Number of boosting rounds (one tree per round).
    lr : float, default=0.1
        Shrinkage factor applied to each tree's predictions. The trees are
        returned unscaled, so predictions on new data must be rebuilt as
        ``sum(lr * tree.predict(X_new) for tree in models)`` with the same
        ``lr``.
    max_depth : int, default=3
        Maximum depth of each tree.
    plot : bool, default=False
        When True, plot the training MSE against the boosting round after
        fitting.

    Returns
    -------
    y_pred : numpy.ndarray
        Final float64 predictions on the training data.
    models : list of DecisionTreeRegressor
        The fitted trees, in the order they were trained.
    """
    models = []
    # Explicit float64 accumulator so the in-place update below also works
    # when y has an integer dtype.
    y_pred = np.zeros_like(y, dtype=np.float64)
    mse_history = []

    for i in range(n_estimators):
        residual = y - y_pred
        # The same seed for every tree keeps tie-breaking deterministic.
        tree = DecisionTreeRegressor(max_depth=max_depth, random_state=42)
        tree.fit(X, residual)
        y_pred += lr * tree.predict(X)
        models.append(tree)

        # Only compute the training MSE when it is actually reported or
        # recorded for the plot.
        report = i % 10 == 0
        if report or plot:
            mse = mean_squared_error(y, y_pred)
            if plot:
                mse_history.append(mse)
            if report:
                print(f"Iteration {i+1}/{n_estimators} - MSE: {mse:.4f}")

    if plot:
        fig, ax = plt.subplots()
        ax.plot(range(1, len(mse_history) + 1), mse_history)
        ax.set(
            xlabel="Boosting iteration",
            ylabel="Training MSE",
            title="Gradient boosting training error",
        )
        plt.show()

    return y_pred, models

In [27]:
# ----- Train Custom Boosting Model -----
# Fit 100 depth-4 trees with a shrinkage of 0.1; gradient_boost prints the
# training MSE as it goes.
y_train_pred, models = gradient_boost(
    X_train,
    y_train,
    n_estimators=100,
    lr=0.1,
    max_depth=4,
)

Iteration 1/100 - MSE: 1285.3991
Iteration 11/100 - MSE: 198.0956
Iteration 21/100 - MSE: 48.6765
Iteration 31/100 - MSE: 23.3999
Iteration 41/100 - MSE: 16.6623
Iteration 51/100 - MSE: 13.7252
Iteration 61/100 - MSE: 12.1287
Iteration 71/100 - MSE: 10.6428
Iteration 81/100 - MSE: 9.5541
Iteration 91/100 - MSE: 8.7238


In [28]:
# ----- Predict on Test Data -----
# gradient_boost returns the trees unscaled, so the ensemble prediction is the
# shrinkage-weighted sum of the individual tree outputs.
LEARNING_RATE = 0.1  # must equal the `lr` passed to gradient_boost() during training

# Use an explicit float64 accumulator: np.zeros_like(y_test) would inherit the
# target's dtype, and the in-place += of float predictions fails for integer
# targets.
y_test_pred = np.zeros(len(X_test), dtype=np.float64)
for model in models:
    y_test_pred += LEARNING_RATE * model.predict(X_test)

In [29]:
# ----- Evaluate (disabled: the evaluation cell below computes these metrics) -----
# r2 = r2_score(y_test, y_test_pred)
# mse = mean_squared_error(y_test, y_test_pred)

In [30]:
# ----- Evaluation on the held-out test set -----
r2 = r2_score(y_test, y_test_pred)
mae = mean_absolute_error(y_test, y_test_pred)
mse = mean_squared_error(y_test, y_test_pred)
rmse = np.sqrt(mse)  # square root of the MSE, so it is in the target's units

In [31]:
# ----- Report test-set metrics -----
# The emoji and the superscript two were stored as mojibake (UTF-8 bytes
# decoded as cp1252); the intended characters are restored here.
print("📊 Gradient Boosting Evaluation:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R² Score: {r2:.4f}")

📊 Gradient Boosting Evaluation:
Mean Absolute Error (MAE): 3.57
Root Mean Squared Error (RMSE): 5.05
R² Score: 0.9010
