# Gradient Boosting

![image.png](attachment:image.png)

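Gradient boosting is a machine learning ensemble technique that combines the predictions of multiple weak learners, typically shallow decision trees, built sequentially: each new learner is fitted to the residual errors of the ensemble so far, and its shrunken prediction is added to the running total.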

# 1. Import Python Modules

In [5]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import pandas as pd

In [6]:
# Toy dataset: six rows with a 'Years of Exp' column, a 0/1 'Gap' column,
# and the 'Annual salary' column that serves as the regression target.
data = {
    'Years of Exp': [1, 1.5, 2.5, 3, 5, 6],
    'Gap': [0, 1, 1, 0, 0, 0],
    'Annual salary': [4, 4, 5.5, 7, 7.5, 8],
}
df = pd.DataFrame(data)

# Features are every column except the target; the target is kept as a Series.
X = df.drop(columns='Annual salary')
Y = df['Annual salary']

# 2. Implement Gradient Boosting Regression

In [10]:
def gradient_regression_model(X, Y, n_estimators, learning_rate=0.1):
    """Fit a hand-rolled gradient-boosting regressor and print its training RMSE.

    The model starts from a constant prediction (the mean of ``Y``). In each
    of ``n_estimators`` rounds a ``DecisionTreeRegressor(max_depth=1)`` is
    fitted to the current residuals, and every sample's prediction is moved
    by ``learning_rate`` times the mean residual of the leaf it falls into.

    Parameters
    ----------
    X : feature table accepted by ``DecisionTreeRegressor.fit`` / ``.apply``
        One row per sample.
    Y : 1-D numeric sequence (e.g. pandas Series or numpy array)
        Target values, one per row of ``X``.
    n_estimators : int
        Number of boosting rounds. ``0`` leaves the constant mean prediction.
    learning_rate : float, default 0.1
        Shrinkage applied to each round's update.

    Returns
    -------
    None
        The RMSE between ``Y`` and the final predictions on the same rows
        used for fitting is printed, formatted to two decimals.
    """
    # Work in float regardless of Y's dtype: np.full_like(Y, mean) would
    # inherit an integer dtype from Y and silently truncate the mean.
    targets = np.asarray(Y, dtype=float)

    # Round 0: constant prediction and the residuals it leaves behind.
    predicted_values = np.full(targets.shape, targets.mean())
    error_values = targets - predicted_values

    for _ in range(n_estimators):
        # Weak learner: a depth-1 tree fitted to the current residuals.
        tree_reg = DecisionTreeRegressor(max_depth=1)
        tree_reg.fit(X, error_values)

        # `apply` gives, for each sample, the id of the leaf it lands in.
        leaf_nodes = tree_reg.apply(X)

        # Mean residual of the samples sharing each leaf.
        leaf_values = {
            node: error_values[leaf_nodes == node].mean()
            for node in np.unique(leaf_nodes)
        }

        # Per-sample update: the mean residual of that sample's leaf.
        target_column = np.array([leaf_values[node] for node in leaf_nodes])

        # Shrink the update, then recompute residuals for the next round.
        predicted_values = predicted_values + learning_rate * target_column
        error_values = targets - predicted_values

    # RMSE on the rows the model was fitted on (no held-out data is used here).
    train_rmse = mean_squared_error(targets, predicted_values) ** (1 / 2)

    print('Root mean Square error: {:.2f}'.format(train_rmse))
    
    
# Run the hand-rolled booster on the toy data for 50 boosting rounds.
gradient_regression_model(X, Y, n_estimators=50)

Root mean Square error: 0.05


In [8]:
# Reference model: scikit-learn's gradient boosting regressor, configured with
# absolute-error loss and 30 estimators.
from sklearn.ensemble import GradientBoostingRegressor

gb_regressor = GradientBoostingRegressor(n_estimators=30, loss='absolute_error')
gb_regressor.fit(X, Y)

# Predict on the same rows the model was fitted on (no separate split is made here).
Y_pred = gb_regressor.predict(X)

# Root mean squared error: the square root of the mean squared error.
rmse = mean_squared_error(Y, Y_pred) ** 0.5
print("Root Mean Squared Error:", rmse)

Root Mean Squared Error: 0.33509607056228347
