<a href="https://colab.research.google.com/github/abhi1628/GBRF_ML_Package/blob/main/GBRF_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gbrf

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gbrf
  Downloading gbrf-1.0.7.tar.gz (3.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: gbrf
  Building wheel for gbrf (setup.py) ... [?25l[?25hdone
  Created wheel for gbrf: filename=gbrf-1.0.7-py3-none-any.whl size=3853 sha256=b43d64135f6d0b593b9cc17a2b6733dfebecab16e41bca13a6b18d8b9ea84842
  Stored in directory: /root/.cache/pip/wheels/95/ec/1b/a037348207cafb9efb503c95db05b3c8a6884e7bd5a4cad945
Successfully built gbrf
Installing collected packages: gbrf
Successfully installed gbrf-1.0.7


## Calculate the R-squared score and mean square error using GBRF ML Algorithm on breast cancer dataset

In [29]:
from sklearn.datasets import load_breast_cancer
from gbrf.gbrf import GBRF

# Load the breast cancer dataset
data = load_breast_cancer(as_frame=True)

# Extract features and target
X = data.data
y = data.target

# Initialize GBRF classifier with random_state=42
gbrf = GBRF(random_state=42)

# Fit the classifier on the data
gbrf.fit(X, y)

# Print the R-squared score of the model
print("R-squared score:", gbrf.score())

# Print the mean squared error of the model
print("Mean squared error:", gbrf.mse())


R-squared score: 0.86707107288374
Mean squared error: 0.031241450619626663


## Calculate the R-squared score on the testing data

In [6]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from gbrf import GBRF

# Load breast cancer dataset from scikit-learn
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the GBRF model
model = GBRF(random_state=42)

# Fit the model on the training data
model.fit(X_train, y_train)

# Calculate the R-squared score on the testing data
r2 = model.score()

# Print the R-squared score
print("R-squared score:", r2)


R-squared score: 0.8199108005278766


## Determining the model accuracy

In [8]:
import gbrf
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset
data = load_breast_cancer()

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

# Create a GBRF model
model = gbrf.GBRF()

# Fit the model to the training data
model.fit(X_train, y_train)

# Print the model's accuracy on the test set
print("Accuracy:", model.score())

Accuracy: 0.8254537943535414


## Comparing SVM and GBRF

In [12]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from gbrf import GBRF
from sklearn.metrics import mean_squared_error

# Load the California Housing dataset
X, y = fetch_california_housing(return_X_y=True)

# Scale the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train and evaluate the SVR model
svr = SVR()
svr.fit(X_train, y_train)
y_pred_svr = svr.predict(X_test)
mse_svr = mean_squared_error(y_test, y_pred_svr)
print("SVR MSE: {:.4f}".format(mse_svr))

# Train and evaluate the GBRF model
gbrf = GBRF()
gbrf.fit(X_train, y_train)
y_pred_gbrf = gbrf.predict(X_test)
mse_gbrf = mean_squared_error(y_test, y_pred_gbrf)
print("GBRF MSE: {:.4f}".format(mse_gbrf))


SVR MSE: 0.3471
GBRF MSE: 0.2916


## Comparing Random Forest and GBRF

In [14]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from gbrf import GBRF

# Load the California Housing dataset
X, y = fetch_california_housing(return_X_y=True)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a Random Forest model
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)

# Evaluate the Random Forest model
y_pred_rf = rf.predict(X_test)
r2_rf = r2_score(y_test, y_pred_rf)
print("Random Forest R2 score: {:.4f}".format(r2_rf))

# Train a GBRF model
gbrf = GBRF(random_state=42)
gbrf.fit(X_train, y_train)

# Evaluate the GBRF model
y_pred_gbrf = gbrf.predict(X_test)
r2_gbrf = r2_score(y_test, y_pred_gbrf)
print("GBRF R2 score: {:.4f}".format(r2_gbrf))


Random Forest R2 score: 0.8046
GBRF R2 score: 0.7817


## Comparing GradientBoost and GBRF

In [16]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score, train_test_split
from gbrf import GBRF

# Load the California Housing dataset
X, y = fetch_california_housing(return_X_y=True)

# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# create instances of GradientBoostingRegressor and GBRF
gbr = GradientBoostingRegressor(random_state=42)
gbrf = GBRF(random_state=42)

# fit models
gbr.fit(X_train, y_train)
gbrf.fit(X_train, y_train)

# evaluate models on test set
gbr_pred = gbr.predict(X_test)
gbrf_pred = gbrf.predict(X_test)

gbr_mse = mean_squared_error(y_test, gbr_pred)
gbr_r2 = r2_score(y_test, gbr_pred)

gbrf_mse = mean_squared_error(y_test, gbrf_pred)
gbrf_r2 = r2_score(y_test, gbrf_pred)

print("GradientBoostingRegressor")
print("MSE: {:.3f}, R2: {:.3f}".format(gbr_mse, gbr_r2))
print("GBRF")
print("MSE: {:.3f}, R2: {:.3f}".format(gbrf_mse, gbrf_r2))



GradientBoostingRegressor
MSE: 0.294, R2: 0.776
GBRF
MSE: 0.289, R2: 0.779


## Save a trained model and then make prediction using it after loading the saved model

In [21]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from gbrf import GBRF
import joblib

# Load the California Housing dataset
X, y = fetch_california_housing(return_X_y=True)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an instance of GBRF
model = GBRF()

# Train the model
model.fit(X_train, y_train)

# Save the model using joblib
joblib.dump(model, 'gbrf_model.joblib')

# Load the model from file
loaded_model = joblib.load('gbrf_model.joblib')

# Make predictions using the loaded model
y_pred = loaded_model.predict(X_test)

# Print the mean squared error of the predictions
mse = np.mean((y_pred - y_test) ** 2)
print('Mean Squared Error:', mse)


Mean Squared Error: 0.29082382176987237
