In [25]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error

class LightGBMFromScratch:
    """Gradient-boosted regression trees trained on squared-error loss.

    Each boosting round fits a ``DecisionTreeRegressor`` to the current
    residuals (the negative gradient of the squared error) and adds its
    prediction, scaled by ``learning_rate``, to the running ensemble output.
    The only LightGBM-like part visible in this class is the gradient/hessian
    interface; the trees themselves are ordinary scikit-learn regression trees.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds (trees) built by ``fit``.
    learning_rate : float, default=0.1
        Shrinkage factor applied to every tree's contribution.
    max_depth : int, default=3
        Passed through to each ``DecisionTreeRegressor``.
    min_samples_split : int, default=2
        Passed through to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    trees : list
        Fitted trees, in the order they were added.
    init_prediction : float
        Constant baseline prediction (mean of the training target).
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3, min_samples_split=2):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.trees = []
        # Float baseline so an unfitted model still yields a float prediction array.
        self.init_prediction = 0.0

    def _loss_gradient(self, y, y_pred):
        """Return the negative gradient and hessian of the squared-error loss.

        Parameters
        ----------
        y : array-like
            True target values.
        y_pred : array-like
            Current ensemble predictions, same shape as ``y``.

        Returns
        -------
        grad : same type as ``y - y_pred``
            Residuals ``y - y_pred`` (the negative gradient).
        hess : numpy.ndarray of float
            Array of ones with the shape of ``y`` (the hessian is constant).
        """
        grad = y - y_pred  # Negative gradient for squared error
        # Always float, even when y holds integer class labels.
        hess = np.ones(np.shape(y), dtype=float)
        return grad, hess

    def fit(self, X, y):
        """Train the ensemble from scratch on ``X`` and ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features; handed to each tree unchanged.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training target; flattened to one dimension internally.

        Returns
        -------
        self : LightGBMFromScratch
            The fitted model.
        """
        # Flatten so a column-vector target does not break the in-place
        # update below (tree.predict always returns a 1-D array).
        y = np.asarray(y, dtype=float).ravel()

        # Discard trees from any earlier fit; otherwise a refit would keep
        # stale trees and predict() would sum both ensembles.
        self.trees = []

        # Initialize predictions with the mean of the target
        self.init_prediction = float(np.mean(y))
        y_pred = np.full(y.shape, self.init_prediction, dtype=float)

        for _ in range(self.n_estimators):
            # The hessian is constant for squared error, so only the
            # residuals are needed to fit the next tree.
            grad, _hess = self._loss_gradient(y, y_pred)

            # Fit a new decision tree to the negative gradient
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
            )
            tree.fit(X, grad)
            self.trees.append(tree)

            # Move the running prediction a shrunken step towards the target
            y_pred += self.learning_rate * tree.predict(X)

        return self

    def predict(self, X):
        """Predict continuous target values for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to score; handed to each tree unchanged.

        Returns
        -------
        numpy.ndarray of float, shape (n_samples,)
            Baseline plus the shrunken sum of all tree predictions. With no
            fitted trees this is just the constant baseline.
        """
        n_samples = np.shape(X)[0]
        # Explicit float dtype: an integer baseline would otherwise create an
        # int array and the in-place float addition below would fail.
        y_pred = np.full(n_samples, self.init_prediction, dtype=float)
        for tree in self.trees:
            y_pred += self.learning_rate * tree.predict(X)
        return y_pred





In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Read the CSV using its second line as the header row (header=1), then drop
# duplicate rows and the 'id' column in a single chain.
dataset = (
    pd.read_csv('pd_speech_features.csv', header=1)
    .drop_duplicates()
    .drop(columns='id')
)
col = dataset.columns

# Remove every row that still has a missing value.
dataset = dataset.dropna()

# Features are all remaining columns except the 'class' target.
x = dataset.drop(columns='class')
y = dataset['class']

# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=16,
)

In [29]:
# Build and train the boosted-tree model on the training split.
lgbm = LightGBMFromScratch(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    min_samples_split=2,
)
lgbm.fit(X_train, y_train)

# The model outputs continuous scores; anything above 0.5 becomes class 1,
# everything else class 0.
y_pred = (lgbm.predict(X_test) > 0.5).astype(int)




In [30]:
# Display the 0/1 labels predicted for the test split.
y_pred

array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1])

In [37]:
from sklearn.metrics import accuracy_score
# Share of test labels that the predicted labels match exactly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.2f} ')

Accuracy: 0.87 


In [28]:
from sklearn.metrics import accuracy_score
# Report accuracy (as a percentage) and mean squared error on the test split.
# Predictions are rounded to the nearest integer before scoring.
# NOTE(review): this cell's execution count is lower than the cells above it,
# so it may have been run while y_pred still held continuous scores - confirm
# with a fresh Restart & Run All.
print('accuracy :', accuracy_score(y_true=y_test, y_pred=y_pred.round()) * 100, '%')
print('mean_squared_error  ', mean_squared_error(y_true=y_test, y_pred=y_pred.round()))


accuracy : 87.2 %
mean_squared_error   0.128
