In [73]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statistics import mean

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [74]:
# Synthetic two-feature regression problem: 300 noisy samples, generated with a
# fixed seed so the dataset is identical on every run
X, y = make_regression(n_samples=300, n_features=2, noise=15, random_state=42)

# Bend the target with a sine term on feature 0 and a cosine term on feature 1
# so that it is no longer a linear function of the features
y = y + 500 * np.sin(X[:, 0])
y = y + 250 * np.cos(X[:, 1])

**Helper Functions**

In [75]:
import plotly.graph_objects as go

def plot_3d_regression(model, X, y, model_name):
    '''Fit `model` on (X, y) and show the data with the fitted surface in 3D.

    `model` must expose `fit(X, y)` and `predict(X)`. Columns 0 and 1 of `X`
    are used as the two horizontal axes; `model_name` only appears in the
    figure title. The model is (re)fitted as a side effect.
    '''
    model.fit(X, y)

    # Evaluate the model on a 100 x 100 lattice spanning the observed feature ranges
    first_axis = np.linspace(min(X[:, 0]), max(X[:, 0]), 100)
    second_axis = np.linspace(min(X[:, 1]), max(X[:, 1]), 100)
    x_grid, y_grid = np.meshgrid(first_axis, second_axis)
    lattice_points = np.vstack((x_grid.ravel(), y_grid.ravel())).T
    surface_heights = model.predict(lattice_points).reshape(x_grid.shape)

    # Training observations as blue markers
    data_trace = go.Scatter3d(
        x=X[:, 0],
        y=X[:, 1],
        z=y,
        mode='markers',
        marker=dict(color='blue', size=5),
        name='Training Data',
    )

    # Model predictions as a semi-transparent surface
    surface_trace = go.Surface(
        x=x_grid,
        y=y_grid,
        z=surface_heights,
        opacity=0.5,
        colorscale='Viridis',
        name='Regression Surface',
    )

    # Overlay both traces, label the axes, and render
    fig = go.Figure(data=[data_trace, surface_trace])
    axis_titles = dict(
        xaxis_title='Feature 1',
        yaxis_title='Feature 2',
        zaxis_title='Target',
    )
    fig.update_layout(
        title=f'Training Data and Regression Surface for {model_name}',
        scene=axis_titles,
    )
    fig.show()

# Linear Regression
## OLS

In [76]:
class ols_regression():
    '''Ordinary least squares linear regression solved in closed form.

    After `fit`, `coef` holds one weight per feature and `bias` holds the
    intercept.
    '''

    def __init__(self):
        '''Create an unfitted model; the parameters are set by `fit`.'''
        self.coef = None
        self.bias = None

    def fit(self, X, y):
        '''Fit the regression to the X data by least squares.

        X: array of shape (n_samples, n_features); a 1D array is treated as a
           single feature.
        y: array of targets with n_samples entries.
        '''
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # Accept a single feature passed as a 1D array, mirroring predict()
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        # Add a leading column of 1s to the X data to account for the bias term
        design = np.hstack((np.ones((X.shape[0], 1)), X))

        # Solve min ||design @ w - y||^2 with lstsq rather than explicitly
        # inverting design.T @ design: the explicit inverse is numerically
        # fragile and breaks down when features are collinear, whereas lstsq
        # returns the minimum-norm solution in that case.
        params, *_ = np.linalg.lstsq(design, y, rcond=None)
        self.coef = params[1:]
        self.bias = params[0]

    def predict(self, X):
        '''Predict new data with the trained coefficients and bias.

        The predictions are also stored on `self.predictions`.
        '''
        X = np.asarray(X)

        # Check if the X data is 1D and reshape if needed
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        # Dot the new data with the coefficients and add the bias
        self.predictions = X.dot(self.coef) + self.bias

        return self.predictions


In [77]:
# Create the closed-form OLS model; plot_3d_regression fits it on (X, y)
# before drawing the training data and the fitted surface
ols = ols_regression()

plot_3d_regression(ols, X, y, model_name='OLS')


## Gradient Descent Regression

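At every epoch, the weights and the bias are each moved against their gradient of the mean squared error, scaled by the learning rate $\eta$: $w \leftarrow w - \eta \, \frac{\partial L}{\partial w}$ and $b \leftarrow b - \eta \, \frac{\partial L}{\partial b}$.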

In [78]:
class GDRegression():
    '''Linear regression trained by (mini-batch) gradient descent on the MSE.'''

    def __init__(self, epochs, eta):
        '''Initialize the Gradient Descent Regression Class

        epochs: number of gradient steps to take.
        eta: learning rate applied to every step.
        '''
        self.epochs = epochs
        self.eta = eta

    def fit(self, X, y, batch_size=None):
        '''Train the Gradient Descent Regression Class

        X: array of shape (n_samples, n_features).
        y: array of n_samples targets.
        batch_size: number of samples drawn (without replacement) for each
            step. None (the default) uses every sample of the X passed in;
            values larger than the dataset are capped at its size.

        Raises ValueError if the effective batch would be empty.
        '''
        n_samples = X.shape[0]

        # Resolve the default here, at call time. A default of `X.shape[0]` in
        # the signature would be evaluated once, when the class is defined,
        # against whatever global `X` happened to exist at that moment.
        if batch_size is None:
            batch_size = n_samples
        batch_size = min(batch_size, n_samples)
        if batch_size < 1:
            raise ValueError('batch_size must be at least 1 and X must not be empty')

        # Create random initialization for the bias and coefficients
        bias = np.random.random()
        coef = np.random.random(X.shape[1])

        # Iterate through each epoch
        for _ in range(self.epochs):

            if batch_size < n_samples:
                # Draw a fresh mini-batch for this step
                indices = np.random.choice(n_samples, size=batch_size, replace=False)
                X_batch = X[indices]
                y_batch = y[indices]
            else:
                # Full batch: shuffling the rows would not change the gradient
                X_batch = X
                y_batch = y

            # Residuals of the current predictions on the batch
            residuals = X_batch.dot(coef) + bias - y_batch

            # Derivative of the MSE with respect to the bias and the coefficients
            derr_b = 2 / batch_size * np.sum(residuals)
            derr_c = 2 / batch_size * X_batch.T.dot(residuals)

            # Step against the gradient
            bias = bias - derr_b * self.eta
            coef = coef - derr_c * self.eta

        # Finalize the bias and coef
        self.bias = bias
        self.coef = coef

    def predict(self, X):
        '''Predict new data given the learned bias and coef'''
        predictions = X.dot(self.coef) + self.bias
        return predictions

        

In [79]:
# GDRegression draws its initial bias/coefficients (and any mini-batches) from
# NumPy's global random generator, so seed it to make this cell reproducible
np.random.seed(42)

# plot_3d_regression fits the model on (X, y) before drawing the surface
gd_reg = GDRegression(epochs=10000, eta=.01)
plot_3d_regression(gd_reg, X, y, 'Gradient Descent')

## KNN Regression

In [80]:
class KNNRegressor():
    '''K-nearest-neighbours regressor using Euclidean distance.

    A prediction is the unweighted mean target of the `n_neighbors` training
    points closest to the query point.
    '''

    def __init__(self, n_neighbors=5):
        '''Initialize the regressor with a defined number of nearest neighbors'''
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        '''Train the regressor by loading in all X and y data'''
        # KNN has no training step beyond memorising the data; store it as
        # arrays so that predict() can rely on array indexing
        self.X = np.asarray(X)
        self.y = np.asarray(y)

    def predict(self, X):
        '''Make predictions based on the training data using euclidian distance

        X: array of shape (n_queries, n_features).
        Returns a 1D float array with one prediction per query row.
        '''
        X = np.asarray(X)

        # Allocate the output once instead of growing it with np.append, which
        # copies the whole array on every iteration
        predictions = np.empty(len(X))

        # For each test point...
        for row, test_point in enumerate(X):
            # Calculate the distance between the test point and all training points
            distances = np.linalg.norm(self.X - test_point, axis=1)

            # Find the n_neighbors closest points
            closest_points_indices = np.argsort(distances)[:self.n_neighbors]

            # Average the neighbours' targets with NumPy's vectorised mean
            predictions[row] = self.y[closest_points_indices].mean()

        return predictions

In [81]:
# KNN regressor with the default of 5 neighbours; plot_3d_regression fits it
# on (X, y) before drawing the training data and the predicted surface
knn_regressor = KNNRegressor()

plot_3d_regression(knn_regressor, X, y, "K-Nearest Neighbors Regression")

In [82]:
class DecisionTreeRegressor():
    '''Regression tree grown greedily by minimising the weighted MSE of each split.

    The fitted tree is stored in `self.tree` as nested dicts with the keys
    'feature', 'value', 'left' and 'right'; a leaf is a plain number (the mean
    target of the samples that reached it). Samples with
    `x[feature] <= value` belong to the left branch, all others to the right.
    '''

    def __init__(self, max_depth=None):
        '''max_depth: maximum number of splits on any root-to-leaf path
        (None grows the tree until no further split is possible).'''
        self.max_depth = max_depth

    # Function for calculating the MSE of a split
    def mse(self, y):
        '''Mean squared deviation of `y` from its own mean.'''
        return np.mean((y - np.mean(y)) ** 2)

    # Function to find the best split at any given point (based on MSE)
    def _best_split(self, X, y):
        '''Search every feature/threshold pair for the lowest weighted MSE.

        Returns (split_value, feature, left_y, right_y). All four are None
        when no threshold separates the samples into two non-empty groups.
        '''
        # Keep the search state in locals so that nothing can leak from a
        # previous call into this one
        best_mse = float('inf')
        best_feature = None
        best_split_value = None
        best_left_y = None
        best_right_y = None
        n_samples = len(y)

        for feature_num in range(X.shape[1]):
            column = X[:, feature_num]
            for value in np.unique(column):
                left_index = column <= value
                left_targets = y[left_index]
                right_targets = y[~left_index]

                # A threshold that leaves one side empty is not a split
                if len(left_targets) == 0 or len(right_targets) == 0:
                    continue

                # MSE of both children, weighted by their share of the samples
                total_average_mse = (
                    self.mse(left_targets) * len(left_targets)
                    + self.mse(right_targets) * len(right_targets)
                ) / n_samples

                if total_average_mse < best_mse:
                    best_mse = total_average_mse
                    best_feature = feature_num
                    best_split_value = value
                    best_left_y = left_targets
                    best_right_y = right_targets

        return best_split_value, best_feature, best_left_y, best_right_y

    def _build_tree(self, X, y, depth=0):
        '''Recursively grow the tree for the samples (X, y) at `depth`.'''
        # Stop when the node is pure or the depth budget is used up
        if len(np.unique(y)) == 1 or (self.max_depth is not None and depth >= self.max_depth):
            return np.mean(y)

        best_split_value, best_feature, left_y, right_y = self._best_split(X, y)

        # No threshold separates the samples (e.g. identical feature rows)
        if best_feature is None:
            return np.mean(y)

        left_index = X[:, best_feature] <= best_split_value

        # Pass depth + 1 so that max_depth is actually enforced
        left_tree = self._build_tree(X[left_index], left_y, depth + 1)
        right_tree = self._build_tree(X[~left_index], right_y, depth + 1)

        return {
            'feature': best_feature,
            'value': best_split_value,
            'left': left_tree,
            'right': right_tree
        }

    def _single_prediction(self, tree, x):
        '''Walk `tree` for one sample `x` and return the value of its leaf.'''
        if isinstance(tree, dict):
            # Must match the `<=` used when the split was made, otherwise
            # samples lying exactly on the threshold are sent down the wrong branch
            if x[tree['feature']] <= tree['value']:
                return self._single_prediction(tree['left'], x)
            else:
                return self._single_prediction(tree['right'], x)
        else:
            return tree

    def predict(self, X):
        '''Predict one value per row of X with the fitted tree.'''
        predictions = np.array([self._single_prediction(self.tree, x) for x in X])
        return predictions

    def fit(self, X, y):
        '''Grow the tree on X (n_samples, n_features) and targets y.'''
        self.tree = self._build_tree(np.asarray(X), np.asarray(y))

In [83]:
# import numpy as np

# class DecisionTreeRegressor:
#     def __init__(self, max_depth=None):
#         self.max_depth = max_depth  # Maximum depth of the tree
    
#     # Function to calculate Mean Squared Error (MSE)
#     def mean_squared_error(self, y):
#         return np.mean((y - np.mean(y)) ** 2)
    
#     # Function to compute the best split (based on MSE reduction)
#     def best_split(self, X, y):
#         best_mse = float('inf')
#         best_feature = None
#         best_value = None
#         best_left_y = None
#         best_right_y = None
        
#         # Iterate through all features to find the best split
#         for feature_idx in range(X.shape[1]):
#             unique_values = np.unique(X[:, feature_idx])
#             for value in unique_values:
#                 left_mask = X[:, feature_idx] <= value
#                 right_mask = ~left_mask
                
#                 left_y = y[left_mask]
#                 right_y = y[right_mask]
                
#                 if len(left_y) > 0 and len(right_y) > 0:
#                     mse_left = self.mean_squared_error(left_y)
#                     mse_right = self.mean_squared_error(right_y)
#                     mse_split = (len(left_y) * mse_left + len(right_y) * mse_right) / len(y)
                    
#                     if mse_split < best_mse:
#                         best_mse = mse_split
#                         best_feature = feature_idx
#                         best_value = value
#                         best_left_y = left_y
#                         best_right_y = right_y
                        
#         return best_feature, best_value, best_left_y, best_right_y
    
#     # Function to build the tree recursively
#     def build_tree(self, X, y, depth=0):
#         # If all values are the same or max_depth reached, return leaf node with mean value
#         if len(np.unique(y)) == 1 or (self.max_depth is not None and depth >= self.max_depth):
#             return np.mean(y)
        
#         # Find the best feature to split on
#         best_feature, best_value, left_y, right_y = self.best_split(X, y)
        
#         if best_feature is None:
#             return np.mean(y)  # If no valid split, return leaf with mean value
        
#         # Split the dataset based on the chosen feature
#         left_mask = X[:, best_feature] <= best_value
#         right_mask = ~left_mask
        
#         left_tree = self.build_tree(X[left_mask], left_y, depth + 1)
#         right_tree = self.build_tree(X[right_mask], right_y, depth + 1)
        
#         return {
#             'feature': best_feature,
#             'value': best_value,
#             'left': left_tree,
#             'right': right_tree
#         }
    
#     # Function to predict a single sample based on the tree
#     def predict_single(self, tree, x):
#         if isinstance(tree, dict):
#             if x[tree['feature']] <= tree['value']:
#                 return self.predict_single(tree['left'], x)
#             else:
#                 return self.predict_single(tree['right'], x)
#         else:
#             return tree
    
#     # Function to predict for all samples
#     def predict(self, X):
#         return np.array([self.predict_single(self.tree, x) for x in X])
    
#     # Function to fit the model
#     def fit(self, X, y):
#         self.tree = self.build_tree(X, y)


In [84]:
# Decision tree with no depth limit (max_depth defaults to None);
# plot_3d_regression fits it on (X, y) before drawing the predicted surface
dt_reg = DecisionTreeRegressor()
plot_3d_regression(dt_reg, X, y, "Decision Tree")

AttributeError: 'DecisionTreeRegressor' object has no attribute 'single_prediction'