In [247]:
import numpy as np
import pandas as pd
import sqlite3
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_iris, load_linnerud
import matplotlib.pyplot as plt

# Data Preprocessing and Evaluation

Load classification and regression datasets

In [193]:
# Load datasets
regression_dataset = load_linnerud()
classification_dataset = load_iris()

# Split into inputs and targets
regression_X, regression_Y = regression_dataset.data, regression_dataset.target
classification_X, classification_Y = classification_dataset.data, classification_dataset.target

# Preprocess datasets
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MinMaxScaler())
])

regression_X = numerical_transformer.fit_transform(regression_X)
classification_X = numerical_transformer.fit_transform(classification_X)

# Split into training and testing sets
regression_X_train, regression_X_test, regression_Y_train, regression_Y_test = train_test_split(regression_X, regression_Y, test_size=0.2, random_state=42)
classification_X_train, classification_X_test, classification_Y_train, classification_Y_test = train_test_split(classification_X, classification_Y, test_size=0.2, random_state=42)

Evaluation functions

In [248]:
def regression_eval(my_model, library_model, X_train, X_test, Y_train, Y_test, decimal_places=5):
    
    # Fit models
    my_model.fit(X_train, Y_train)
    library_model.fit(X_train, Y_train)
    
    # Predict
    my_pred = my_model.predict(X_test)
    library_pred = library_model.predict(X_test)
    
    # Calculate MSE
    my_mse = round(mean_squared_error(Y_test, my_pred), decimal_places)
    library_mse = round(mean_squared_error(Y_test, library_pred), decimal_places)
    
    return my_mse, library_mse

# Classical Models

In this section I will focus on implementing the classical machine learning models:
* [Linear Regression](#linear-regression)
* [Logistic Regression](#logistic-regression)
* [Support Vector Machine](#support-vector-machine)
* [K-Means](#k-means)
* [K-Nearest Neighbors](#k-nearest-neighbors)
* [Decision Trees](#decision-trees)
* [Random Forest](#random-forest)
* [Naive Bayes](#naive-bayes)
* [Principal Component Analysis](#principal-component-analysis)

## Linear Regression

Define the models

In [307]:
from sklearn.linear_model import LinearRegression as LibraryLinearRegression

class MyLinearRegression():
    
    def __init__(self, learning_rate: float, iterations: int) -> None:
        self.learning_rate = learning_rate
        self.iterations = iterations
    
    def fit(self, X: np.ndarray, Y: np.ndarray) -> float:
        
        # Training examples
        self.X = X
        self.Y = Y
        
        # Number of training examples and input/output features
        self.m, self.n = X.shape
        self.o = Y.shape[1]
        
        # Initialize weights and bias
        self.W = np.zeros([self.n, self.o])
        self.b = np.zeros(self.o)
        
        print(self.W.shape)
        print(self.b.shape)
        
        for i in range(self.iterations):
            self.update_weights()
        
        return np.mean(np.square(self.predict(X) - Y))
    
    def update_weights(self):
        
        # Predict the output
        Y_pred = self.predict(self.X)
        
        # Calculate gradients
        dW = 2 * (self.X.T).dot(Y_pred - self.Y) / self.m
        db = 2 * np.mean(Y_pred - self.Y)
        
        # Update weights
        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db
    
    def predict(self, X: np.ndarray) -> np.ndarray:
        return X.dot(self.W) + self.b

Single Regression

In [None]:
my_model = MyLinearRegression(0.01, 1000)
library_model = LibraryLinearRegression()
my_mse, library_mse = regression_eval(
    my_model, 
    library_model, 
    regression_X_train[:,1].reshape(-1,1),
    regression_X_test[:,1].reshape(-1,1),
    regression_Y_train[:,1].reshape(-1,1),
    regression_Y_test[:,1].reshape(-1,1))

fig, ax = plt.subplots()
ax.axline((0, my_model.b), slope=my_model.W[0], color='red', label='My Model', linestyle='dotted')
ax.axline((0, library_model.intercept_), slope=library_model.coef_[0], color='blue', label='Library Model', linestyle='dotted')
ax.scatter(regression_X[:,1], regression_Y[:,1], color='black')
ax.legend([f'My Model | MSE={my_mse}', f'Library Model | MSE={library_mse}'])
plt.show()

Multiple Regression

In [308]:
my_model = MyLinearRegression(0.001, 10000)
library_model = LibraryLinearRegression()
my_mse, library_mse = regression_eval(
    my_model, 
    library_model, 
    regression_X_train,
    regression_X_test,
    regression_Y_train,
    regression_Y_test)

print(f'My Model MSE: {my_mse}')
print(f'Library Model MSE: {library_mse}')

(3, 3)
(3,)
My Model MSE: 1937.36119
Library Model MSE: 239.15367


## Logistic Regression

## Polynomial Regression

## Support Vector Machine

## K-Means

## K-Nearest Neighbors

## Decision Tree

## Random Forest

## Naive Bayes

## Principal Component Analysis