## Abstract Base Class (ABC)

* An **`abstract base class`** is a class that is used as a `blueprint` for other classes. Abstract base classes are a powerful feature in Python since they help you define a blueprint for other classes that may have something in common. [source](https://www.educative.io/answers/what-is-the-abstract-base-class-in-python)

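* An `ABC` that declares one or more abstract methods can **NOT** be instantiated directly. It can only be used as a `template/blueprint` by other classes, which must override every abstract method before they can be instantiated.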

In [1]:
import numpy as np
import pandas as pd


%load_ext lab_black

In [2]:
from abc import ABC, abstractmethod
from typing import Union, Optional

In [3]:
class Model(ABC):
    """Abstract interface for an ML model/algorithm.

    Subclasses must override ``__repr__``, ``fit`` and ``predict``; until
    they do, instantiating them raises ``TypeError``.
    """

    @abstractmethod
    def __repr__(self) -> str:
        """Return the model signature used when printing the model."""

    @abstractmethod
    def fit(self, X: np.ndarray, y: np.ndarray) -> "Model":
        """Train the model on features ``X`` and targets ``y``.

        ``y`` is a required argument: the earlier ``y=np.ndarray`` spelling
        made the ndarray *class* a default value rather than annotating
        the parameter.
        """

    @abstractmethod
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return predictions for ``X`` using the trained model."""

In [4]:
class LinearRegression(Model):
    """Linear regression model that, in this version, only stores its settings.

    None of the abstract methods inherited from ``Model`` is overridden here.
    """

    def __init__(self, n_iters: int = 5_000, learning_rate: float = 0.001) -> None:
        # Training configuration supplied by the caller.
        self.n_iters, self.learning_rate = n_iters, learning_rate
        # Parameters to be learned; nothing has been learned yet.
        self.weight = self.bias = None

### Comment

* If the abstract methods `__repr__`, `fit` and `predict` are not implemented by the subclass, instantiating it raises a `TypeError`.

In [5]:
# Expected to fail: the LinearRegression class defined so far overrides none
# of the abstract methods (__repr__, fit, predict), so this raises TypeError.
lr = LinearRegression()

TypeError: Can't instantiate abstract class LinearRegression with abstract methods __repr__, fit, predict

In [6]:
class LinearRegression(Model):
    """Linear regression trained with batch gradient descent.

    Predictions are computed as ``np.dot(X, weight) + bias`` and the
    parameters are updated with the gradient of the mean squared error.

    Parameters
    ----------
    n_iters : int
        Number of gradient descent updates performed by ``fit``.
    learning_rate : float
        Step size applied to every gradient update.
    """

    def __init__(self, n_iters: int = 5_000, learning_rate: float = 0.001) -> None:
        self.n_iters = n_iters
        self.learning_rate = learning_rate
        # Learned parameters; both stay None until fit() has been called.
        self.weight = None
        self.bias = None

    def __repr__(self) -> str:
        """Return the model signature, e.g. ``LinearRegression(n_iters=..., ...)``."""
        # type(self) (rather than the lexical __class__) keeps the printed
        # name correct for subclasses.
        return (
            f"{type(self).__name__}(n_iters={self.n_iters}, "
            f"learning_rate={self.learning_rate})"
        )

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LinearRegression":
        """Learn ``weight`` and ``bias`` from the training data.

        Parameters
        ----------
        X : np.ndarray
            2-D feature matrix of shape (n_samples, n_features).
        y : np.ndarray
            Targets with one value per sample. A column vector of shape
            (n_samples, 1) is flattened to (n_samples,).

        Returns
        -------
        LinearRegression
            The fitted model itself, so calls can be chained.
        """
        n_samples, n_features = X.shape

        # Flatten the targets: subtracting an (n_samples, 1) column from the
        # (n_samples,) predictions would broadcast to (n_samples, n_samples).
        y = np.asarray(y).ravel()

        # Start from zero parameters.
        self.weight = np.zeros(n_features)  # shape: (n_features,)
        self.bias = 0.0  # scalar

        for _ in range(self.n_iters):
            # Current estimate y_hat: (n_samples, n_features) . (n_features,)
            # -> (n_samples,)
            y_pred = np.dot(X, self.weight) + self.bias
            residual = y_pred - y

            # Gradients of the mean squared error.
            # X.T has shape (n_features, n_samples), so dw is (n_features,).
            dw = (2 / n_samples) * np.dot(X.T, residual)
            db = 2 * np.mean(residual)

            # Step against the gradient.
            self.weight -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return one prediction per row of ``X``.

        ``fit`` must have been called first; ``weight`` and ``bias`` are
        ``None`` before that.
        """
        return np.dot(X, self.weight) + self.bias

    @staticmethod
    def calculate_mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the Mean Squared Error rounded to 2 decimal places."""
        mse = np.mean(np.square(y_true - y_pred))
        return round(mse, 2)

### Generate Mock Data

In [7]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Size of the synthetic regression problem and the noise setting
# handed to make_regression.
N_SAMPLES = 2_000
N_FEATURES = 1
NOISE = 10

# Hold-out fraction, plus the seed shared by generation and splitting.
TEST_SIZE = 0.1
RANDOM_STATE = 123

data = make_regression(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    noise=NOISE,
    random_state=RANDOM_STATE,
)
X, y = data

# Keep TEST_SIZE of the samples aside for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

In [8]:
# First run: 5,000 gradient descent updates with a learning rate of 0.0001.
lr = LinearRegression(n_iters=5_000, learning_rate=0.0001)
# fit() returns the model itself, so the cell displays the model's repr.
lr.fit(X_train, y_train)

LinearRegression(n_iters=5000, learning_rate=0.0001)

In [9]:
# Predict on the held-out test split.
y_pred = lr.predict(X=X_test)

# Mean squared error on the test split, rounded to 2 decimal places.
lr.calculate_mean_squared_error(y_true=y_test, y_pred=y_pred)

313.75

In [10]:
# Second run: same number of iterations, learning rate raised to 0.01.
# fit() returns the model itself, so construction and training are chained.
lr = LinearRegression(n_iters=5_000, learning_rate=0.01).fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Test-split mean squared error of the retrained model.
lr.calculate_mean_squared_error(y_true=y_test, y_pred=y_pred)

98.54