# Dodatak: restrukturiran kod

Primjer jednog od načina strukturiranja koda korištenog u poglavlju o regresionoj analizi.

In [1]:
""" This module contains a regression analysis toolkit """
from collections import Callable
from functools import partial
from random import random
from typing import Union

import numpy as np
from sklearn import linear_model


class RegressionAnalysis:
    """ Class containing regression analysis tools

    Every ``train_*`` method fits a model to the training data supplied at
    construction time and returns the fitted model as a callable.
    """
    def __init__(self, training_data: np.ndarray) -> None:
        """
        Args:
            training_data: Array (or nested sequence) of samples, one row
                per sample, of the format:
                [[input_value, output_value],
                 [input_value, output_value],
                 ...,
                 [input_value, output_value]]
                Column 0 is used as the inputs, column 1 as the outputs.
        """
        # An ndarray is passed through untouched; plain nested lists/tuples
        # are converted so that the `.T` slicing below works for them too.
        training_data = np.asanyarray(training_data)

        self.training_data: np.ndarray = training_data
        self.input_training_data: np.ndarray = training_data.T[0]
        self.output_training_data: np.ndarray = training_data.T[1]

    def train_sklearn_linear_regression(self) -> Callable:
        """
        Returns:
            Trained linear regression model.
            Returned as a callable (the fitted estimator's ``predict``
            method), so it expects inputs in sklearn's 2-D layout,
            e.g. ``[[x]]`` for a single value.
            Trained via sklearn lib.
        """
        # np.expand_dims(a, axis=1) for input of a = [1, 2, 3]
        # returns [[1], [2], [3]]
        # This expanded format of np array is required for an input to
        # linear_model.LinearRegression.fit method as well as for
        # linear_model.LinearRegression.predict method
        input_training_data: np.ndarray = np.expand_dims(
            self.input_training_data, axis=1)

        # Create linear regression object
        linear_regression = linear_model.LinearRegression()

        # Train the model using the training set
        linear_regression.fit(input_training_data,
                              self.output_training_data)

        return linear_regression.predict

    def train_fchc_linear_regression(self, step_size: float) -> Callable:
        """
        Args:
            step_size: Hill climbing step size. The same step is applied to
                both the intercept and the slope.
        Returns:
            Trained linear regression model.
            Returned as a callable.
            Trained using first choice hill climbing metaheuristic.
            The search starts from random coefficients, so the result may
            differ between calls.
        """
        # Random starting point: m_0 is the intercept, m_1 the slope.
        m_0, m_1 = random(), random()

        mse: float = Utils.calculate_linear_regression_mse(
            training_set=self.training_data,
            m_0=m_0,
            m_1=m_1)

        while True:
            # Try the four axis-aligned neighbours in a fixed order and move
            # to the first one that lowers the error ("first choice").
            for step_sign_0, step_sign_1 in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
                updated_parameters = (
                    m_0 + step_size * step_sign_0,
                    m_1 + step_size * step_sign_1)
                new_mse = Utils.calculate_linear_regression_mse(
                    self.training_data, *updated_parameters)

                if new_mse < mse:
                    m_0, m_1 = updated_parameters
                    mse = new_mse
                    break
            else:
                # No neighbour improved the error: local minimum reached.
                break

        return partial(Utils.calculate_line_height, k=m_1, n=m_0)

    def _least_squares_coefficients(self) -> tuple:
        """
        Returns:
            Tuple ``(m_0, m_1)`` with the intercept and the slope of the
            least-squares line ``y = m_1 * x + m_0`` for the training data.
            If all inputs are equal (or there is no data) the slope is
            undefined and the coefficients come out as NaN.
        """
        # Work in floating point so that products of large integer inputs
        # cannot overflow a fixed-width integer dtype.
        inputs = np.asarray(self.input_training_data, dtype=float)
        outputs = np.asarray(self.output_training_data, dtype=float)

        input_mean = np.mean(inputs)
        output_mean = np.mean(outputs)

        # Centred form of the normal equations: slope = cov(x, y) / var(x).
        # Unlike dividing through by the input mean, this stays valid when
        # the inputs average to zero.
        centered_inputs = inputs - input_mean
        m_1 = (np.dot(centered_inputs, outputs - output_mean)
               / np.dot(centered_inputs, centered_inputs))
        m_0 = output_mean - m_1 * input_mean

        return m_0, m_1

    def train_calculus_linear_regression(self) -> Callable:
        """
        Returns:
            Trained linear regression model.
            Returned as a callable.
            Trained by calculating local extrema (minimum) of an error function
            via calculus.
        """
        m_0, m_1 = self._least_squares_coefficients()

        return partial(Utils.calculate_line_height, k=m_1, n=m_0)

    def train_polynomial_regression(self, deg: int) -> Callable:
        """
        Args:
            deg: Regression polynomial degree.

        Returns:
            Trained polynomial regression model.
            Returned as a callable.
            Trained via numpy polyfit.
        """
        coefficients = np.polyfit(
            x=self.input_training_data,
            y=self.output_training_data,
            deg=deg)

        # poly1d wraps the coefficients in an object that evaluates the
        # polynomial when called.
        return np.poly1d(coefficients)

In [2]:
""" Utilities module """
from functools import partial
from typing import Union


class Utils:
    """ Stateless helper routines shared by the regression trainers """
    @staticmethod
    def calculate_line_height(x: Union[np.ndarray, float],
                              k: float, n: float) -> float:
        """
        Args:
            x: Point (or array of points) at which the line is evaluated.
            k: Slope of the line.
            n: Intercept of the line.
        Returns:
            Value of k * x + n; element-wise when x is an array.
        """
        scaled = k * x
        return scaled + n

    @classmethod
    def calculate_linear_regression_mse(cls, training_set: np.ndarray,
                                        m_0: float, m_1: float) -> float:
        """
        Args:
            training_set: Training data array; after transposing, row 0 is
                read as the inputs and row 1 as the outputs, i.e. each
                sample is an [input_value, output_value] row.
            m_0: Zero degree coefficient (intercept)
            m_1: First degree coefficient (slope)
        Returns:
            Mean squared error of the line y = m_1*x + m_0 over the
            provided training set, as a plain Python float.
        """
        columns = training_set.T
        observed_inputs: np.ndarray = columns[0]
        observed_outputs: np.ndarray = columns[1]

        # Evaluate the candidate line on every input at once.
        fitted: np.ndarray = cls.calculate_line_height(
            observed_inputs, k=m_1, n=m_0)
        residuals = fitted - observed_outputs

        return float(np.mean(residuals ** 2))

Testirajmo navedeni kod:

In [3]:
# Sample data set, one [input, output] pair per row. The report printed at
# the end of this cell treats the input as a mileage in km.
training_data = np.asarray([
    [10000, 31000],
    [400000, 19000],
    [5000, 32000],
    [0, 40000],
    [1000, 33000],
    [100000, 26000],
    [50000, 29000],
    [50, 35000],
    [20000, 30000],
    [200000, 20000],
])

regression_analysis = RegressionAnalysis(training_data=training_data)

# Fit one model per training strategy; every trainer returns a callable.
sklearn_linear_regression = (
    regression_analysis.train_sklearn_linear_regression())
fchc_linear_regression = (
    regression_analysis.train_fchc_linear_regression(step_size=10))
calculus_linear_regression = (
    regression_analysis.train_calculus_linear_regression())
polynomial_regression = (
    regression_analysis.train_polynomial_regression(deg=3))

# Compare what each model predicts for the same input. The sklearn model is
# called with a nested list, the other models with the bare value.
test_value: float = 101010
print(f'Predictions for mileage of {test_value} km:\n'
      f'\tVia sklearn: {sklearn_linear_regression([[test_value]])}\n'
      f'\tVia fchc: {fchc_linear_regression(test_value)}\n'
      f'\tVia calculus: {calculus_linear_regression(test_value)}\n'
      f'\tVia polynomial regression: {polynomial_regression(test_value)}')

Predictions for mileage of 101010 km:
	Via sklearn: [28546.88575756]
	Via fchc: 40080.19288246815
	Via calculus: 28546.88575755563
	Via polynomial regression: 24545.091584819063


Primijetimo da među navedenim metodama nema one koja nalazi višedimenzionalnu regresiju. Naime, potrebno je nekoliko izmjena da bi metoda `train_sklearn_linear_regression` klase `RegressionAnalysis`, nad trening podacima koji sadrže višedimenzionalne ulazne trening podatke, pronašla višedimenzionalnu regresiju. Pokušajte, sa što manje izmjena, da omogućite klasi `RegressionAnalysis` da po prijemu trening podataka procijeni o kojoj dimenziji ulaznih trening podataka se radi, te da u slučaju dimenzije veće od 1, korisniku omogući korištenje samo metode `train_sklearn_linear_regression`. Također, potrebno je prilagoditi navedenu metodu za rad nad višedimenzionalnim ulaznim trening podacima, a da se pri tome ne naruši njena trenutna funkcionalnost.