In [4]:
from IPython.display import HTML
import random

def hide_toggle(for_next=False):
    """Return an HTML snippet containing a link that shows/hides a cell's input.

    The snippet defines a uniquely named JavaScript function that toggles the
    ``div.input`` element of the targeted cell, plus an ``<a>`` link calling it.

    Parameters
    ----------
    for_next : bool, default False
        When falsy, the link controls the cell matched by the "selected
        rendered code cell" selector. When truthy, the link controls the cell
        following it, and the input of the matched cell itself is hidden as
        soon as the snippet is rendered.

    Returns
    -------
    IPython.display.HTML
        The script and link, ready to be displayed as the cell output.
    """
    # NOTE(review): relies on `$` (jQuery) and these CSS classes being present
    # in the front end that renders the notebook — confirm for your viewer.
    selected_cell = """$('div.cell.code_cell.rendered.selected')"""
    link_label = 'Toggle show/hide'

    if for_next:
        # Control the following cell and hide this cell's own input right away.
        selector = selected_cell + '.next()'
        link_label = link_label + ' next cell'
        hide_self_js = selected_cell + '.find("div.input").hide();'
    else:
        selector = selected_cell
        hide_self_js = ''

    # A random suffix keeps the JS function names of different toggles apart.
    handler_name = 'code_toggle_{}'.format(str(random.randint(1,2**64)))

    template = """
        <script>
            function {f_name}() {{
                {cell_selector}.find('div.input').toggle();
            }}

            {js_hide_current}
        </script>

        <a href="javascript:{f_name}()">{toggle_text}</a>
    """
    markup = template.format(
        f_name=handler_name,
        cell_selector=selector,
        js_hide_current=hide_self_js,
        toggle_text=link_label,
    )

    return HTML(markup)

# Render a link that shows/hides the input of this cell.
hide_toggle()

In [86]:
# Size passed as width/height to the IFrame embeds in this notebook
# (presumably pixels — confirm against IPython.display.IFrame).
width=1200
height=700

# Good practices in software and ML engineering

</br>
</br>
</br>
</br>
</br>
</br>
</br>
</br>
</br>
</br>
<em style="text-align: right">All illustrations are found on Google Images </em>


# Outline

* Motivation
* Observations about S/ML engineering
* S/ML lifecycle
* Testing in SE/ML
* Walkthrough on testing in ML
* Discussion (optional)

Throughout the presentation I will be juxtaposing software and machine learning engineering

# Mentimeter

In [88]:
# Render a link that shows/hides the next cell's input; this cell's own input is hidden.
hide_toggle(True)

In [87]:
from IPython.display import IFrame
# Embed the Mentimeter presentation page, sized by the notebook-level width/height.
IFrame('https://www.mentimeter.com/app/presentation/alykk9rsbe7jdnsuqmt48gn72upwvbve/euakbnnievau/embed', width=width, height=height)

# Motivation

# Why does software fail?

- No Man's Sky. High expectations before release

![](https://nmswp.azureedge.net/wp-content/uploads/2021/09/planetary-settlements-1-1040w.jpg)

- Alexa party host. 

Police were called to a house in Germany that was suspected of hosting a loud all-night party. It turned out that Alexa had decided to host her own rave and was playing music at 3am without her owner’s knowledge.


![](https://www.telegraph.co.uk/content/dam/news/2017/11/08/TELEMMGLPICT000142023792_trans_NvBQzQNjv4Bqek9vKm18v_rkIPH9w2GMNoGXySPv9M1Jbe0Fc3Bi1Fk.jpeg?imwidth=1280)

# Observations about SE

The Mythical Man-Month

![https://upload.wikimedia.org/wikipedia/en/f/fd/Mythical_man-month_%28book_cover%29.jpg](https://upload.wikimedia.org/wikipedia/en/f/fd/Mythical_man-month_%28book_cover%29.jpg)

# A few observations about SE

- No silver bullet

- Conceptual integrity

- The surgical team

- Lowering software development costs

# Software development lifecycle

<center><img src="https://upload.wikimedia.org/wikipedia/commons/5/51/Waterfall_model.png" width="70%"></center>

# Modern software development lifecycle

<center><img src="https://pimages.toolbox.com/wp-content/uploads/2021/08/26123909/DevOps-Lifecycle.png" width="70%"></center>


# Machine learning engineering lifecycle

<center><img src="https://ml-ops.org/img/ml-engineering.jpg" width="70%"></center>

<h2 class="r-fit-text">Software development as a social activity</h2>

In [8]:
# Render a link that shows/hides the next cell's input; this cell's own input is hidden.
hide_toggle(True)

In [90]:
from IPython.display import IFrame
# Embed the Mentimeter presentation page, sized by the notebook-level width/height.
IFrame('https://www.mentimeter.com/app/presentation/alykk9rsbe7jdnsuqmt48gn72upwvbve/ghzri1jqrosx/embed', width=width, height=height)

# Testing in SE/ML

<center><img src="https://ml-ops.org/img/ml-engineering.jpg" width="50%"></center>

# Why testing?

Software testing is the process of evaluating and verifying that a software product or application does what it is supposed to do. The benefits of testing include preventing bugs, reducing development costs and improving performance. *From IBM*

</br>

- Automation/formalization of manual testing
- Helps to define system requirements 
- Helps with system design
- Helps with code refactoring/regressions

# Types of tests in SE

- **Acceptance testing**: Verifying whether the whole system works as intended.
- **Integration testing**: Ensuring that software components or functions operate together.
- **Unit testing**: Validating that each software unit performs as expected. A unit is the smallest testable component of an application.
- **Functional testing**: Checking functions by emulating business scenarios, based on functional requirements. Black-box testing is a common way to verify functions.
- **Performance testing**: Testing how the software performs under different workloads. Load testing, for example, is used to evaluate performance under real-life load conditions.
- **Regression testing**: Checking whether new features break or degrade functionality. Sanity testing can be used to verify menus, functions and commands at the surface level, when there is no time for a full regression test.
- **Stress testing**: Testing how much strain the system can take before it fails. Considered to be a type of non-functional testing.
- **Usability testing**: Validating how well a customer can use a system or web application to complete a task.

# Additional types of tests in ML

- **Invariance testing**: Defines input changes that are expected to leave model outputs unaffected.
- **Directional expectation testing**: Defines input changes that are expected to move the model output in a known direction (e.g. a larger floor area should not lower the predicted price).
- **Minimum functionality testing**: Defines behavior of the individual model components

# Walkthrough on testing in ML

## Stage 1. Prototyping

In [72]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import LinearSVR
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.base import BaseEstimator
from typing import Dict
from copy import deepcopy

import warnings
warnings.filterwarnings("ignore")

def highlight_cols(x):
    """Build the per-cell CSS frame for ``Styler.apply(highlight_cols, axis=None)``.

    Every cell gets a yellow background, except the ``SALE_PRICE`` column
    (when present), which gets a magenta background.

    Parameters
    ----------
    x : pd.DataFrame
        The frame being styled. It is not modified.

    Returns
    -------
    pd.DataFrame
        A frame of CSS strings with the same index and columns as ``x``.
    """
    # Build a fresh string frame instead of overwriting a copy of the data:
    # writing strings into numeric columns relies on implicit dtype upcasting,
    # which newer pandas versions deprecate.
    styles = pd.DataFrame('background-color: yellow', index=x.index, columns=x.columns)
    # Only recolour the target column when it is part of the styled frame, so
    # the returned frame always keeps the shape of the input.
    if 'SALE_PRICE' in styles.columns:
        styles['SALE_PRICE'] = 'background-color: magenta'
    return styles

## Data

<img width="500px" src="https://nypost.com/wp-content/uploads/sites/2/2022/07/Manhattan-89.jpg">

In [73]:
# Load the first dataset for the prototyping stage.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
dataset1: pd.DataFrame = pd.read_pickle("data/dataset_0.pandas_pickle")

In [28]:
# Render a link that shows/hides the next cell's input; this cell's own input is hidden.
hide_toggle(True)

In [26]:
# Preview the first rows of the selected columns; highlight_cols colours SALE_PRICE magenta and the rest yellow.
dataset1[['BUYER_SEX', 'BOROUGH', 'NEIGHBORHOOD', 'ZIP_CODE', 'BATHROOM_COUNT', 'LAND_SQUARE_FEET', 'GROSS_SQUARE_FEET', 'YEAR_BUILT', 'SALE_DATE_ORD', 'BUILDING_CLASS_CATEGORY_ORD', 'SALE_PRICE']].head().style.apply(highlight_cols, axis=None)

Unnamed: 0,BUYER_SEX,BOROUGH,NEIGHBORHOOD,ZIP_CODE,BATHROOM_COUNT,LAND_SQUARE_FEET,GROSS_SQUARE_FEET,YEAR_BUILT,SALE_DATE_ORD,BUILDING_CLASS_CATEGORY_ORD,SALE_PRICE
0,M,4,SO. JAMAICA-BAISLEY PARK,11434,1,4000.0,1128.0,1925,71,1.0,223500
1,M,3,BROWNSVILLE,11212,3,2500.0,2272.0,1930,1134,2.0,550000
2,W,3,MADISON,11229,3,9350.0,1253.0,2016,5138,13.0,896060
3,M,3,GRAVESEND,11204,4,4000.0,32667.0,2007,866,2.0,995000
4,W,4,ST. ALBANS,11412,1,3100.0,1650.0,1925,965,2.0,275000


## Data preprocessing

In [74]:
# Hold out a validation split with scikit-learn's default split size.
# No random_state is passed here, so the split changes from run to run.
X_train, X_val, y_train, y_val = train_test_split(dataset1.drop(columns=["SALE_PRICE"]), dataset1["SALE_PRICE"])

# Preprocessing: standard-scale the first column list and one-hot encode the second.
# handle_unknown="ignore" lets transform() accept categories that were not seen during fit.
ct = make_column_transformer((make_pipeline(StandardScaler()), ["ZIP_CODE", "LAND_SQUARE_FEET", "GROSS_SQUARE_FEET", "YEAR_BUILT", "BATHROOM_COUNT"]),
                             (OneHotEncoder(sparse=False, handle_unknown = "ignore"), ["BOROUGH", "NEIGHBORHOOD", "BUILDING_CLASS_CATEGORY", "SALE_DATE_ORD", "BUILDING_CLASS_CATEGORY_ORD", "BUYER_SEX"]), 
                             verbose_feature_names_out=False)

## Fitting a regressor

In [75]:
# Preprocessing followed by LinearSVR; memory='.cache' caches fitted transformers in that directory.
pipeline: Pipeline = make_pipeline(ct, LinearSVR(random_state=42), memory='.cache', verbose=False)
# The target is standardised for fitting; predictions are mapped back to the original scale.
estimator = TransformedTargetRegressor(regressor=pipeline, transformer=StandardScaler())
estimator.fit(X_train, y_train)

TransformedTargetRegressor(regressor=Pipeline(memory='.cache',
                                              steps=[('columntransformer',
                                                      ColumnTransformer(transformers=[('pipeline',
                                                                                       Pipeline(steps=[('standardscaler',
                                                                                                        StandardScaler())]),
                                                                                       ['ZIP_CODE',
                                                                                        'LAND_SQUARE_FEET',
                                                                                        'GROSS_SQUARE_FEET',
                                                                                        'YEAR_BUILT',
                                                                                        'BATHROOM

## Evaluation of model

In [32]:
# Score the fitted estimator on the validation split; the cell displays (MAE, R2).
y_pred = estimator.predict(X_val)
mean_absolute_error(y_val, y_pred), r2_score(y_val, y_pred)

(768889.4962646911, -0.19805324322000994)

## Let's try another model

In [36]:
# Same preprocessing with a gradient-boosting regressor; verbose=True prints per-step timing while fitting.
pipeline: Pipeline = make_pipeline(ct, HistGradientBoostingRegressor(random_state=42), memory='.cache', verbose=True)
estimator = TransformedTargetRegressor(regressor=pipeline, transformer=StandardScaler())
estimator.fit(X_train, y_train)
# Score on the same validation split as the previous model; the cell displays (MAE, R2).
y_pred = estimator.predict(X_val)
mean_absolute_error(y_val, y_pred), r2_score(y_val, y_pred)

[Pipeline]  (step 2 of 2) Processing histgradientboostingregressor, total= 1.3min


(591753.9979511421, 0.41836059190780905)

## Stage 2. Refactoring into functions

## Data

In [76]:
def load_data(n_datasets=4, data_dir="data"):
    """Load the numbered dataset pickles into a dict keyed by dataset index.

    Parameters
    ----------
    n_datasets : int, default 4
        Number of files to read; indices ``0 .. n_datasets - 1`` are loaded.
    data_dir : str, default "data"
        Directory that contains the ``dataset_<i>.pandas_pickle`` files.

    Returns
    -------
    Dict[int, pd.DataFrame]
        One loaded object per index (empty when ``n_datasets`` is 0).
    """
    # NOTE(review): read_pickle can execute arbitrary code while unpickling;
    # only point this at files from a trusted source.
    data_dict: Dict[int, pd.DataFrame] = {
        i: pd.read_pickle(f"{data_dir}/dataset_{i}.pandas_pickle")
        for i in range(n_datasets)
    }
    return data_dict

## Estimator/Regressor

In [77]:
def get_estimator(regressor_name='linearSVR'):
    """Build the unfitted preprocessing + regression estimator.

    Parameters
    ----------
    regressor_name : str, default 'linearSVR'
        Either 'linearSVR' or 'HistGradientBoostingRegressor'.

    Returns
    -------
    TransformedTargetRegressor
        Wraps a pipeline (column transformer followed by the chosen regressor,
        created with ``random_state=42``) and standardises the target with
        ``StandardScaler``.

    Raises
    ------
    ValueError
        If ``regressor_name`` is not one of the supported names.
    """
    if regressor_name == 'linearSVR':
        regressor = LinearSVR(random_state=42)
    elif regressor_name == 'HistGradientBoostingRegressor':
        regressor = HistGradientBoostingRegressor(random_state=42)
    else:
        # An unknown name used to fall through and fail further down with an
        # UnboundLocalError on `regressor`; fail early with a clear message.
        raise ValueError(
            f"Unknown regressor_name {regressor_name!r}; "
            "expected 'linearSVR' or 'HistGradientBoostingRegressor'"
        )

    # Standard-scale the first column list, one-hot encode the second.
    ct = make_column_transformer(
        (make_pipeline(StandardScaler()), ["ZIP_CODE", "LAND_SQUARE_FEET", "GROSS_SQUARE_FEET", "YEAR_BUILT", "BATHROOM_COUNT", "SALE_DATE_ORD"]),
        (OneHotEncoder(sparse=False, handle_unknown="ignore"), ["BOROUGH", "NEIGHBORHOOD", "BUILDING_CLASS_CATEGORY", "BUILDING_CLASS_CATEGORY_ORD", "BUYER_SEX"]),
        verbose_feature_names_out=False,
    )

    pipeline: Pipeline = make_pipeline(ct, regressor, memory='.cache', verbose=False)
    estimator = TransformedTargetRegressor(regressor=pipeline, transformer=StandardScaler())
    return estimator

In [None]:
def get_estimator(regressor_name='linearSVR'):
    """Build the unfitted preprocessing + regression estimator.

    In this variant the regressors are created without an explicit
    ``random_state``.

    Parameters
    ----------
    regressor_name : str, default 'linearSVR'
        Either 'linearSVR' or 'HistGradientBoostingRegressor'.

    Returns
    -------
    TransformedTargetRegressor
        Wraps a pipeline (column transformer followed by the chosen regressor)
        and standardises the target with ``StandardScaler``.

    Raises
    ------
    ValueError
        If ``regressor_name`` is not one of the supported names.
    """
    if regressor_name == 'linearSVR':
        regressor = LinearSVR()
    elif regressor_name == 'HistGradientBoostingRegressor':
        regressor = HistGradientBoostingRegressor()
    else:
        # An unknown name used to fall through and fail further down with an
        # UnboundLocalError on `regressor`; fail early with a clear message.
        raise ValueError(
            f"Unknown regressor_name {regressor_name!r}; "
            "expected 'linearSVR' or 'HistGradientBoostingRegressor'"
        )

    # Standard-scale the first column list, one-hot encode the second.
    ct = make_column_transformer(
        (make_pipeline(StandardScaler()), ["ZIP_CODE", "LAND_SQUARE_FEET", "GROSS_SQUARE_FEET", "YEAR_BUILT", "BATHROOM_COUNT", "SALE_DATE_ORD"]),
        (OneHotEncoder(sparse=False, handle_unknown="ignore"), ["BOROUGH", "NEIGHBORHOOD", "BUILDING_CLASS_CATEGORY", "BUILDING_CLASS_CATEGORY_ORD", "BUYER_SEX"]),
        verbose_feature_names_out=False,
    )

    pipeline: Pipeline = make_pipeline(ct, regressor, memory='.cache', verbose=False)
    estimator = TransformedTargetRegressor(regressor=pipeline, transformer=StandardScaler())
    return estimator

## The whole pipeline

In [78]:
def classify(data_dict):
    """Fit and score both regressors on every dataset in ``data_dict``.

    Each dataset is split with ``train_test_split(..., random_state=42)``
    using ``SALE_PRICE`` as the target; both regressors from
    ``get_estimator`` are fitted on the training part and scored on the
    validation part.

    Parameters
    ----------
    data_dict : dict
        Maps a dataset index to a frame that contains a ``SALE_PRICE`` column.

    Returns
    -------
    tuple
        ``(results, estimators)``: ``results`` is a list of
        ``[idx, regressor_name, MAE, R2]`` rows and ``estimators[idx][name]``
        holds the fitted estimator.
    """
    metric_rows = []
    fitted = {}
    for idx, dataset in data_dict.items():
        features = dataset.drop(columns=["SALE_PRICE"])
        target = dataset["SALE_PRICE"]
        X_train, X_val, y_train, y_val = train_test_split(features, target, random_state=42)

        fitted[idx] = {}
        for regressor_name in ('linearSVR', 'HistGradientBoostingRegressor'):
            model = get_estimator(regressor_name)
            model.fit(X_train, y_train)
            fitted[idx][regressor_name] = model

            y_pred = model.predict(X_val)
            mae = mean_absolute_error(y_val, y_pred)
            r2 = r2_score(y_val, y_pred)
            metric_rows.append([idx, regressor_name, mae, r2])
    return metric_rows, fitted

In [39]:
def classify(data_dict):
    """Fit and score both regressors on every dataset in ``data_dict``.

    Each dataset is split with ``train_test_split`` (no ``random_state`` is
    passed in this variant) using ``SALE_PRICE`` as the target; both
    regressors from ``get_estimator`` are fitted on the training part and
    scored on the validation part.

    Parameters
    ----------
    data_dict : dict
        Maps a dataset index to a frame that contains a ``SALE_PRICE`` column.

    Returns
    -------
    tuple
        ``(results, estimators)``: ``results`` is a list of
        ``[idx, regressor_name, MAE, R2]`` rows and ``estimators[idx][name]``
        holds the fitted estimator.
    """
    metric_rows = []
    fitted = {}
    for idx, dataset in data_dict.items():
        features = dataset.drop(columns=["SALE_PRICE"])
        target = dataset["SALE_PRICE"]
        X_train, X_val, y_train, y_val = train_test_split(features, target)

        fitted[idx] = {}
        for regressor_name in ('linearSVR', 'HistGradientBoostingRegressor'):
            model = get_estimator(regressor_name)
            model.fit(X_train, y_train)
            fitted[idx][regressor_name] = model

            y_pred = model.predict(X_val)
            mae = mean_absolute_error(y_val, y_pred)
            r2 = r2_score(y_val, y_pred)
            metric_rows.append([idx, regressor_name, mae, r2])
    return metric_rows, fitted

## Evaluate results

In [None]:
# Load every dataset, then fit and score both regressors on each of them.
data_dict = load_data()
results, estimators = classify(data_dict=data_dict)

In [42]:
# Tabulate the [idx, regressor, MAE, R2] rows returned by classify() and display them.
df = pd.DataFrame(results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
df

Unnamed: 0,Dataset Idx,Regressor,MAE,R2
0,0,linearSVR,826707.600905,0.204332
1,0,HistGradientBoostingRegressor,602822.336186,0.644838
2,1,linearSVR,821178.536598,0.264087
3,1,HistGradientBoostingRegressor,632824.458058,0.589781
4,2,linearSVR,809373.64882,0.211983
5,2,HistGradientBoostingRegressor,574875.248383,0.500433
6,3,linearSVR,872769.508629,0.340836
7,3,HistGradientBoostingRegressor,655705.125719,0.364833


In [40]:
# Render a link that shows/hides the next cell's input; this cell's own input is hidden.
hide_toggle(True)

In [97]:
from IPython.display import IFrame
# Embed the Mentimeter presentation page, sized by the notebook-level width/height.
IFrame('https://www.mentimeter.com/app/presentation/alykk9rsbe7jdnsuqmt48gn72upwvbve/puyi6ono1aqd/embed', width=width, height=height)

In [None]:
def get_estimator(regressor_name='linearSVR'):
    """Build the unfitted preprocessing + regression estimator.

    In this variant the regressors are created without an explicit
    ``random_state`` and the pipeline is built with ``verbose=True``.

    Parameters
    ----------
    regressor_name : str, default 'linearSVR'
        Either 'linearSVR' or 'HistGradientBoostingRegressor'.

    Returns
    -------
    TransformedTargetRegressor
        Wraps a pipeline (column transformer followed by the chosen regressor)
        and standardises the target with ``StandardScaler``.

    Raises
    ------
    ValueError
        If ``regressor_name`` is not one of the supported names.
    """
    if regressor_name == 'linearSVR':
        regressor = LinearSVR()
    elif regressor_name == 'HistGradientBoostingRegressor':
        regressor = HistGradientBoostingRegressor()
    else:
        # An unknown name used to fall through and fail further down with an
        # UnboundLocalError on `regressor`; fail early with a clear message.
        raise ValueError(
            f"Unknown regressor_name {regressor_name!r}; "
            "expected 'linearSVR' or 'HistGradientBoostingRegressor'"
        )

    # Standard-scale the first column list, one-hot encode the second.
    ct = make_column_transformer(
        (make_pipeline(StandardScaler()), ["ZIP_CODE", "LAND_SQUARE_FEET", "GROSS_SQUARE_FEET", "YEAR_BUILT", "BATHROOM_COUNT", "SALE_DATE_ORD"]),
        (OneHotEncoder(sparse=False, handle_unknown="ignore"), ["BOROUGH", "NEIGHBORHOOD", "BUILDING_CLASS_CATEGORY", "BUILDING_CLASS_CATEGORY_ORD", "BUYER_SEX"]),
        verbose_feature_names_out=False,
    )

    pipeline: Pipeline = make_pipeline(ct, regressor, memory='.cache', verbose=True)
    estimator = TransformedTargetRegressor(regressor=pipeline, transformer=StandardScaler())
    return estimator

# Make sure your randomness is fixed!

In [None]:
import unittest


class TestDataRegression(unittest.TestCase):
    """Checks that re-running the pipeline on dataset 0 reproduces its metrics."""

    # Column labels used when the result rows are wrapped in a DataFrame.
    _RESULT_COLUMNS = ['Dataset Idx', 'Regressor', 'MAE', 'R2']

    def setUp(self):
        """Load dataset 0 and keep the baseline results and fitted estimators."""
        self.dataset = load_data()[0]
        self.results, self.estimators = classify(data_dict={0: self.dataset})

    def test_results_are_determined(self):
        """A second run on the same data must yield exactly the same result table."""
        rerun_rows, _ = classify(data_dict={0: self.dataset})
        baseline_values = pd.DataFrame(self.results, columns=self._RESULT_COLUMNS).values
        rerun_values = pd.DataFrame(rerun_rows, columns=self._RESULT_COLUMNS).values
        self.assertTrue(np.array_equiv(baseline_values, rerun_values))

Do not forget to call functions with **random_state** specified

In [59]:
def get_estimator(regressor_name='linearSVR'):
    """Build the unfitted preprocessing + regression estimator.

    Both regressors are created with ``random_state=42`` and the pipeline is
    built with ``verbose=True``.

    Parameters
    ----------
    regressor_name : str, default 'linearSVR'
        Either 'linearSVR' or 'HistGradientBoostingRegressor'.

    Returns
    -------
    TransformedTargetRegressor
        Wraps a pipeline (column transformer followed by the chosen regressor)
        and standardises the target with ``StandardScaler``.

    Raises
    ------
    ValueError
        If ``regressor_name`` is not one of the supported names.
    """
    if regressor_name == 'linearSVR':
        regressor = LinearSVR(random_state=42)
    elif regressor_name == 'HistGradientBoostingRegressor':
        regressor = HistGradientBoostingRegressor(random_state=42)
    else:
        # An unknown name used to fall through and fail further down with an
        # UnboundLocalError on `regressor`; fail early with a clear message.
        raise ValueError(
            f"Unknown regressor_name {regressor_name!r}; "
            "expected 'linearSVR' or 'HistGradientBoostingRegressor'"
        )

    # Standard-scale the first column list, one-hot encode the second.
    ct = make_column_transformer(
        (make_pipeline(StandardScaler()), ["ZIP_CODE", "LAND_SQUARE_FEET", "GROSS_SQUARE_FEET", "YEAR_BUILT", "BATHROOM_COUNT", "SALE_DATE_ORD"]),
        (OneHotEncoder(sparse=False, handle_unknown="ignore"), ["BOROUGH", "NEIGHBORHOOD", "BUILDING_CLASS_CATEGORY", "BUILDING_CLASS_CATEGORY_ORD", "BUYER_SEX"]),
        verbose_feature_names_out=False,
    )

    pipeline: Pipeline = make_pipeline(ct, regressor, memory='.cache', verbose=True)
    estimator = TransformedTargetRegressor(regressor=pipeline, transformer=StandardScaler())
    return estimator


## Invariance testing

In [43]:
def test_sex_invariance(self,):
    """Invariance test: overwriting BUYER_SEX must leave R2 (almost) unchanged.

    The pipeline is re-run on a copy of the dataset whose ``BUYER_SEX``
    column is set to 'M' for every row, and the resulting R2 values are
    compared with the baseline ones using an absolute tolerance of 1e-2.
    """
    result_columns = ['Dataset Idx', 'Regressor', 'MAE', 'R2']

    # Work on a copy so the baseline dataset held by the test case is untouched.
    uniform_sex = self.dataset.copy()
    uniform_sex["BUYER_SEX"] = 'M'
    variant_rows, _ = classify(data_dict={0: uniform_sex})

    baseline_r2 = pd.DataFrame(self.results, columns=result_columns)['R2'].values
    variant_r2 = pd.DataFrame(variant_rows, columns=result_columns)['R2'].values
    self.assertTrue(np.allclose(baseline_r2, variant_r2, atol=1e-2))

## Directional expectation testing

In [44]:
def test_direction_expectation(self):

        X_test1 = self.dataset.drop(columns=["SALE_PRICE"]).sample(frac=0.2)
        X_test2 = X_test1.copy()

        X_test2["GROSS_SQUARE_FEET"] = X_test2["GROSS_SQUARE_FEET"] * 2
        X_test2["BATHROOM_COUNT"] = X_test2["BATHROOM_COUNT"] + 5

        estimator_names =self.estimators[0].keys()
        is_increasing = []
        for est in estimator_names:
            prices1 = self.estimators[0][est].predict(X_test1)
            prices2 = self.estimators[0][est].predict(X_test2)
            is_increasing.append(((prices2 - prices1) > 0).all())

        self.assertTrue(np.array(is_increasing).all())

## Test results

In [None]:
# Run the tests from inside the notebook: argv=[''] stops unittest from parsing
# the kernel's own command-line arguments, exit=False stops it from calling sys.exit().
unittest.main(argv=[''], verbosity=2, exit=False)

In [49]:
# Render a link that shows/hides the next cell's input; this cell's own input is hidden.
hide_toggle(True)

In [79]:
import unittest


class TestDataRegression(unittest.TestCase):
    def setUp(self):
        data_dict = load_data()
        self.dataset = data_dict[0]

        results, estimators = classify(data_dict={0: self.dataset})
        
        self.results = results
        self.estimators = estimators
    
    def test_sex_invariance(self,):
        dataset = self.dataset.copy()
        dataset["BUYER_SEX"] = 'M'
        data_dict = {0: dataset}
        results, _ = classify(data_dict=data_dict)
        setup_results_df = pd.DataFrame(self.results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
        test_results_df = pd.DataFrame(results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
        self.assertTrue(np.allclose(setup_results_df['R2'].values, test_results_df['R2'].values, atol=1e-2))
    
    def test_direction_expectation(self):

        X_test1 = self.dataset.drop(columns=["SALE_PRICE"]).sample(frac=0.2)
        X_test2 = X_test1.copy()

        X_test2["GROSS_SQUARE_FEET"] = X_test2["GROSS_SQUARE_FEET"] * 2
        X_test2["BATHROOM_COUNT"] = X_test2["BATHROOM_COUNT"] + 5

        estimator_names =self.estimators[0].keys()
        is_increasing = []
        for est in estimator_names:
            prices1 = self.estimators[0][est].predict(X_test1)
            prices2 = self.estimators[0][est].predict(X_test2)
            is_increasing.append(((prices2 - prices1) > 0).all())

        self.assertTrue(np.array(is_increasing).all())

    def test_results_are_determined(self):
        results, _ = classify(data_dict={0: self.dataset})
        setup_results_df = pd.DataFrame(self.results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
        test_results_df = pd.DataFrame(results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
        self.assertTrue(np.array_equiv(setup_results_df.values, test_results_df.values))
    

# Run the tests from inside the notebook: argv=[''] stops unittest from parsing
# the kernel's own command-line arguments, exit=False stops it from calling sys.exit().
unittest.main(argv=[''], verbosity=2, exit=False)

FAIL
ok
ok
test_column_transformer_scaler (__main__.TestSimpleDataset) ... ok

FAIL: test_direction_expectation (__main__.TestDataRegression)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_741/1821046248.py", line 38, in test_direction_expectation
    self.assertTrue(np.array(is_increasing).all())
AssertionError: False is not true

----------------------------------------------------------------------
Ran 4 tests in 171.876s

FAILED (failures=1)


<unittest.main.TestProgram at 0x7f1ecadcb970>

## Stage 3. Refactoring into classes

## SE principle relevant to testing

# High cohesion, low coupling

## High cohesion, low coupling

### Low coupling

Low coupling is about separating unrelated parts of the code base as much as possible.

### High cohesion

High cohesion means keeping parts of a code base that are related to each other in a single place

<center><img src="https://i.stack.imgur.com/zhZv2.png" ></center>

## Stage 3. Refactoring into classes

## Data ➔ High cohesion

In [80]:
class SimpleDataset:
    """Bundles one pickled dataset with its split and its preprocessing recipe."""

    def __init__(self, path="") -> None:
        """Read the pickled frame stored at ``path`` into ``self.dataset``."""
        self.dataset = pd.read_pickle(path)

    def prepare(self):
        """Create the train/validation split (``random_state=42``) with SALE_PRICE as target."""
        features = self.dataset.drop(columns=["SALE_PRICE"])
        target = self.dataset["SALE_PRICE"]
        split = train_test_split(features, target, random_state=42)
        self.X_train, self.X_val, self.y_train, self.y_val = split

    def getColumnTransformer(self):
        """Return a new, unfitted column transformer for this dataset's columns."""
        scaled_columns = ["ZIP_CODE", "LAND_SQUARE_FEET", "GROSS_SQUARE_FEET", "YEAR_BUILT", "BATHROOM_COUNT", "SALE_DATE_ORD"]
        encoded_columns = ["BOROUGH", "NEIGHBORHOOD", "BUILDING_CLASS_CATEGORY", "BUILDING_CLASS_CATEGORY_ORD", "BUYER_SEX"]
        return make_column_transformer(
            (make_pipeline(StandardScaler()), scaled_columns),
            (OneHotEncoder(sparse=False, handle_unknown="ignore"), encoded_columns),
            verbose_feature_names_out=False,
        )

    def getTrainSet(self):
        """Return ``(X_train, y_train)``; the attributes are set by ``prepare()``."""
        return self.X_train, self.y_train

    def getValSet(self):
        """Return ``(X_val, y_val)``; the attributes are set by ``prepare()``."""
        return self.X_val, self.y_val

In [81]:
def load_data(n_datasets=4, data_dir="data"):
    """Wrap the numbered dataset pickles in ``SimpleDataset`` objects.

    Parameters
    ----------
    n_datasets : int, default 4
        Number of files to read; indices ``0 .. n_datasets - 1`` are loaded.
    data_dir : str, default "data"
        Directory that contains the ``dataset_<i>.pandas_pickle`` files.

    Returns
    -------
    Dict[int, SimpleDataset]
        One ``SimpleDataset`` per index (empty when ``n_datasets`` is 0).
    """
    # NOTE(review): SimpleDataset reads the file with pd.read_pickle, which can
    # execute arbitrary code; only point this at files from a trusted source.
    data_dict: Dict[int, SimpleDataset] = {
        i: SimpleDataset(path=f"{data_dir}/dataset_{i}.pandas_pickle")
        for i in range(n_datasets)
    }
    return data_dict

## Estimator/Regressor

In [82]:
def get_regressors() -> Dict[str, BaseEstimator]:
    """Return new, unfitted regressors keyed by name, each built with ``random_state=42``."""
    regressors = {
        'linearSVR': LinearSVR(random_state=42),
        'HistGradientBoostingRegressor': HistGradientBoostingRegressor(random_state=42),
    }
    return regressors

## The whole pipeline  ➔ Low coupling

In [83]:
def classify(data_dict: Dict[int, SimpleDataset], regressors: Dict[str, BaseEstimator]):
    """Fit and score every regressor on every dataset.

    Each dataset is asked to ``prepare()`` its split; every regressor is then
    chained after that dataset's column transformer, fitted on the training
    set with a standardised target, and scored on the validation set.

    Parameters
    ----------
    data_dict : Dict[int, SimpleDataset]
        Datasets keyed by index.
    regressors : Dict[str, BaseEstimator]
        Regressors keyed by the name reported in the results.

    Returns
    -------
    tuple
        ``(results, estimators)``: ``results`` is a list of
        ``[idx, regressor_name, MAE, R2]`` rows and ``estimators[idx][name]``
        holds the fitted estimator.
    """
    metric_rows = []
    fitted = {}
    for idx, dataset in data_dict.items():
        dataset.prepare()
        fitted[idx] = {}
        for regressor_name, regressor in regressors.items():
            # Each regressor gets its own freshly built column transformer.
            preprocessing = dataset.getColumnTransformer()
            pipeline: Pipeline = make_pipeline(preprocessing, regressor, memory='.cache', verbose=False)
            model = TransformedTargetRegressor(regressor=pipeline, transformer=StandardScaler())

            X_train, y_train = dataset.getTrainSet()
            model.fit(X_train, y_train)
            fitted[idx][regressor_name] = model

            X_val, y_val = dataset.getValSet()
            y_pred = model.predict(X_val)
            mae = mean_absolute_error(y_val, y_pred)
            r2 = r2_score(y_val, y_pred)
            metric_rows.append([idx, regressor_name, mae, r2])
    return metric_rows, fitted

In [None]:
# Load every dataset, build the regressors, then fit and score each regressor on each dataset.
data_dict = load_data()
regressors = get_regressors()
results, estimators = classify(data_dict=data_dict, regressors=regressors)

## Test case

## Low coupling

In [None]:
import unittest


class TestDataRegression(unittest.TestCase):
    """Per-regressor invariance tests on dataset 0."""

    # Column labels used when the result rows are wrapped in a DataFrame.
    _RESULT_COLUMNS = ['Dataset Idx', 'Regressor', 'MAE', 'R2']

    def setUp(self):
        """Load dataset 0 and keep the baseline results and fitted estimators."""
        self.dataset = load_data()[0]
        self.results, self.estimators = classify(data_dict={0: self.dataset}, regressors=get_regressors())

    def _check_sex_invariance(self, regressor_name, regressor):
        """Re-run one regressor with BUYER_SEX forced to 'M' and compare R2 with the baseline."""
        # Deep-copy so the baseline dataset object is never modified.
        variant = deepcopy(self.dataset)
        variant.dataset["BUYER_SEX"] = 'M'
        variant_rows, _ = classify(data_dict={0: variant}, regressors={regressor_name: regressor})

        baseline_df = pd.DataFrame(self.results, columns=self._RESULT_COLUMNS).query(f"Regressor == '{regressor_name}'")
        variant_df = pd.DataFrame(variant_rows, columns=self._RESULT_COLUMNS)
        self.assertTrue(np.allclose(baseline_df['R2'].values, variant_df['R2'].values, atol=1e-2))

    def test_sex_invariance_linearSVR(self,):
        """LinearSVR: R2 must stay within 1e-2 when BUYER_SEX is overwritten."""
        self._check_sex_invariance('linearSVR', LinearSVR(random_state=42))

    def test_sex_invariance_histGradBoost(self,):
        """HistGradientBoostingRegressor: R2 must stay within 1e-2 when BUYER_SEX is overwritten."""
        self._check_sex_invariance('HistGradientBoostingRegressor', HistGradientBoostingRegressor(random_state=42))

## Test results

In [63]:
# Render a link that shows/hides the next cell's input; this cell's own input is hidden.
hide_toggle(True)

In [84]:
import unittest


class TestDataRegression(unittest.TestCase):
    def setUp(self):
        data_dict = load_data()
        self.dataset = data_dict[0]

        regressors = get_regressors()
        results, estimators = classify(data_dict={0: self.dataset}, regressors=regressors)
        
        self.results = results
        self.estimators = estimators
    
    def test_sex_invariance_linearSVR(self,):
        dataset = deepcopy(self.dataset)
        dataset.dataset["BUYER_SEX"] = 'M'
        data_dict = {0: dataset}
        regressors = dict(linearSVR=LinearSVR(random_state=42))
        results, _ = classify(data_dict=data_dict, regressors=regressors)
        setup_results_df = pd.DataFrame(self.results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2']).query(f"Regressor == 'linearSVR'")
        test_results_df = pd.DataFrame(results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
        self.assertTrue(np.allclose(setup_results_df['R2'].values, test_results_df['R2'].values, atol=1e-2))
    
    def test_sex_invariance_histGradBoost(self,):

        dataset = deepcopy(self.dataset)
        dataset.dataset["BUYER_SEX"] = 'M'
        data_dict = {0: dataset}
        regressors = dict(HistGradientBoostingRegressor=HistGradientBoostingRegressor(random_state=42))
        results, _ = classify(data_dict=data_dict, regressors=regressors)
        setup_results_df = pd.DataFrame(self.results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2']).query(f"Regressor == 'HistGradientBoostingRegressor'")
        test_results_df = pd.DataFrame(results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
        self.assertTrue(np.allclose(setup_results_df['R2'].values, test_results_df['R2'].values, atol=1e-2))
    
    def test_direction_expectation_linearSVR(self):

        X_test1 = self.dataset.dataset.drop(columns=["SALE_PRICE"]).sample(frac=0.2, random_state=42)
        X_test2 = X_test1.copy()

        X_test2["GROSS_SQUARE_FEET"] = X_test2["GROSS_SQUARE_FEET"] * 2
        X_test2["BATHROOM_COUNT"] = X_test2["BATHROOM_COUNT"] + 5

        estimator_name = "linearSVR"

        prices1 = self.estimators[0][estimator_name].predict(X_test1)
        prices2 = self.estimators[0][estimator_name].predict(X_test2)
        is_increasing = ((prices2 - prices1) > 0).all()

        self.assertTrue(is_increasing)
    
    def test_direction_expectation_histGradBoost(self):

        X_test1 = self.dataset.dataset.drop(columns=["SALE_PRICE"]).sample(frac=0.2, random_state=42)
        X_test2 = X_test1.copy()

        X_test2["GROSS_SQUARE_FEET"] = X_test2["GROSS_SQUARE_FEET"] * 2
        X_test2["BATHROOM_COUNT"] = X_test2["BATHROOM_COUNT"] + 5

        estimator_name = "HistGradientBoostingRegressor"

        prices1 = self.estimators[0][estimator_name].predict(X_test1)
        prices2 = self.estimators[0][estimator_name].predict(X_test2)
        is_increasing = ((prices2 - prices1) > 0).all()

        self.assertTrue(is_increasing)

    def test_results_are_determined(self):

        regressors = get_regressors()
        results, _ = classify(data_dict={0: self.dataset}, regressors=regressors)
        
        setup_results_df = pd.DataFrame(self.results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
        test_results_df = pd.DataFrame(results, columns=['Dataset Idx', 'Regressor', 'MAE', 'R2'])
        self.assertTrue(np.array_equiv(setup_results_df.values, test_results_df.values))

# Run only TestDataRegression; argv=[''] stops unittest from parsing the kernel's
# own command-line arguments, exit=False stops it from calling sys.exit().
unittest.main(defaultTest='TestDataRegression', argv=[''], verbosity=2, exit=False)

FAIL
ok
ok
ok
ok

FAIL: test_direction_expectation_histGradBoost (__main__.TestDataRegression)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_741/1384773004.py", line 66, in test_direction_expectation_histGradBoost
    self.assertTrue(is_increasing)
AssertionError: False is not true

----------------------------------------------------------------------
Ran 5 tests in 238.779s

FAILED (failures=1)


<unittest.main.TestProgram at 0x7f1ec909a970>

## Unit test

In [69]:
import unittest
from pandas.api.types import  is_numeric_dtype

class TestSimpleDataset(unittest.TestCase):
    def setUp(self):
        self.dataset = SimpleDataset(path=f"data/dataset_0.pandas_pickle")
    
    def test_column_transformer_scaler(self):
        ct = self.dataset.getColumnTransformer()
        std_scaling_columns = ct.transformers[0][-1]
        are_numeric_types = [is_numeric_dtype(self.dataset.dataset[column_name]) for column_name in std_scaling_columns]
        self.assertTrue(np.array(are_numeric_types).all())



# Run only TestSimpleDataset; argv=[''] stops unittest from parsing the kernel's
# own command-line arguments, exit=False stops it from calling sys.exit().
unittest.main(defaultTest='TestSimpleDataset', argv=[''], verbosity=2, exit=False)

test_column_transformer_scaler (__main__.TestSimpleDataset) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.013s

OK


<unittest.main.TestProgram at 0x7f1ed3833880>

## Summary

* Motivation
* Observations about S/ML engineering
* S/ML lifecycle
* Testing in SE/ML
* Walkthrough on testing in ML

In [93]:
# Render a link that shows/hides the next cell's input; this cell's own input is hidden.
hide_toggle(True)

In [96]:
from IPython.display import IFrame
# Embed the Mentimeter presentation page, sized by the notebook-level width/height.
IFrame('https://www.mentimeter.com/app/presentation/alykk9rsbe7jdnsuqmt48gn72upwvbve/9ergb4sppk7b/embed', width=width, height=height)