In [1]:
# Name: Ishan Vyas
# UCID: 30068270
# Course: CPSC 502.06
# Project: Regression testing for machine learning classifiers
# Supervisor: Dr. Frank Maurer

# Testing framework using Metamorphic testing and caching based around the diabetes dataset

# This framework/guide is to understand how to regression test machine learning classifiers.

In [26]:
# Import statements
import pytest
import time 
import random
from functools import lru_cache
from hashdf import HashableDataFrame

import numpy as np
import Simple_Diabetes_Predictor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

In [3]:
# This time shows the time it takes to train a model and then access it if its not cached
start = time.time()
modelNotCache = Simple_Diabetes_Predictor.getModel(Simple_Diabetes_Predictor.df_hash, Simple_Diabetes_Predictor.K)
end = time.time()
print(end - start)

0.02315497398376465


In [4]:
# Caching the model in memory
@lru_cache(maxsize=None)
def getModel(db, K):
    data = db.loc[:, db.columns != 'Outcome']
    labels = db.loc[:, db.columns == 'Outcome']
    
    X_train, X_test, y_train, y_test = train_test_split(data, labels.values.ravel(), test_size=0.25, random_state=1)

    neigh = KNeighborsClassifier(n_neighbors=K)  
    neigh.fit(X_train, y_train)

    Initial = neigh.predict(X_test)
    return neigh, Initial

In [27]:
# This time shows the time it takes to access the cached model
start = time.time()
modelCache = getModel(Simple_Diabetes_Predictor.df_hash, Simple_Diabetes_Predictor.K)
end = time.time()
print(end - start)

0.0018279552459716797


In [6]:
# Implementation of the metamorphic testing 
# Metamorphic Relations Helper Functions
def affineTransform(x):
    return (2*x + 1)

In [7]:
# Transform the dataset and apply the transformations to the incoming input
def transform_db(dataset):
    transformed_db = dataset.copy()
    
    # Applying affine transformation
    for column in transformed_db:
        if column != "Outcome":
            transformed_db[column] = transformed_db[column].apply(affineTransform)
            
    # Applying duplicating columns
    for column in transformed_db:
        if column != "Outcome":
            transformed_db[column+"*"] = transformed_db[column]
            
    # Applying uninformative variable 
    transformed_db['UninformativeVariable']=1
            
    # Permutation of columns
    list_columns = list(transformed_db.columns.values)
    random.shuffle(list_columns)
    transformed_db = transformed_db.reindex(columns=list_columns)
    
    # Consistence with re-prediction
        
    return transformed_db

In [31]:
# Train the model on the transformed input
@lru_cache(maxsize=None)
def getTransformedModel(dataset, K):
    data1 = dataset.loc[:, dataset.columns != 'Outcome']
    labels1 = dataset.loc[:, dataset.columns == 'Outcome']
    
    X_train, X_test, y_train, y_test = train_test_split(data1, labels1.values.ravel(), test_size=0.25, random_state=1)

    neigh = KNeighborsClassifier(n_neighbors=K)  
    neigh.fit(X_train, y_train)

    New = neigh.predict(X_test)
    
    return neigh, New, X_test

In [32]:
# Transform the dataset
df_transformed = transform_db(Simple_Diabetes_Predictor.df_hash)
df_hashableT = HashableDataFrame(df_transformed)

In [33]:
# Gets the column order of the transformed data set
columnOrder = list(df_hashableT.columns)
columnOrder.remove('Outcome')

# Pre established test cases with known output
examplesInput_df = [[7, 136, 74, 26, 135, 26.0, 0.647, 51],
               [1, 85, 66, 29, 0, 26.6, 0.351, 31],
               [10, 101, 76, 48, 180, 32.9, 0.171, 63],
               [0, 198, 66, 32, 274, 41.3, 0.502, 28],
               [5, 122, 86, 0, 0, 34.7, 0.290, 33]]

# Known outputs for the test cases established
examplesOutput_df = np.array([1,0,0,1,0])

# Transformation of the example input to fit the prediction format.
def transfromExample():
    transformedExamples_df = []
    k = 0
    for i in range(0,len(examplesInput_df)):
        transformedExamples_df.append([])
        # Needs to be dynamic
        preg = affineTransform(examplesInput_df[i][0])
        glu = affineTransform(examplesInput_df[i][1])
        bp = affineTransform(examplesInput_df[i][2])
        st = affineTransform(examplesInput_df[i][3])
        ins = affineTransform(examplesInput_df[i][4])
        bmi = affineTransform(examplesInput_df[i][5])
        dpf = affineTransform(examplesInput_df[i][6])
        age = affineTransform(examplesInput_df[i][7])
        uv = 1
        transformedOrderMap = {
            'Pregnancies' : preg,
            'Pregnancies*' : preg,
            'Glucose' : glu,
            'Glucose*' : glu,
            'BloodPressure' : bp,
            'BloodPressure*' : bp,
            'SkinThickness' : st,
            'SkinThickness*' : st,
            'Insulin' : ins,
            'Insulin*' : ins,
            'BMI' : bmi,
            'BMI*' : bmi,
            'DiabetesPedigreeFunction':dpf,
            'DiabetesPedigreeFunction*':dpf,
            'Age':age,
            'Age*':age,
            'UninformativeVariable':uv
        }
        for k in columnOrder:
            transformedExamples_df[i].append(transformedOrderMap[k])
    return transformedExamples_df

transfromedExamplesInput_df = transfromExample()

In [11]:
# This is a pytest to perform metamorphic testing on the classifer
# It allows us to check the correctness of the algorithm
# We compare the test results of the original model against the transformed model 
# Both the transfromed model and original model should be identical as the metamorphic relations shouldn't
    # change the the classifier at all, if there is a difference, there might be an issue within
    # the algorithm.
def test_model(original, transformed):
    print(original.all() == transformed.all())
    assert original.all() == transformed.all()

In [34]:
# This is a pytest to perform metamorphic testing on the classifier 
# It allows us to check the correctness of the algorithm
# We compare the prediction results of the original model and the transformed model
# Both the transfromed model and original model should be identical as the metamorphic relations shouldn't
    # change the the classifier at all, if there is a difference, there might be an issue within
    # the algorithm.
def test_model_prediction(original, transformed):
    print(original.predict(examplesInput_df).all() == transformed.predict(transfromedExamplesInput_df).all())
    assert original.predict(examplesInput_df).all() == transformed.predict(transfromedExamplesInput_df).all()

In [35]:
# This is a pytest to perform regression testing on the classifier 
# It allows us to compare if the new model is behaving the same on the example inputs as the old model
# We compare the prediction results of the original model and the new model on the example inputs
# Both the new model and original model should ideally behave the same way as we are counting on its predictabliity
    # if the results are different the tester should be informed, telling them to either update the testcase or look
    # further into the issue
def modelUnitTest(original, new):
    print("These are the original input results: ", original.predict(examplesInput_df))
    print("These are the new input results: ", new.predict(examplesInput_df))
    # expected output compared to new model 
    print(original.predict(examplesInput_df).all() == new.predict(examplesInput_df).all())
    assert original.predict(examplesInput_df).all() == new.predict(examplesInput_df).all()

In [14]:
# This is a pytest to perform regression testing on the classifier 
# It allows us to regression test the new model by ensuring it has the same results on the example input as
    # the predefined example input's output
# We compare the example input's output to the output generated by the new model
# The results should ideally be the same, if not then the tester should be informed about the change in behavior
def unitTest(new):
    print("These are the example output results: ", examplesOutput_df)
    print("These are the model input results: ", new.predict(examplesInput_df))
    # expected output compared to new model 
    print(examplesOutput_df.all() == new.predict(examplesInput_df).all())
    assert examplesOutput_df.all() == new.predict(examplesInput_df).all()

In [15]:
# Testing Cycle
# First check the correctness of the model 
# We do this using metamorphic testing

In [16]:
# Access the original model from the cache
# The time shows the access time
start = time.time()
originalModel = getModel(Simple_Diabetes_Predictor.df_hash, Simple_Diabetes_Predictor.K)
end = time.time()
print(end - start)

0.009830951690673828


In [17]:
# Transform the incoming dataset according to the metamorphic relations
transformed_df = transform_db(Simple_Diabetes_Predictor.df)
df_hashableT = HashableDataFrame(transformed_df)

In [37]:
# Access the transformed model from the cache
# The time shows the access time
start = time.time()
transformedModel = getTransformedModel(df_hashableT, Simple_Diabetes_Predictor.K) 
end = time.time()
print(end - start)

0.0035200119018554688


In [38]:
# Test the correctness of the algorithm using metamorphic testing
# Compare the test results of the original model and the transformed model
# The classifiers should be identical therefore the results should be the same
print(test_model(originalModel[1],transformedModel[1]))

True
None


In [39]:
# Test the correctness of the algorithm using metamorphic testing
# Compare the predictions of the original model and the transformed model
# The classifiers should be identical therefore the results should be the same
print(test_model_prediction(originalModel[0],transformedModel[0]))

True
None


In [21]:
# Regression Testing Example
# New data is added, resulting in a new model
    # We want to compare the new model to the old model and see if it behaves the same on the key example inputs
    
# Copy the first rows of the dataframe
df_addition = Simple_Diabetes_Predictor.df[0:50].copy()

# Add it to the new databae and transform the new database as well
new_df = Simple_Diabetes_Predictor.df.append(df_addition)

# Turn it to a hashable dataset
df_hashableNew = HashableDataFrame(new_df)

In [22]:
# Access the old model from the cache
# The time shows the access time
start = time.time()
oldModel = getModel(Simple_Diabetes_Predictor.df_hash, Simple_Diabetes_Predictor.K)
end = time.time()
print(end - start)

0.00669097900390625


In [40]:
# Train the new model, store it in cache and access the new model from the cache
# The time shows the access time
start = time.time()
newModel = getModel(df_hashableNew, Simple_Diabetes_Predictor.K)
end = time.time()
print(end - start)

0.002099275588989258


In [24]:
# Test if the two models are behaving the same way
# Compare the results on the example input for both the models
print(modelUnitTest(oldModel[0],newModel[0]))

These are the original input results:  [1 0 0 1 0]
These are the new input results:  [1 0 0 1 0]
True
None


In [41]:
# Test if the model in use is still behaving the same way
# Compare the results on the example input's output with the model in use predicitons
print(unitTest(newModel[0]))

These are the example output results:  [1 0 0 1 0]
These are the model input results:  [1 0 0 1 0]
True
None
