# 2. Evaluating model performance

In [6]:
from math import sqrt
from prettytable import PrettyTable
from random import randrange, seed

### Define functions
* Train/test dataset split
* K-fold cross-validation dataset split
* Classification accuracy and confusion matrix
* Regression MAE and RMSE
* Random selection and Zero rule baselines

In [9]:
# Split a dataset into a train and test set, default 60/40 split
def train_test_split(dataset, split=0.60):
    """Randomly partition ``dataset`` into a training list and a test list.

    Rows are drawn without replacement via ``randrange``, so seed the
    ``random`` module beforehand if a reproducible split is required.

    Args:
        dataset: Sequence of rows. Only a shallow copy is consumed; the
            caller's sequence is left untouched.
        split: Fraction of the rows (0 to 1 inclusive) assigned to the
            training set. A genuinely fractional row count is rounded up.

    Returns:
        A ``(train, test)`` tuple of lists. ``train`` holds the drawn rows in
        draw order; ``test`` holds the rows that were not drawn, in their
        original relative order.

    Raises:
        ValueError: If ``split`` is not within [0, 1].
    """
    if not 0.0 <= split <= 1.0:
        raise ValueError("split must be between 0 and 1, got %r" % (split,))
    # Work on a shallow copy: popping from the caller's list would mutate it.
    dataset_copy = list(dataset)
    # Convert the target to a whole number of rows before looping. Comparing
    # len(train) with the raw float lets representation error leak through:
    # 0.55 * 100 evaluates to 55.00000000000001, which would draw 56 rows.
    raw_size = split * len(dataset_copy)
    train_size = int(raw_size)
    if raw_size - train_size > 1e-9:
        # A real fractional part (not float noise): round up to the next row.
        train_size += 1
    train = list()
    while len(train) < train_size:
        # Pick a random remaining row and move it from the pool to train.
        index = randrange(len(dataset_copy))
        train.append(dataset_copy.pop(index))
    # Whatever is left in the pool forms the test set.
    return train, dataset_copy

# Instead of a single train/test pair, divide the data into k groups (folds) of equal size.
# For each fold, train the algorithm on the other k-1 folds and test on the held-out fold.
# Split dataset into k folds (3 by default)
def cross_validation_split(dataset, folds=3):
    """Randomly deal rows of ``dataset`` into ``folds`` equally sized folds.

    Rows are drawn without replacement via ``randrange``. Each fold receives
    ``int(len(dataset) / folds)`` rows, so any rows beyond a multiple of
    ``folds`` are not placed in any fold.

    Args:
        dataset: Sequence of rows; it is copied, not modified.
        folds: Number of folds to build (3 by default).

    Returns:
        A list of ``folds`` lists, each holding the rows drawn for that fold.
    """
    # Pool of rows still available; a copy so the caller's data stays intact.
    remaining = list(dataset)
    # Truncating division: every fold gets the same number of rows.
    rows_per_fold = int(len(dataset) / folds)
    partitions = []
    for _ in range(folds):
        # Move rows_per_fold randomly chosen rows from the pool into this fold.
        partitions.append(
            [remaining.pop(randrange(len(remaining))) for _ in range(rows_per_fold)]
        )
    return partitions

def accuracy_metric(actual, predicted):
    """Return the percentage of positions where ``predicted`` equals ``actual``.

    Args:
        actual: Sequence of true labels.
        predicted: Sequence of predicted labels, indexed in step with
            ``actual``.

    Returns:
        Accuracy as a float between 0 and 100.

    Raises:
        ZeroDivisionError: If ``actual`` is empty.
    """
    total = len(actual)
    # Count index positions where the prediction matches the true label.
    hits = sum(1 for idx in range(total) if actual[idx] == predicted[idx])
    return hits / float(total) * 100

# Construct confusion matrix
# Returns two objects: the set of unique actual values, and the matrix
# Matrix: the 1st index is the actual value, the 2nd index is the predicted value
def confusion_matrix(actual, predicted):
    """Count how often each actual label was predicted as each label.

    Args:
        actual: Sequence of true labels (hashable values).
        predicted: Sequence of predicted labels, indexed in step with
            ``actual``.

    Returns:
        A ``(unique, matrix)`` tuple. ``unique`` is the set of labels seen in
        ``actual`` or in the paired predictions. ``matrix`` is a square list
        of lists where ``matrix[i][j]`` is the number of rows whose actual
        label is the i-th label and whose predicted label is the j-th label,
        with i and j following the iteration order of ``unique``.

    Raises:
        IndexError: If ``predicted`` is shorter than ``actual``.
    """
    # Pair values up by index so only predictions that have a matching
    # actual value take part (extra trailing predictions are ignored).
    pairs = [(actual[i], predicted[i]) for i in range(len(actual))]
    unique = set(actual)
    # A model can predict a label that never occurs in `actual`; register it
    # too, otherwise the index lookup below fails with KeyError.
    unique.update(guess for _, guess in pairs)
    # Map each label to its row/column position.
    lookup = {value: i for i, value in enumerate(unique)}
    size = len(unique)
    # Square matrix of zero counts, one row and one column per label.
    matrix = [[0] * size for _ in range(size)]
    for truth, guess in pairs:
        # Row = actual label, column = predicted label.
        matrix[lookup[truth]][lookup[guess]] += 1
    return unique, matrix

# Print human-readable confusion matrix, using PrettyTable
def print_confusion_matrix(unique, matrix):
    """Print a confusion matrix as a text table built with PrettyTable.

    Args:
        unique: Iterable of labels; its iteration order defines both the
            column headers and the order of the printed rows.
        matrix: List of count rows where ``matrix[i]`` belongs to the i-th
            label of ``unique``.

    Returns:
        None. The table is written to standard output.
    """
    table = PrettyTable()
    # Corner header is the text A\P (Actual \ Predicted). The backslash is
    # escaped explicitly: a bare "\P" is an invalid escape sequence, which
    # newer Python versions warn about at compile time.
    table.field_names = ["A\\P"] + [str(x) for x in unique]
    for i, value in enumerate(unique):
        # One row per actual label: the label, then its per-prediction counts.
        row = [str(value)] + [str(count) for count in matrix[i]]
        table.add_row(row)
    print(table)
    
def mae_metric(actual, predicted):
    """Return the mean absolute error between ``predicted`` and ``actual``.

    Args:
        actual: Sequence of true numeric values.
        predicted: Sequence of predicted numeric values, indexed in step
            with ``actual``.

    Returns:
        The average of ``abs(predicted[i] - actual[i])`` as a float.

    Raises:
        ZeroDivisionError: If ``actual`` is empty.
    """
    total_abs_error = 0.0
    for idx, truth in enumerate(actual):
        total_abs_error += abs(predicted[idx] - truth)
    # float() keeps the division a true division regardless of input types.
    return total_abs_error / float(len(actual))

def rmse_metric(actual, predicted):
    """Return the root mean squared error between ``predicted`` and ``actual``.

    Args:
        actual: Sequence of true numeric values.
        predicted: Sequence of predicted numeric values, indexed in step
            with ``actual``.

    Returns:
        The square root of the mean of ``(predicted[i] - actual[i]) ** 2``.

    Raises:
        ZeroDivisionError: If ``actual`` is empty.
    """
    squared_total = 0.0
    for idx, truth in enumerate(actual):
        residual = predicted[idx] - truth
        squared_total += residual ** 2
    # Average the squared residuals, then take the root.
    return sqrt(squared_total / float(len(actual)))

# Generate random predictions
def random_algorithm(train, test):
    """Baseline that predicts a randomly chosen training label for each test row.

    Every distinct label found in the last column of ``train`` is equally
    likely to be picked, regardless of how often it occurs in the training
    data. Picks are made with ``randrange``.

    Args:
        train: Sequence of rows whose last element is the output value.
        test: Sequence of rows to predict for; only its length matters.

    Returns:
        A list with one predicted label per row of ``test``.
    """
    # Distinct output values observed in the training rows.
    classes = list(set(row[-1] for row in train))
    # One uniform random pick per test row.
    return [classes[randrange(len(classes))] for _ in test]

# Zero Rule for Classification models: for each testing row, predict the most common training output
def zero_rule_algorithm_classification(train, test):
    """Baseline that predicts the most frequent training label for every test row.

    Args:
        train: Sequence of rows whose last element is the (hashable) class
            label.
        test: Sequence of rows to predict for; only its length matters.

    Returns:
        A list of ``len(test)`` copies of the most common training label.
        When several labels share the highest count, the one that appears
        first in ``train`` is chosen.

    Raises:
        ValueError: If ``train`` is empty.
    """
    # Tally each label in a single pass. The dict keeps labels in order of
    # first appearance (guaranteed from Python 3.7), which makes tie-breaking
    # reproducible instead of depending on set iteration order.
    counts = dict()
    for row in train:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    # max() returns the first key holding the maximal count.
    prediction = max(counts, key=counts.get)
    # Same prediction for every test row.
    return [prediction for _ in range(len(test))]

# Zero Rule for Regression: for each testing row, predict the mean of training outputs
def zero_rule_algorithm_regression(train, test):
    """Baseline that predicts the mean training output for every test row.

    Args:
        train: Sequence of rows whose last element is the numeric output.
        test: Sequence of rows to predict for; only its length matters.

    Returns:
        A list of ``len(test)`` copies of the mean of the training outputs.

    Raises:
        ZeroDivisionError: If ``train`` is empty.
    """
    # Output value is taken from the last column of each training row.
    targets = [row[-1] for row in train]
    mean_target = sum(targets) / float(len(targets))
    # Repeat the single mean value once per test row.
    return [mean_target] * len(test)



### Testing train/test and cross validation splits

In [10]:
# --- Hold-out split demo ---
# Fixed seed so the same rows are drawn on every run.
seed(1)
# Ten single-column rows: [1] .. [10]
dataset = [[value] for value in range(1, 11)]
# The function returns a (train, test) pair, unpacked here in one step.
train, test = train_test_split(dataset)
print("> Train and Test Split")
print("Train (60%):", train)
print("Test  (40%):", test, "\n")

# --- k-fold split demo ---
# Re-seed so this demo does not depend on how many draws the one above made.
seed(1)
dataset = [[value] for value in range(1, 11)]
# 10 rows into 4 folds: int(10 / 4) = 2 rows per fold, 2 rows stay unused.
folds = cross_validation_split(dataset, 4)
print("> k-fold Cross Validation Split")
print("4 folds:", folds)

> Train and Test Split
Train (60%): [[3], [2], [7], [1], [8], [9]]
Test  (40%): [[4], [5], [6], [10]] 

> k-fold Cross Validation Split
4 folds: [[[3], [2]], [[7], [1]], [[8], [9]], [[10], [6]]]


### Testing evaluation metrics

In [11]:
# --- Classification accuracy ---
actual = [0] * 5 + [1] * 5                    # 10 labels: five 0s, then five 1s
predicted = [0, 1, 0, 0, 0, 1, 0, 1, 1, 1]    # wrong at positions 1 and 6
accuracy = accuracy_metric(actual, predicted)
print("Classification:", accuracy, "% \n")

# --- Confusion matrix, integer labels ---
print("Confusion matrix:")
actual = [0] * 5 + [1] * 5
predicted = [0, 1, 1, 0, 0, 1, 0, 1, 1, 1]    # wrong at positions 1, 2 and 6
unique, matrix = confusion_matrix(actual, predicted)
print_confusion_matrix(unique, matrix)

# --- Confusion matrix, string labels ---
# Row/column order follows the iteration order of the returned label set.
actual2 = ["Yes", "No", "Yes", "No", "Yes", "Yes", "Yes", "No", "No", "Yes"]
predicted2 = ["Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", "No", "No", "No"]
unique2, matrix2 = confusion_matrix(actual2, predicted2)
print_confusion_matrix(unique2, matrix2)

# --- Regression errors ---
actual = [0.1, 0.2, 0.3, 0.4, 0.5]
predicted = [0.11, 0.19, 0.29, 0.41, 0.5]
# Four of the five predictions are off by 0.01, so MAE should be about 0.008.
mae = mae_metric(actual, predicted)
print("\nMAE: ", mae)

rmse = rmse_metric(actual, predicted)
print("RMSE:", rmse)

Classification: 80.0 % 

Confusion matrix:
+-----+---+---+
| A\P | 0 | 1 |
+-----+---+---+
|  0  | 3 | 2 |
|  1  | 1 | 4 |
+-----+---+---+
+-----+-----+----+
| A\P | Yes | No |
+-----+-----+----+
| Yes |  4  | 2  |
|  No |  1  | 3  |
+-----+-----+----+

MAE:  0.007999999999999993
RMSE: 0.00894427190999915


### Testing simple baselines

In [12]:
# --- Random-prediction baseline ---
seed(1)
# 13 training rows: labels 0,1,0,1,0,1 followed by seven 1s
train = [[label] for label in [0, 1] * 3 + [1] * 7]
# 11 test rows with unknown outputs
test = [[None] for row_no in range(11)]
predictions = random_algorithm(train, test)
print("Random predictions:",predictions)

# --- Zero Rule baseline, classification ---
# Four '0' labels and two '1' labels: '0' is the majority class.
train = [[label] for label in ['0', '0', '0', '0', '1', '1']]
test = [[None] for row_no in range(4)]
predictions = zero_rule_algorithm_classification(train, test)
print("\nZero Rule classification predictions:", predictions)

# --- Zero Rule baseline, regression ---
# Outputs sum to 90 over 6 rows, so the mean prediction is 15.0.
train = [[output] for output in [10, 15, 12, 15, 18, 20]]
test = [[None] for row_no in range(4)]
predictions = zero_rule_algorithm_regression(train, test)
print("Zero Rule regression predictions:", predictions)


Random predictions: [0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]

Zero Rule classification predictions: ['0', '0', '0', '0']
Zero Rule regression predictions: [15.0, 15.0, 15.0, 15.0]
