# 04 - Evaluation Metrics

### Classification accuracy

$\text{accuracy} = \frac{\text{correct predictions}}{\text{total predictions}} \times 100$

0% is the worst possible accuracy  
100% is the best possible accuracy

In [12]:
from math import sqrt

In [2]:
# Calculate the accuracy percentage between two lists
def accuracy_metric(actual, predicted):
    """Return the percentage of predictions that equal the actual labels.

    Args:
        actual: Sequence of true class labels.
        predicted: Sequence of predicted labels, aligned position by
            position with ``actual``.

    Returns:
        float: Accuracy between 0.0 (nothing matches) and 100.0
        (everything matches).

    Raises:
        ValueError: If the two sequences differ in length or are empty.
    """
    total = len(actual)
    if total != len(predicted):
        raise ValueError(
            "actual and predicted must have the same length (got %d and %d)"
            % (total, len(predicted))
        )
    if total == 0:
        # Accuracy is undefined with no samples; fail with a clear message
        # instead of an incidental ZeroDivisionError.
        raise ValueError("cannot compute accuracy of empty sequences")
    correct = sum(1 for truth, guess in zip(actual, predicted) if truth == guess)
    return correct / float(total) * 100.0

In [3]:
# Demo: ten binary labels, two of which are predicted wrongly -> prints 80.0
actual = [0] * 5 + [1] * 5
predicted = [0, 1, 0, 0, 0] + [1, 0, 1, 1, 1]
accuracy = accuracy_metric(actual, predicted)
print(accuracy)

80.0


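### Confusion Matrix

A confusion matrix counts how often each actual class was predicted as each class. In the layout used below, rows are the predicted classes (P) and columns are the actual classes (A), so the diagonal holds the correct predictions.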



In [4]:
# Calculate a confusion matrix
def confusion_matrix(actual, predicted):
    """Count how often each actual label was predicted as each label.

    Args:
        actual: Sequence of true class labels (hashable values).
        predicted: Sequence of predicted labels, aligned position by
            position with ``actual``.

    Returns:
        tuple: ``(unique, matrix)``. ``unique`` is the set of labels seen in
        either input. ``matrix`` is a square list of lists in which
        ``matrix[r][c]`` is the number of samples predicted as the r-th
        label whose actual label is the c-th label; the label positions
        follow the iteration order of the returned ``unique`` set.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(actual) != len(predicted):
        raise ValueError(
            "actual and predicted must have the same length (got %d and %d)"
            % (len(actual), len(predicted))
        )
    # Include labels that occur only in the predictions; building the lookup
    # from the actual labels alone raises KeyError for such a prediction.
    unique = set(actual) | set(predicted)
    lookup = {label: position for position, label in enumerate(unique)}
    size = len(unique)
    matrix = [[0] * size for _ in range(size)]
    for actual_label, predicted_label in zip(actual, predicted):
        # Rows are indexed by the predicted label, columns by the actual label.
        matrix[lookup[predicted_label]][lookup[actual_label]] += 1
    return unique, matrix

In [5]:
# Demo: confusion matrix for integer class labels 0 and 1
actual = [0] * 5 + [1] * 5
predicted = [0, 1, 1, 0, 0] + [1, 0, 1, 1, 1]
unique, matrix = confusion_matrix(actual, predicted)
print(unique, matrix, sep='\n')

{0, 1}
[[3, 1], [2, 4]]


In [8]:
# Pretty print a confusion matrix
def print_confusion_matrix(unique, matrix):
    """Print a confusion matrix as a small text table.

    The first line lists the labels after an ``(A)`` marker, the second is a
    ``(P)---`` separator, and each following line shows one label followed by
    the space-separated counts of the matrix row at the same position.

    Args:
        unique: Iterable of class labels; its iteration order selects which
            row of ``matrix`` is printed next to each label.
        matrix: Sequence of rows of counts, indexable by label position.
    """
    header_labels = ' '.join(map(str, unique))
    print('(A)' + header_labels)
    print('(P)---')
    for row_number, label in enumerate(unique):
        row_text = ' '.join(map(str, matrix[row_number]))
        print("%s| %s" % (label, row_text))

In [9]:
# Demo: pretty-print the confusion matrix for integer class labels 0 and 1
actual = [0] * 5 + [1] * 5
predicted = [0, 1, 1, 0, 0] + [1, 0, 1, 1, 1]
unique, matrix = confusion_matrix(actual, predicted)
print_confusion_matrix(unique, matrix)

(A)0 1
(P)---
0| 3 1
1| 2 4


### Mean Absolute Error

In [10]:
# Calculate mean absolute error
def mae_metric(actual, predicted):
    """Return the mean absolute error between predictions and actual values.

    Args:
        actual: Sequence of true numeric values.
        predicted: Sequence of predicted numeric values, aligned position by
            position with ``actual``.

    Returns:
        float: The average of ``abs(predicted[i] - actual[i])`` over all
        positions.

    Raises:
        ValueError: If the two sequences differ in length or are empty.
    """
    count = len(actual)
    if count != len(predicted):
        raise ValueError(
            "actual and predicted must have the same length (got %d and %d)"
            % (count, len(predicted))
        )
    if count == 0:
        # A mean over zero samples is undefined; fail with a clear message
        # instead of an incidental ZeroDivisionError.
        raise ValueError("cannot compute mean absolute error of empty sequences")
    # Start from 0.0 and add left to right so the floating-point result is
    # accumulated in the same order as a plain running total.
    sum_error = sum((abs(guess - truth) for truth, guess in zip(actual, predicted)), 0.0)
    return sum_error / float(count)

In [11]:
# Demo: MAE for five values whose predictions are off by at most 0.01
actual, predicted = (
    [0.1, 0.2, 0.3, 0.4, 0.5],
    [0.11, 0.19, 0.29, 0.41, 0.5],
)
mae = mae_metric(actual, predicted)
print(mae)

0.007999999999999993


### Root Mean Squared Error

In [13]:
# Calculate root mean squared error
def rmse_metric(actual, predicted):
    """Return the root mean squared error between predictions and actual values.

    Args:
        actual: Sequence of true numeric values.
        predicted: Sequence of predicted numeric values, aligned position by
            position with ``actual``.

    Returns:
        float: The square root of the average of
        ``(predicted[i] - actual[i]) ** 2`` over all positions.

    Raises:
        ValueError: If the two sequences differ in length or are empty.
    """
    count = len(actual)
    if count != len(predicted):
        raise ValueError(
            "actual and predicted must have the same length (got %d and %d)"
            % (count, len(predicted))
        )
    if count == 0:
        # A mean over zero samples is undefined; fail with a clear message
        # instead of an incidental ZeroDivisionError.
        raise ValueError("cannot compute root mean squared error of empty sequences")
    # Start from 0.0 and add left to right so the floating-point result is
    # accumulated in the same order as a plain running total.
    sum_error = sum(((guess - truth) ** 2 for truth, guess in zip(actual, predicted)), 0.0)
    mean_error = sum_error / float(count)
    return sqrt(mean_error)

In [14]:
# Demo: RMSE for five values whose predictions are off by at most 0.01
actual, predicted = (
    [0.1, 0.2, 0.3, 0.4, 0.5],
    [0.11, 0.19, 0.29, 0.41, 0.5],
)
rmse = rmse_metric(actual, predicted)
print(rmse)

0.00894427190999915


## Future Work

Implement these metrics:

* Precision for classification.
* Recall for classification.
* F1 for classification.
* Area Under ROC Curve or AUC for classification.
* Goodness of Fit or R^2 (R squared) for regression.