In [1]:
### Classification Accuracy

def accuracy_metric(actual, predicted):
    """Return the percentage of positions where the two sequences agree.

    Args:
        actual: indexable sequence of ground-truth labels.
        predicted: indexable sequence of predicted labels; it is indexed at
            every position in ``range(len(actual))``.

    Returns:
        A float in the range 0.0 to 100.0.

    Raises:
        ZeroDivisionError: if ``actual`` is empty.
        IndexError: if ``predicted`` is shorter than ``actual``.
    """
    total = len(actual)
    # Count matching positions; indexing is driven by ``actual`` only.
    hits = sum(1 for idx in range(total) if actual[idx] == predicted[idx])
    return hits / float(total) * 100.0

In [2]:
# Test accuracy
#
# 8 of the 10 positions below agree (indices 1 and 6 differ), so the
# expected printed value is 80.0.

actual = [0,0,0,0,0,1,1,1,1,1]
predicted = [0,1,0,0,0,1,0,1,1,1]
accuracy = accuracy_metric(actual, predicted)
print(accuracy)

In [3]:
def confusion_matrix(actual, predicted):
    """Build a confusion matrix for two equally long label sequences.

    The label set is the union of the labels seen in ``actual`` and in
    ``predicted``, so a class that is only ever predicted (never observed)
    gets its own row/column instead of raising ``KeyError``.

    Args:
        actual: indexable sequence of ground-truth labels (hashable values).
        predicted: indexable sequence of predicted labels; it is indexed at
            every position in ``range(len(actual))``.

    Returns:
        A ``(unique, matrix)`` tuple. ``unique`` is the set of labels.
        ``matrix`` is a square list of lists where ``matrix[p][a]`` counts the
        samples whose predicted label is at position ``p`` and whose actual
        label is at position ``a``; positions follow the iteration order of
        the returned ``unique`` set.

    Raises:
        IndexError: if ``predicted`` is shorter than ``actual``.
    """
    unique = set(actual).union(predicted)
    size = len(unique)
    matrix = [[0] * size for _ in range(size)]

    # Map each label to its row/column position. The same (unmodified) set
    # object is returned, so callers enumerating it see the same order.
    lookup = {value: position for position, value in enumerate(unique)}

    for i in range(len(actual)):
        column = lookup[actual[i]]
        row = lookup[predicted[i]]
        # Rows are predicted labels, columns are actual labels.
        matrix[row][column] += 1

    return unique, matrix

In [4]:
# Test confusion matrix
#
# With these inputs the labels are {0, 1} and, because rows are predicted
# labels and columns are actual labels, the expected matrix is
# [[3, 1], [2, 4]].

actual = [0,0,0,0,0,1,1,1,1,1]
predicted = [0,1,1,0,0,1,0,1,1,1]

# Call once and keep the result; an extra bare call mid-cell displays
# nothing and only repeats the work.
unique, matrix = confusion_matrix(actual, predicted)

print(unique)
print(matrix)

In [5]:
# Pretty print a confusion matrix

def print_confusion_matrix(unique, matrix):
    """Print a confusion matrix as a small text table.

    The first line lists the labels after an ``(A)`` marker, the second line
    is an ``(P) ---`` separator, and each following line shows one label
    followed by the space-separated counts of the matching ``matrix`` row.

    Args:
        unique: iterable of labels; it is iterated twice, once for the header
            and once (enumerated) to select ``matrix`` rows by position.
        matrix: sequence of rows, indexed by the position of each label.
    """
    header = ' '.join(map(str, unique))
    print('(A) ' + header)
    print('(P) ---')
    for row_index, label in enumerate(unique):
        cells = ' '.join(map(str, matrix[row_index]))
        print("%s|  %s " % (label, cells))

In [6]:
# Render the labels and matrix produced by the confusion_matrix test cell above.
print_confusion_matrix(unique, matrix)

In [7]:
# Calculate mean absolute error

def mae_metric(actual, predicted):
    """Return the mean absolute error between two numeric sequences.

    Args:
        actual: indexable sequence of observed values.
        predicted: indexable sequence of predicted values; it is indexed at
            every position in ``range(len(actual))``.

    Returns:
        The sum of ``abs(predicted[i] - actual[i])`` divided by
        ``len(actual)``.

    Raises:
        ZeroDivisionError: if ``actual`` is empty.
    """
    count = len(actual)
    total_abs_error = 0.0
    # Accumulate left to right so floating-point rounding matches a plain loop.
    for idx in range(count):
        residual = predicted[idx] - actual[idx]
        total_abs_error = total_abs_error + abs(residual)
    return total_abs_error / float(count)

In [8]:
def rmse_metric(actual, predicted):
    """Return the root mean squared error between two numeric sequences.

    Args:
        actual: indexable sequence of observed values.
        predicted: indexable sequence of predicted values; it is indexed at
            every position in ``range(len(actual))``.

    Returns:
        The square root of the mean of ``(predicted[i] - actual[i]) ** 2``.

    Raises:
        ZeroDivisionError: if ``actual`` is empty.
    """
    # Imported here because no cell in the notebook brings ``sqrt`` into
    # scope; without it the function fails with NameError on a fresh kernel.
    from math import sqrt

    sum_error = 0.0
    for i in range(len(actual)):
        prediction_error = predicted[i] - actual[i]
        sum_error += prediction_error ** 2
    mean_error = sum_error / float(len(actual))
    return sqrt(mean_error)

In [9]:
## Validation Accuracies

import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix


# Path is relative to the notebook's working directory.
dataset = 'pima-indians-diabetes.csv'
# Column labels supplied to read_csv; the last one ('class') is the target.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 
         'pedi', 'age', 'class']
dataframe = pd.read_csv(dataset, names=names)
array = dataframe.values
# First eight columns are the features, the ninth column is the label.
X = array[:, 0:8]
Y = array[:, 8]
# 10 shuffled folds with a fixed seed; kfold and model are reused by the
# cross-validation cells that follow.
kfold = model_selection.KFold(n_splits=10, random_state=7, shuffle=True)
model = LogisticRegression(solver='liblinear')


In [10]:
# Cross Validation Classification Accuracy
#
# NOTE(review): the mean is scaled to a percentage (*100.0) but the standard
# deviation is printed unscaled, so the two printed numbers are in different
# units.

results = model_selection.cross_val_score(model, X, Y, cv=kfold, 
                                          scoring='accuracy')
print("Accuracy: %.3f (%.3f)" % (results.mean()*100.0, results.std()))

Accuracy: 77.086 (0.051)


In [11]:
# Cross Validation Classification LogLoss
#
# The 'neg_log_loss' scorer reports the negated log loss, which is why the
# printed mean is negative; values closer to zero are better.

results = model_selection.cross_val_score(model, X, Y, cv=kfold, 
                                          scoring='neg_log_loss')
print("Logloss: %.3f (%.3f)" % (results.mean(), results.std()))

Logloss: -0.494 (0.042)


In [12]:
# Cross Validation Classification ROC AUC
#
# Prints the mean and standard deviation of the per-fold 'roc_auc' scores,
# using the same model and kfold splitter as the cells above.

results = model_selection.cross_val_score(model, X, Y, cv=kfold, 
                                          scoring="roc_auc")
print("AUC: %.3f (%.3f)" % (results.mean(), results.std()))

AUC: 0.826 (0.050)


In [13]:
# Hold-out Classification Confusion Matrix
#
# This cell evaluates on a single train/test split (33% held out), not on
# cross-validation folds.
#
# The bare name `confusion_matrix` is bound twice in this notebook: by the
# hand-written function defined near the top and by the sklearn import in the
# setup cell. Binding sklearn's implementation to an explicit alias here makes
# the result independent of which of those cells ran last.
from sklearn.metrics import confusion_matrix as sk_confusion_matrix

test_size = 0.33
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=7
)
model = LogisticRegression(solver='liblinear')
model.fit(X_train, Y_train)
predicted = model.predict(X_test)
# sklearn's layout is rows = actual class, columns = predicted class, which is
# the transpose of the hand-written confusion_matrix defined earlier.
matrix = sk_confusion_matrix(Y_test, predicted)
print(matrix)


[[141  21]
 [ 41  51]]


In [14]:
# Classification Report for the hold-out split
#
# Uses Y_test and predicted from the train/test split fitted in the previous
# cell; no cross-validation is involved here.

from sklearn.metrics import classification_report

report = classification_report(Y_test, predicted)
print(report)

              precision    recall  f1-score   support

         0.0       0.77      0.87      0.82       162
         1.0       0.71      0.55      0.62        92

    accuracy                           0.76       254
   macro avg       0.74      0.71      0.72       254
weighted avg       0.75      0.76      0.75       254



In [15]:
############################
##   Regression Metrics   ##
############################


# Cross Validation Regression MAE

import pandas
from sklearn import model_selection
from sklearn.linear_model import LinearRegression


# Path is relative to the notebook's working directory.
dataset = 'housing.csv'
# Column labels supplied to read_csv; the last one ('MEDV') is the target.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# sep=r'\s+' splits on runs of whitespace; it is the documented replacement
# for the deprecated delim_whitespace=True keyword.
dataframe = pandas.read_csv(dataset, sep=r'\s+', names=names)
array = dataframe.values
# First thirteen columns are the features, the fourteenth is the target.
X = array[:, 0:13]
Y = array[:, 13]
# Rebinds kfold and model (previously the classification setup) for the
# regression cells that follow.
kfold = model_selection.KFold(n_splits=10, random_state=7, shuffle=True)
model = LinearRegression()

# The 'neg_mean_absolute_error' scorer reports the negated MAE, which is why
# the printed mean is negative; values closer to zero are better.
scoring = 'neg_mean_absolute_error'
results = model_selection.cross_val_score(model, X, Y, cv=kfold,
                                          scoring=scoring)
print("MAE: %.3f (%.3f)" % (results.mean(), results.std()))

MAE: -3.387 (0.667)


In [16]:
# Cross Validation Regression MSE
#
# The 'neg_mean_squared_error' scorer reports the negated MSE, which is why
# the printed mean is negative; values closer to zero are better.

scoring = 'neg_mean_squared_error'
results = model_selection.cross_val_score(model, X, Y, cv=kfold, 
                                          scoring=scoring
                                         )
print("MSE: %.3f (%.3f)" % (results.mean(), results.std()))

MSE: -23.747 (11.143)


In [17]:
# Cross Validation Regression R^2
#
# Prints the mean and standard deviation of the per-fold 'r2' scores, using
# the same model and kfold splitter as the MAE and MSE cells above.

scoring = 'r2'
results = model_selection.cross_val_score(model, X, Y, cv=kfold, 
                                          scoring=scoring
                                         )
print("R^2: %.3f (%.3f)" % (results.mean(), results.std()))

R^2: 0.718 (0.099)
