# Calculate metrics Manually
* We will work with the Titanic survival dataset
* The model is a simple KNN classifier

In [2]:
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [3]:
# Load the Titanic dataset bundled with seaborn
titanic = sns.load_dataset('titanic')

# Encode the column at position 2 ('sex') as integer labels
sex_codes = LabelEncoder().fit_transform(titanic.iloc[:, 2].values)
titanic.iloc[:, 2] = sex_codes

# Drop every row that contains a null value
# NOTE(review): dropna() looks at all columns, not only the ones used below — confirm this is intended
titanic = titanic.dropna()

# Features are the columns at positions 1 to 3; the target is the column at position 0
X = titanic.iloc[:, 1:4].values
Y = titanic.iloc[:, 0].values

# Hold out 20% of the rows for testing; random_state makes the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Create a KNeighborsClassifier with default settings and fit it on the training data
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)

In [4]:
# Predict the labels of the test set with the fitted model and store them in y_pred
y_pred = knn.predict(X_test)

* True positive (TP) = the number of cases correctly identified as positive
* False positive (FP) = the number of cases incorrectly identified as positive
* True negative (TN) = the number of cases correctly identified as negative
* False negative (FN) = the number of cases incorrectly identified as negative

In [5]:
# Confusion-matrix cells, all starting at zero
TP = TN = FP = FN = 0

# Walk predictions and true labels together and bump the matching cell
for predicted, actual in zip(y_pred, Y_test):
    if predicted == 1:
        if actual == 1:
            TP += 1
        elif actual == 0:
            FP += 1
    elif predicted == 0:
        if actual == 0:
            TN += 1
        elif actual == 1:
            FN += 1

# Report the four counts in the order TN, FP, FN, TP
for label, count in (
    ('TN: Nombre de True negatives = ', TN),
    ('FP: Nombre de False positives = ', FP),
    ('FN: Nombre de False negatives = ', FN),
    ('TP: Nombre de True positives = ', TP),
):
    print(label, count)

TN: Nombre de True negatives =  4
FP: Nombre de False positives =  6
FN: Nombre de False negatives =  7
TP: Nombre de True positives =  20


### Accuracy
The accuracy of a test is its ability to differentiate the patient and healthy cases correctly. To estimate the accuracy of a test, we should calculate the proportion of true positive and true negative in all evaluated cases.

*Mathematically, this can be stated as:* **Accuracy=(TP+TN)/(TP+TN+FP+FN)**

In [10]:
# Accuracy: share of all evaluated cases that were classified correctly
correct_cases = TP + TN
total_cases = TP + TN + FP + FN
accuracy = correct_cases / total_cases
print('accuracy = %.4f' % accuracy)

accuracy = 0.6486


In [15]:
# Accuracy as a percentage: count the positions where label and prediction agree
cnt = sum(1 for actual, predicted in zip(Y_test, y_pred) if actual == predicted)
accuracy_metric = cnt / float(len(Y_test)) * 100.0
print('Accuracy metric = %.4f' % accuracy_metric)

Accuracy metric = 64.8649


### Sensitivity or Recall
The sensitivity of a test is its ability to determine the patient cases correctly. To estimate it, we should calculate the proportion of true positive in patient cases.
*Mathematically, this can be stated as:* **Sensitivity=TP/(TP+FN)**

In [11]:
# Sensitivity (recall): true positives over all actually positive cases
actual_positives = TP + FN
sensitivity = TP / actual_positives
print('Sensitivity = %.4f' % sensitivity)

Sensitivity = 0.7407


### Specificity
The specificity of a test is its ability to determine the healthy cases correctly. To estimate it, we should calculate the proportion of true negative in healthy cases.
*Mathematically, this can be stated as:* **Specificity=TN/(TN+FP)**

In [12]:
# Specificity: true negatives over all actually negative cases
actual_negatives = TN + FP
specificity = TN / actual_negatives
print('Specificity = %.4f' % specificity)

Specificity = 0.4000


### Precision
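Precision is the proportion of cases predicted as positive that are actually positive.
*Mathematically, this can be stated as:* **Precision=TP/(TP+FP)**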

In [20]:
# Precision: true positives over all cases predicted as positive
predicted_positives = TP + FP
precision = TP / predicted_positives
print('Precision = %.4f' % precision)

Precision = 0.7692


### RMSE
RMSE is calculated as the square root of the mean of the squared differences between actual outcomes and predictions.
Mathematically: **RMSE = sqrt( sum( (predicted_i - real_i)^2 ) / total predictions)**

In [18]:
from math import sqrt

# Squared difference between each prediction and its true label
squared_errors = (
    (predicted - actual) ** 2 for predicted, actual in zip(y_pred, Y_test)
)
# Start the sum at 0.0 so the accumulator is a float from the first addition
sum_error = sum(squared_errors, 0.0)
mean_error = sum_error / float(len(Y_test))
# RMSE is the square root of the mean squared error
rmse = sqrt(mean_error)
print('RMSE = %.4f' % rmse)

RMSE = 0.5927


### MAE
The Mean Absolute Error or MAE for short is a good first error metric to use. It is calculated as the average of the absolute error values, where “absolute” means “made positive” so that they can be added together.
Mathematically:  **MAE = sum( abs(predicted_i - real_i) ) / total predictions**

In [19]:
# Absolute difference between each prediction and its true label
absolute_errors = (abs(predicted - actual) for predicted, actual in zip(y_pred, Y_test))
# Start the sum at 0.0 so the accumulator is a float from the first addition
sum_error = sum(absolute_errors, 0.0)
# MAE is the average of the absolute errors
mae = sum_error / float(len(Y_test))
print('MAE = %.4f' % mae)

MAE = 0.3514
