### Error of prediction for multi-target regression
Data from "probabilities-multi-target.txt" will be compared to "true-multi-target.txt" <br/>
Algorithm considers true for the attributes which probability is higher than .5 and less than 1.

#### Loss Function - Mean Squared Error(MSE) and Error of prediction (EoP) Root Mean Squared Error (RMSE) metric
Loss: $MSE = \frac{1}{n} \sum_i \sum_j (y_{ij} - \hat{y}_{ij})^2$ <br/>
EoP: $RMSE = \sqrt{MSE}$ <br/>
Where $y_{ij}$ is the actual value of the j-th target for the i-th data point and $\hat{y}_{ij}$ is the predicted value of the j-th target variable for the i-th data point and $n$ is the number of targets.

In [17]:
import math
import numpy as np

#EoP_MSE_MT_r

def Loss_EoP_MSE_MT_r(filePath):
    true_matrix = np.loadtxt(fname="evaluation-metrics-main/sports.csv", delimiter=",", skiprows=1, usecols=(0, 1, 2))
    computed_matrix = np.loadtxt(fname="evaluation-metrics-main/sports.csv", delimiter=",", skiprows=1, usecols=(3, 4, 5))
    Loss = np.sum((true_matrix - computed_matrix) ** 2) / true_matrix.shape[0]
    EoP = np.sqrt(Loss)
    return Loss, EoP

Loss, EoP = Loss_EoP_MSE_MT_r("evaluation-metrics-main/sports.csv")
print(f"Loss function for sports.csv using MSE: {Loss}")
print(f"Error of prediction for sports.csv using RMSE: {EoP}")

Loss function for sports.csv using MSE: 43.35
Error of prediction for sports.csv using RMSE: 6.5840716885526085


### Accuracy, precision, recall for multi-class
Data from "probabilities-multi-class.txt" and "true-multi-class.txt"<br/>

Definitions
<ul>
    <li>True positive(TP): the number of instances that are correctly classified as positive for a given class</li>
    <li>False positive(FP): the number of instances that are incorrectly classified as positive for a given class</li>
    <li>True Negative(TN): the number of instances that are correctly classified as negative for a given class</li>
    <li>False negative(FN): the number of instances that are incorrectly classified as negative for a given class</li>
</ul>

#### Accuracy
Accuracy for class i: The proportion of correctly classified instances for class i to the total number of instances for class i.<br/>
Accuracy for class i = $\frac{TP_i + TN_i}{TP_i + FP_i + TN_i + FN_i} $ <br/>

#### Precision
Precision for class i: The proportion of true positives for class i to the total number of predicted positives for class i.<br/>
Precision for class i = $\frac{TP_i}{TP_i + FP_i}$

#### Recall
Recall for class i: The proportion of true positives for class i to the total number of actual positives for class i.<br/>
Recall for class i = $\frac{TP_i}{TP_i + FN_i}$

In [30]:
import numpy as np
def calc_arrays(computed_line, true_line, TP, FP, TN, FN):
    for index in range(len(computed_line)):
        if computed_line[index] == 1 and true_line[index] == 1:
            TP[index] += 1
        if computed_line[index] == 1 and true_line[index] == 0:
            FP[index] += 1
        if computed_line[index] == 0 and true_line[index] == 0:
            TN[index] += 1
        if computed_line[index] == 0 and true_line[index] == 1:
            FN[index] += 1
    return TP, FP, TN, FN

def print_ACC_PREC_REC_forClass(Class, TP, FP, TN, FN):
    print(f'For class {Class}')
    ACC = (TP + TN) / (TP + FP + TN + FN)
    PREC = TP / (TP + FP)
    REC = TP / (TP + FN)
    print(f'Accuracy = {ACC}, Precision = {PREC}, Recall = {REC}')

def ACC_PREC_REC(prob_filePath, true_filePath):
    prob_file = open(prob_filePath, "r") 
    true_file = open(true_filePath, "r") 

    prob_line = prob_file.readline()
    true_line = true_file.readline()
    firstIteration = True
    noSamples = 0
    while prob_line and true_line:
        noSamples += 1
        prob_line = prob_line.split(" ")
        true_line = true_line.split(" ")

        true_line = [int(i) for i in true_line]
        prob_line = [float(i) for i in prob_line]

        if firstIteration == True:
            firstIteration = False
            noClasses = len(prob_line)
            # *arrays for the TP/TN/FP/FN values for each class which will be updated while reading from the files
            TP = np.zeros(noClasses)
            FP = np.zeros(noClasses)
            TN = np.zeros(noClasses)
            FN = np.zeros(noClasses)

        max_prob_index = np.argmax(prob_line)
        computed_line = [1 if index == max_prob_index else 0 for index in range(len(prob_line))]

        TP, FP, TN, FN = calc_arrays(computed_line, true_line, TP, FP, TN, FN)

        prob_line = prob_file.readline()
        true_line = true_file.readline()
    # print(F'TP = {TP}')
    # print(F'TN = {TN}')
    # print(F'FP = {FP}')
    # print(F'FN = {FN}')
    for Class in range(noClasses):
        print_ACC_PREC_REC_forClass(Class, TP[Class], FP[Class], TN[Class], FN[Class])

def calc_arrays_stringed(computed_line, true_line, classes, TP, FP, TN, FN):
    for ClassIndex in range(len(classes)):
        current_class = classes[ClassIndex]
        for index in range(true_line.shape[0]):
            if computed_line[index] == current_class and true_line[index] == current_class:
                TP[ClassIndex] += 1
            if computed_line[index] == current_class and true_line[index] != current_class:
                FP[ClassIndex] += 1
            if computed_line[index] != current_class and true_line[index] != current_class:
                TN[ClassIndex] += 1
            if computed_line[index] != current_class and true_line[index] == current_class:
                FN[ClassIndex] += 1
    return TP, FP, TN, FN
            
        

def ACC_PREC_REC_2(filePath, classes):
    # * for a file 
    true_line = np.loadtxt(fname=filePath, usecols=(0), skiprows=1, delimiter=",", dtype=str)
    computed_line = np.loadtxt(fname=filePath, usecols=(1), skiprows=1, delimiter=",", dtype=str)
    
    noClasses = len(classes)

    TP = np.zeros(noClasses)
    TN = np.zeros(noClasses)
    FP = np.zeros(noClasses)
    FN = np.zeros(noClasses)

    TP, TN, FP, FN = calc_arrays_stringed(computed_line, true_line, classes, TP, FP, TN, FN)
    for index in range(noClasses):
        print_ACC_PREC_REC_forClass(classes[index], TP[index], FP[index], TN[index], FN[index])

ACC_PREC_REC("evaluation-metrics-main/probabilities-multi-class.txt", "evaluation-metrics-main/true-multi-class.txt")

ACC_PREC_REC_2("evaluation-metrics-main/flowers.csv", classes=["Daisy", "Tulip", "Rose"])

For class 0
Accuracy = 0.82, Precision = 0.717391304347826, Recall = 0.868421052631579
For class 1
Accuracy = 0.83, Precision = 0.78125, Recall = 0.7142857142857143
For class 2
Accuracy = 0.87, Precision = 0.8181818181818182, Recall = 0.6666666666666666
For class Daisy
Accuracy = 0.4230769230769231, Precision = 0.3333333333333333, Recall = 0.36363636363636365
For class Tulip
Accuracy = 0.2692307692307692, Precision = 0.125, Recall = 0.2857142857142857
For class Rose
Accuracy = 0.3076923076923077, Precision = 0.14285714285714285, Recall = 0.25


### Log Loss for binary classification (Binary Cross-Entropy Loss)
Definitions <br/>
$N$ - Number of entries <br/>
$y_i$ - Actual value of ith entry<br/>
$p(y_i)$ - Predicted probability <br/>
L = $\frac{1}{N}\sum_{i=1}^N-(y_{i} \times log(p(y_i)) + (1 - y_i) \times log(1 - p(y_i)))$ <br/>

In [19]:
import numpy as np
import math

def LogLoss(prob_filePath, true_filePath):
    true_matrix = np.loadtxt(true_filePath)
    prob_matrix = np.loadtxt(prob_filePath)

    Loss = -(true_matrix * np.log(prob_matrix) + (1 - true_matrix) * np.log(1 - prob_matrix))
    return np.mean(Loss)

print(LogLoss("evaluation-metrics-main/probabilities-binary.txt", "evaluation-metrics-main/true-binary.txt"))
print(LogLoss("evaluation-metrics-main/probabilities-multi-class.txt", "evaluation-metrics-main/true-multi-class.txt"))
print(LogLoss("evaluation-metrics-main/probabilities-multi-target.txt", "evaluation-metrics-main/true-multi-target.txt"))


0.6923370719995571
0.5148514558114434
0.5626719394292647
