In [8]:
import numpy as np
import math
import pandas as pd

In [2]:
def error_no_statement(y_pred, y_true):
    delta = 10 ** -10
    return -y_true * math.log(y_pred + delta) -(1 - y_true) * math.log(1 + delta - y_pred)

In [3]:
def total_error(y_pred, y_true):
    _sum = 0
    for pred, true in zip(y_pred, y_true):
        _sum += error_no_statement(pred, true)
    return _sum / len(y_true)

In [4]:
def create_all_positive_preds_dataset(size=1000):
    for i in range(size):
        true = np.random.randint(2)
        pred = 1        
        yield {'y_pred': pred, 'y_true': true}

In [5]:
def create_one_positive_preds_dataset(size=1000):
    yield {'y_pred': 1, 'y_true': 1}
    for i in range(size):
        true = np.random.randint(2)
        pred = 0        
        yield {'y_pred': pred, 'y_true': true}

In [6]:
def create_unbalanced_dataset(size=1000):
    yield {'y_pred': 0, 'y_true': 1}
    for i in range(size):
        true = 0
        pred = 0        
        yield {'y_pred': pred, 'y_true': true}

In [7]:
def create_ideal_dataset(size=1000):
    for i in range(size):
        true = np.random.randint(2)
        pred = true        
        yield {'y_pred': pred, 'y_true': true}

In [9]:
positive_dataset = pd.DataFrame(columns=['y_pred', 'y_true'])
for i in create_all_positive_preds_dataset():
    positive_dataset = positive_dataset.append(i, ignore_index=True)
positive_dataset.head()

Unnamed: 0,y_pred,y_true
0,1,0
1,1,0
2,1,1
3,1,1
4,1,1


In [16]:
total_error(positive_dataset['y_pred'], positive_dataset['y_true'])  # the error is big beacuse we predict correctly only half of classes, because our prediction is always 1 and true is 0 or 1

11.512925423549927

In [18]:
one_positive_dataset = pd.DataFrame(columns=['y_pred', 'y_true'])
for i in create_one_positive_preds_dataset():
    one_positive_dataset = one_positive_dataset.append(i, ignore_index=True)
one_positive_dataset.head()

Unnamed: 0,y_pred,y_true
0,1,1
1,0,0
2,0,1
3,0,0
4,0,1


In [19]:
total_error(one_positive_dataset['y_pred'], one_positive_dataset['y_true']) # the error is also big beacuse we predict correctly only the first item, and a half of following answers

11.616438281288923

In [20]:
unbalanced_dataset = pd.DataFrame(columns=['y_pred', 'y_true'])
for i in create_unbalanced_dataset():
    unbalanced_dataset = unbalanced_dataset.append(i, ignore_index=True)
unbalanced_dataset.head()

Unnamed: 0,y_pred,y_true
0,0,1
1,0,0
2,0,0
3,0,0
4,0,0


In [21]:
total_error(unbalanced_dataset['y_pred'], unbalanced_dataset['y_true'])  # the error is very small, we predict uncorrect only the first item, the other classes are all 0 and they are equal

0.023002847981960266

In [22]:
ideal_dataset = pd.DataFrame(columns=['y_pred', 'y_true'])
for i in create_ideal_dataset():
    ideal_dataset = ideal_dataset.append(i, ignore_index=True)
ideal_dataset.head()

Unnamed: 0,y_pred,y_true
0,0,0
1,0,0
2,0,0
3,1,1
4,0,0


In [23]:
total_error(ideal_dataset['y_pred'], ideal_dataset['y_true']) # our error is veeery little, beacuse our dataset is ideal and we dont have mistakes

-1.0000000826903526e-10