In [1]:
import numpy as np

In [2]:
import logging

from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import metrics

In [3]:
logging.basicConfig(level=logging.DEBUG)

In [19]:
def classification():
    # Generate a random binary classification problem.
    X, y = make_classification(n_samples=500, n_features=10, n_informative=10,random_state=1111, 
                               n_classes=2,class_sep=2.5, n_redundant=0)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111)

    model = LogisticRegression()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print('classification, roc auc score: %s' % metrics.roc_auc_score(y_test, predictions))
    return y_test, predictions

In [20]:
def regression():
    # Generate a random regression problem
    X, y = make_regression(n_samples=500, n_features=5, n_informative=5,
                           n_targets=1, noise=0.05, random_state=1111,bias=0.5)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)

    model = LinearRegression()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print('regression, mse: %s' % metrics.mean_squared_error(y_test.flatten(), predictions.flatten()))
    return y_test, predictions

In [21]:
c_actual, c_predicted = classification()

classification, roc auc score: 0.9714285714285714




Not sure why we use it yet, will have to check with some encoded array
- np.argmax() Returns the indices of the maximum values along an axis, 
- using axis 1, we are taking along all rows i.e observations which has max val which here is one, as the input matrix would already have been one hot encoded and we are unhot encoding it



In [22]:
def unhot(function):
    """Convert one-hot representation into one column."""
    def wrapper(actual, predicted):
        if len(actual.shape) > 1 and actual.shape[1] > 1:
            actual = actual.argmax(axis=1)
        if len(predicted.shape) > 1 and predicted.shape[1] > 1:
            predicted = predicted.argmax(axis=1)
        return function(actual, predicted)
    return wrapper


In [23]:
@unhot
def printing(c_actual, c_predicted):
    print("c_actual", c_actual)
    print("c_predicted", c_predicted)

In [24]:
printing(c_actual, c_predicted)
print(c_actual, c_predicted)

c_actual [1 1 1 1 0 1 1 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1
 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0 0 0 1 1 1 0 0
 0]
c_predicted [1 1 1 1 0 1 1 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 1 0 0 0 1 1 0 1 1 1 1 0 0 1 1
 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 1 1 1 1 0 0 1 1 1 0 0
 0]
[1 1 1 1 0 1 1 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1
 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0 0 0 1 1 1 0 0
 0] [1 1 1 1 0 1 1 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 1 0 0 0 1 1 0 1 1 1 1 0 0 1 1
 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 1 1 1 1 0 0 1 1 1 0 0
 0]


---

# Metrics

In [53]:
ca, cp = c_actual, c_predicted
ca = np.arange(0,21,4)
cp = np.arange(10,31,4)
print(ca, cp)


[ 0  4  8 12 16 20] [10 14 18 22 26 30]


In [59]:
def absolute_error(actual, predicted):
    print (np.abs(actual - predicted))
    return (np.abs(actual - predicted))

absolute_error(ca, cp)


[10 10 10 10 10 10]


array([10, 10, 10, 10, 10, 10])

In [57]:
@unhot
def classification_error(actual, predicted):
    print((actual != predicted))
    print((actual != predicted).sum())
    print((actual != predicted).sum() / float(actual.shape[0]))
    return (actual != predicted).sum() / float(actual.shape[0])
classification_error(ca,cp)

[ True  True  True  True  True  True]
6
1.0


1.0

In [58]:
#@unhot
def accuracy(actual, predicted):
    print(1 - classification_error(actual, predicted))
accuracy(ca,cp)

[ True  True  True  True  True  True]
6
1.0
0.0


In [62]:
def mean_absolute_error(actual, predicted):
    print(np.mean(np.abs(actual - predicted)))
    return (np.mean(absolute_error(actual, predicted)))
mean_absolute_error(ca, cp)

10.0
[10 10 10 10 10 10]


10.0

In [67]:
def squared_error(actual, predicted):
    print(actual - predicted)
    return (actual - predicted)**2
squared_error(np.array([1,2,3,4,5]), np.array([2, 4, 85, 6, 8]))

[ -1  -2 -82  -2  -3]


array([   1,    4, 6724,    4,    9], dtype=int32)

In [68]:
squared_error(ca, cp)

[-10 -10 -10 -10 -10 -10]


array([100, 100, 100, 100, 100, 100], dtype=int32)

In [69]:
def squared_log_error(actual, predicted):
    #adding 1 as  log0 is NaN
    return (np.log(np.array(actual) + 1) - np.log(np.array(predicted) + 1)) ** 2


def mean_squared_log_error(actual, predicted):
    return np.mean(squared_log_error(actual, predicted))


def mean_squared_error(actual, predicted):
    return np.mean(squared_error(actual, predicted))


def root_mean_squared_error(actual, predicted):
    return np.sqrt(mean_squared_error(actual, predicted))


def root_mean_squared_log_error(actual, predicted):
    return np.sqrt(mean_squared_log_error(actual, predicted))


In [113]:
EPS = 1e-15
def logloss(actual, predicted):
    predicted = np.clip(predicted, EPS, 1 - EPS)
    loss = -np.sum(actual * np.log(predicted))
    return loss / float(actual.shape[0])

# print("Multiply",actual * np.log(predicted))
# print("-ve loss", loss)
# print("out",loss / float(actual.shape[0]))


In [77]:
ca = np.array([1,1,0,0,1,0,1])
cp = np.array([1,1,0,0,0,1,0])

np.clip will take - array and change 
- all the values less than min to min 
- all values max to given max to given max value

```
mathematically
x = min if x < min  or 
x = max if x > max
```

Here EPS is expotential; 1e-15 detones 10 to the power of -15 i.e very very low value but not equal to 0

> to calculate log for 0,1 we do clip

In [81]:
print(cp)
np.clip(cp, 0.3, 0.7)

[1 1 0 0 0 1 0]


array([0.7, 0.7, 0.3, 0.3, 0.3, 0.7, 0.3])

In [86]:
print(ca,cp)
ca*cp

[1 1 0 0 1 0 1] [1 1 0 0 0 1 0]


array([1, 1, 0, 0, 0, 0, 0])

In [92]:
logloss(ca, cp)

Multiply [-9.99200722e-16 -9.99200722e-16 -0.00000000e+00 -0.00000000e+00
 -3.45387764e+01 -0.00000000e+00 -3.45387764e+01]
-ve loss 69.07755278982137
out 9.868221827117338


9.868221827117338

In [93]:
logloss(np.array([1,1,0,0,1,0,1]),np.array([1,1,0,0,1,0,1]))

Multiply [-9.99200722e-16 -9.99200722e-16 -0.00000000e+00 -0.00000000e+00
 -9.99200722e-16 -0.00000000e+00 -9.99200722e-16]
-ve loss 3.996802888650566e-15
out 5.709718412357951e-16


5.709718412357951e-16

In [94]:
np.log(EPS) #i.e zero -> wrong prediction, loss value is very high

-34.538776394910684

In [95]:
np.log(1-EPS) #i.e one -> correct prediction, loss value is very very low -> -9.9e-16

-9.992007221626415e-16

In [109]:
np.log(np.clip(cp, EPS, 1 - EPS))

array([-9.99200722e-16, -9.99200722e-16, -3.45387764e+01, -3.45387764e+01,
       -3.45387764e+01, -9.99200722e-16, -3.45387764e+01])

In [110]:
ca * np.log(np.clip(cp, EPS, 1 - EPS))

array([-9.99200722e-16, -9.99200722e-16, -0.00000000e+00, -0.00000000e+00,
       -3.45387764e+01, -0.00000000e+00, -3.45387764e+01])

In [114]:
print(ca,cp)
print(np.log(np.clip(cp, EPS, 1 - EPS)))
print(ca * np.log(np.clip(cp, EPS, 1 - EPS)))

[1 1 0 0 1 0 1] [1 1 0 0 0 1 0]
[-9.99200722e-16 -9.99200722e-16 -3.45387764e+01 -3.45387764e+01
 -3.45387764e+01 -9.99200722e-16 -3.45387764e+01]
[-9.99200722e-16 -9.99200722e-16 -0.00000000e+00 -0.00000000e+00
 -3.45387764e+01 -0.00000000e+00 -3.45387764e+01]


> As per above observations for every wrong prediction - the multiplication value goes to zero and when summed together we get high logloss values, in magnitudes of 10 power 16

### As per above , false positives are not having any effect on the loss.!

In [105]:
def hinge(actual, predicted):
    print('multiply',actual * predicted)
    print('1-mul',1 - actual * predicted)
    print('max',np.max(1 - actual * predicted, 0))
    print('loss',np.mean(np.max(1 - actual * predicted, 0)))
    return np.mean(np.max(1 - actual * predicted, 0))

In [106]:
print('ca, cp', ca, cp)
hinge(ca,cp)

ca, cp [1 1 0 0 1 0 1] [1 1 0 0 0 1 0]
multiply [1 1 0 0 0 0 0]
1-mul [0 0 1 1 1 1 1]
max 1
loss 1.0


1.0

It will give 1 for any one failed prediction it seems

In [115]:
def binary_crossentropy(actual, predicted):
    predicted = np.clip(predicted, EPS, 1 - EPS)
    return np.mean(-np.sum(actual * np.log(predicted) + 
                           (1 - actual) * np.log(1 - predicted)))


In [124]:
print(ca,cp)
pred = np.clip(cp, EPS, 1 - EPS)
print(np.log(pred))
print(ca * np.log(pred))
print(np.sum(ca * np.log(pred)))
print(np.sum(ca * np.log(pred))/float(ca.shape[0]))
print(np.mean(-np.sum(ca * np.log(pred))))

[1 1 0 0 1 0 1] [1 1 0 0 0 1 0]
[-9.99200722e-16 -9.99200722e-16 -3.45387764e+01 -3.45387764e+01
 -3.45387764e+01 -9.99200722e-16 -3.45387764e+01]
[-9.99200722e-16 -9.99200722e-16 -0.00000000e+00 -0.00000000e+00
 -3.45387764e+01 -0.00000000e+00 -3.45387764e+01]
-69.07755278982137
-9.868221827117338
69.07755278982137


In [121]:
print(ca,cp)
pred = np.clip(cp, EPS, 1 - EPS)
print(1-ca, np.log(1-pred))
print(1-ca * np.log(1-pred))


[1 1 0 0 1 0 1] [1 1 0 0 0 1 0]
[0 0 1 1 0 1 0] [-3.45395760e+01 -3.45395760e+01 -9.99200722e-16 -9.99200722e-16
 -9.99200722e-16 -3.45395760e+01 -9.99200722e-16]
[35.53957599 35.53957599  1.          1.          1.          1.
  1.        ]
