In [None]:
%pylab inline
import numpy as np
import pandas as pd
import scipy.stats
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
import sys
sys.path.append("../errortools/")
import errortools

# Fitting and predicting

In [None]:
# Configuration for the synthetic logistic-regression sample.
ndim = 3              # number of feature columns in X
fit_intercept = True  # flag for fitting a bias term in the logistic model
ndata = 100           # number of samples (rows of X)

# Ground-truth coefficients: one weight per feature, followed by the bias.
p_true = [2, 0, -2, 0]

# The data-generating cell appends exactly one column of ones to X before taking
# the dot product with p_true, so a length mismatch would otherwise only show up
# there as an opaque shape error. Fail early with a readable message instead.
assert len(p_true) == ndim + 1, "p_true needs one weight per feature plus a bias term"

In [None]:
# Fix the seed so the synthetic sample is reproducible.
np.random.seed(42)

# Features: ndata points drawn uniformly from [-1, 1) in each of ndim columns.
X = np.random.uniform(low=-1, high=1, size=(ndata, ndim))

# True class-1 probability: logistic CDF of the linear score. A trailing column
# of ones is appended so that the last entry of p_true acts as the bias.
bias_column = np.ones((X.shape[0], 1), dtype=float)
design = np.concatenate((X, bias_column), axis=1)
p = scipy.stats.logistic.cdf(np.dot(design, p_true))

# Labels: y is 1 where p exceeds a fresh uniform(0, 1) draw, else 0.
threshold = np.random.uniform(size=ndata)
y = (p > threshold).astype(int)

In [None]:
# Pairwise scatter plots of the three features, colored by class label.
fig, ax = plt.subplots(1, 3, figsize=(15,4))

# (horizontal, vertical) feature column shown in each of the three panels.
feature_pairs = [(0, 1), (0, 2), (1, 2)]
# Drawing order per panel: class 0 in orange first, then class 1 in green.
class_colors = [(0, 'orange'), (1, 'green')]

for panel, (col_h, col_v) in zip(ax, feature_pairs):
    for label, color in class_colors:
        members = (y == label)
        panel.plot(X[members, col_h], X[members, col_v], 'o', color=color, alpha=0.2, markersize=5)
    panel.set_xlabel("x%d" % col_h)
    panel.set_ylabel("x%d" % col_v)

In [None]:
# Fit the logistic regression on the synthetic sample. The intercept option is
# read from the configuration variable `fit_intercept` rather than repeating the
# literal, so the setting is controlled from one place.
model = errortools.LogisticRegression(fit_intercept=fit_intercept)
model.fit(X,y)

In [None]:
# One panel per feature: sweep that feature over [-2, 2] while the other two are
# held at zero, then draw the true curve, the fitted curve and three error bands.
fig, ax = plt.subplots(1, 3, figsize=(20,5))

nstddvs = 1

# Fitted parameters, the model's cvr_mtx (presumably the covariance matrix) and
# its inverse. None of these is used further down in this cell.
p = model.parameters
cvr_mtx = model.cvr_mtx
prc_mtx = np.linalg.inv(cvr_mtx)

# Sweep values as a (100, 1) column, and a matching column of zeros.
u = np.linspace(-2, 2, 100).reshape(-1,1)
a = np.zeros((100,1), dtype=float)

for k in range(3):
    # Put the sweep in column k and zeros in the other two columns.
    columns = [a, a, a]
    columns[k] = u
    x = np.concatenate(columns, axis=1)

    # Model prediction and the three error estimates, queried in a fixed order.
    f = model.predict(x)
    el1, eu1 = model.estimate_errors(x, nstddvs)
    es = model.estimate_errors_sampling(x, 100)
    el = model.estimate_errors_linear(x, 1)

    # True curve: logistic CDF of the true linear score (ones column for the bias).
    x_with_ones = np.concatenate((x, np.ones((x.shape[0],1))), axis=1)
    g = scipy.stats.logistic.cdf(np.dot(x_with_ones, p_true))

    panel = ax[k]
    grid = u.ravel()
    panel.plot(u, g, '-', color='black', alpha=1, label="true curve")
    panel.plot(u, f, '-', color='red', label="fitted curve")
    panel.fill_between(x=grid, y1=f-el1, y2=f+eu1, alpha=0.3, color='green', label="error")
    panel.fill_between(x=grid, y1=f-nstddvs*es, y2=f+nstddvs*es, alpha=0.3, color='orange', label="sampled error")
    panel.fill_between(x=grid, y1=f-nstddvs*el, y2=f+nstddvs*el, alpha=0.3, color='blue', label="linear error")
    panel.set_xlabel("x%d" % k)
    panel.set_ylabel("logistic prob")
    panel.legend()


# Create report (2 ways)

In [None]:
# Way 1: open the PDF here and pass it to every report function; the `with`
# block closes the file on exit.
features = ['x1', 'x2', 'x3', 'bias']
with PdfPages('Report.pdf') as pdf:
    errortools.errortools.report_correlation_matrix(model, features, pdf)
    errortools.errortools.report_parameter_error(model, features, pdf)
    errortools.errortools.report_loss_versus_approximation(model, X, y, 0, 0, features, pdf)
    # Individual-prediction report for the first sample, once per listed feature.
    for feature_name in ('x1', 'x2'):
        errortools.report_error_indivial_pred(model, X[0], feature_name, features, 0, 20, 100, pdf)
    errortools.report_model_positive_ratio(model, X, y, 1000, 10, pdf)
    errortools.report_error_test_samples(model, X, pdf)

In [None]:
# Way 2: the first report call is given no pdf argument; its return value is
# used as the pdf for every following call and is closed explicitly at the end.
pdf = errortools.errortools.report_correlation_matrix(model, features=features)
try:
    pdf = errortools.errortools.report_parameter_error(model, features, pdf)
    pdf = errortools.errortools.report_loss_versus_approximation(model, X, y, 0, 0, features, pdf)
    pdf = errortools.report_error_indivial_pred(model, X[0], 'x1', features, 0, 20, 100, pdf)
    pdf = errortools.report_error_indivial_pred(model, X[0], 'x2', features, 0, 20, 100, pdf)
    pdf = errortools.report_model_positive_ratio(model, X, y, 1000, 10, pdf)
    pdf = errortools.report_error_test_samples(model, X, pdf)
finally:
    # Close the pdf even if one of the report calls raises, so the open
    # handle is not leaked in the running kernel.
    pdf.close()