## Génération de figures pour le chapitre « Minimisation du risque empirique ».

In [None]:
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Global figure styling: 20 pt font from the 'sans' family.
plt.rc('font', family='sans', size=20)

# Render all text through LaTeX; the preamble loads mathptmx, amsmath and
# amssymb so that labels can use those packages.
matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['text.latex.preamble'] = (
    r"\usepackage{mathptmx} \usepackage{amsmath} \usepackage{amssymb}"
)

In [None]:
# Colours of the active matplotlib property cycle, in cycle order, so that an
# artist can be given a specific cycle colour by index (def_colors[k]).
prop_cycle = plt.rcParams['axes.prop_cycle']
def_colors = [cycle_entry['color'] for cycle_entry in prop_cycle]

In [None]:
import scipy.stats as st

## Simple classification problem

In [None]:
# 250 two-dimensional points drawn uniformly from the square [-1, 1) x [-1, 1).
# A dedicated, seeded generator makes the sample (and hence the saved figure)
# identical on every run, and leaves NumPy's global random state untouched for
# the other cells.
SIMPLE_CLASSIF_SEED = 0
X = np.random.RandomState(SIMPLE_CLASSIF_SEED).uniform(low=-1.0, high=1.0, size=(250, 2))

In [None]:
# Value of the quadratic form 3*x1^2 + x2^2 at every sample.
quad_form = 3 * X[:, 0] ** 2 + X[:, 1] ** 2

# Row indices of the two classes: positives have a value strictly below 0.5,
# negatives a value of at least 0.75.  Samples whose value falls in
# [0.5, 0.75) belong to neither index set.
pos = np.flatnonzero(quad_form < 0.5)
neg = np.flatnonzero(quad_form >= 0.75)

In [None]:
# Scatter plot of the two classes of the toy classification problem.
fig = plt.figure(figsize=(7, 7))

# One scatter call per class, positives first so that they take the first
# colour of the cycle; 'x' marks the positives and '+' the negatives.
for class_idx, class_marker in ((pos, 'x'), (neg, '+')):
    plt.scatter(X[class_idx, 0], X[class_idx, 1], marker=class_marker, s=142)

# The same hand-written tick positions and labels are used on both axes.
tick_locs = [-1, -0.5, 0, 0.5, 1.]
tick_names = ["-1", "-0.5", "0", "0.5", "1"]
plt.xticks(tick_locs, tick_names)
plt.yticks(tick_locs, tick_names)

plt.xlabel("$x_1$")
plt.ylabel("$x_2$")

plt.savefig('../figures/erm/simple_classif.pdf', bbox_inches='tight')

## Multiple solutions to linear classifier

In [None]:
# Load the toy data set: samples are the rows of X, labels are the entries of y.
X = np.loadtxt('data/linearly_separable_X.csv')
y = np.loadtxt('data/linearly_separable_y.csv')

# Samples 11 and 25 are too close to the hyperplane: drop both in a single
# call (the indices refer to the arrays as loaded).
too_close = [11, 25]
X = np.delete(X, too_close, axis=0)
y = np.delete(y, too_close)

# Index lists of the two classes.  Every sample whose label is not strictly
# positive is a negative, so the test is made on the label itself rather than
# by scanning the `pos` list once per index.
is_pos = y > 0
pos = np.flatnonzero(is_pos).tolist()
neg = np.flatnonzero(~is_pos).tolist()

In [None]:
# The two classes, without axes, together with several dotted straight lines
# (the candidate linear classifiers of this section).
fig = plt.figure(figsize=(8, 8))
plt.axis('off')

# Positives first ('x'), then negatives ('+'), as consecutive cycle colours.
for class_idx, class_marker in ((pos, 'x'), (neg, '+')):
    plt.scatter(X[class_idx, 0], X[class_idx, 1], marker=class_marker, s=142)

# Each pair gives the abscissa of one line at ordinate 0 and at ordinate 1.
line_endpoints = [
    (0.11, 0.51),
    (0.12, 0.55),
    (0.08, 0.6),
    (0.06, 0.5),
    (0.07, 0.58),
]
for x_at_0, x_at_1 in line_endpoints:
    plt.plot([x_at_0, x_at_1], [0, 1], ':', lw=2, color='k')

plt.savefig('../figures/erm/multiple_solutions.pdf', bbox_inches='tight')

## Cross-entropy

### Positive points

In [None]:
# Grid of 199 values covering (0, 1); both end points stay 1e-4 away from 0
# and 1 so that log(x) and log(1 - x) remain finite on the whole grid.
xvals = np.linspace(start=1e-4, stop=0.9999, num=199)

In [None]:
from matplotlib.ticker import MultipleLocator, FormatStrFormatter

In [None]:
# Cross-entropy for a positive example, -log f(x), plotted against f(x).
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(xvals, -np.log(xvals), label="entropie croisée", lw=3, ls='-')

# x axis: major ticks every 0.2 labelled with one decimal, and minor ticks
# every 0.1 (minor ticks keep the default formatter, i.e. no labels).
ax.xaxis.set_major_locator(MultipleLocator(0.2))
ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax.xaxis.set_minor_locator(MultipleLocator(0.1))

# y axis: major ticks every 2 labelled with one decimal, minor ticks every 0.5.
ax.yaxis.set_major_locator(MultipleLocator(2.))
ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax.yaxis.set_minor_locator(MultipleLocator(0.5))

ax.set_xlabel(r"$f(\vec{x})$")
ax.set_ylabel(r"$L(+, f(\vec{x}))$")

ax.grid()

plt.savefig('../figures/erm/logistic_loss_pos.pdf', bbox_inches='tight')

In [None]:
# Cross-entropy for a negative example, -log(1 - f(x)), plotted against f(x).
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(xvals, -np.log(1-xvals), label="entropie croisée", lw=3, ls='-')

# x axis: major ticks every 0.2, labelled with one decimal (no minor ticks set).
ax.xaxis.set_major_locator(MultipleLocator(0.2))
ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))

# y axis: major ticks every 2, labelled with one decimal (no minor ticks set).
ax.yaxis.set_major_locator(MultipleLocator(2.))
ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

ax.set_xlabel(r"$f(\vec{x})$")
ax.set_ylabel(r"$L(-, f(\vec{x}))$")

ax.grid()

plt.savefig('../figures/erm/logistic_loss_neg.pdf', bbox_inches='tight')

In [None]:
# Grid of 199 values on [-3, 3], used as the abscissa y*f(x) of the loss
# comparison below.  NOTE: this rebinds `xvals`, previously a grid on (0, 1).
xvals = np.linspace(start=-3, stop=3, num=199)

In [None]:
# Classification losses plotted against y*f(x): the 0/1 loss and the logistic
# loss.
fig, ax = plt.subplots(figsize=(7, 7))

# 0/1 loss: 1 where the abscissa is <= 0, and 0 elsewhere.
yvals = np.where(xvals <=0, 1, 0)
# The labels are raw strings: "\^" is a LaTeX accent command, not a Python
# escape sequence, and triggers an invalid-escape-sequence warning when written
# in an ordinary string literal.
plt.plot(xvals, yvals, label=r"co\^ut 0/1", lw=3, ls='--')

# logistic loss: log(1 + exp(-y f(x)))
plt.plot(xvals, np.log(1 + np.exp(-xvals)), label=r"co\^ut logistique",
        lw=3, ls=':')

# x axis: major ticks every 1, labelled with one decimal (no minor ticks set).
ax.xaxis.set_major_locator(MultipleLocator(1))
ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))

# y axis: major ticks every 1 labelled with one decimal; minor ticks every 0.5
# keep the default formatter, i.e. no labels.
ax.yaxis.set_major_locator(MultipleLocator(1.))
ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax.yaxis.set_minor_locator(MultipleLocator(0.5))

plt.xlabel(r"$y f(\vec{x})$")
plt.ylabel(r"$L(y, f(\vec{x}))$")

lg = plt.legend(loc=(0.25, 0.6), frameon=False)
# Saving is currently disabled for this figure:
#plt.savefig('../figures/erm/classif_losses.pdf', bbox_inches='tight')

## Linear regression

In [None]:
# Synthetic 1-D regression data: 50 inputs in [0, 1), one slope in [0, 1) and
# 50 uniform noise terms in [-0.15, 0.15), drawn in that order from NumPy's
# global generator.
# NOTE(review): no seed is set in the visible cells, so these draws (and the
# figure built from them) differ from one run to the next.
n_points = 50
X = np.random.random(n_points)
w = np.random.random()
eps = 0.3 * (np.random.random(n_points) - 0.5)

In [None]:
# Noisy responses: slope times input, plus the noise term.  `w` is a scalar,
# so the product is taken elementwise over the inputs.
y = X * w + eps

In [None]:
# Noise-free value of the linear model at the sample of index 1, which is the
# sample highlighted in the figure below.
f1 = w * X[1]

In [None]:
# Linear-regression illustration: all samples, the line through the origin of
# slope w, one highlighted sample (index 1) with its value on the line, and a
# normal density centred on that value, drawn sideways from the sample's
# abscissa.
x_star, y_star = X[1], y[1]

fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(1, 1, 1)

# Every sample, then the highlighted observation and its value on the line.
ax.scatter(X, y, marker='x', s=62)
ax.scatter(x_star, y_star, marker='x', s=122, color=def_colors[2])
ax.scatter(x_star, f1, marker='o', s=122, color=def_colors[1])
ax.plot([0, 1], [0, w], color=def_colors[1], ls='-', lw=3)

# Dashed guides: vertical through x*, horizontal at the value on the line,
# and horizontal from the y axis to the observation.
for guide_x, guide_y in (
    ([x_star, x_star], [-0.1, 0.9]),
    ([0, 1.1], [f1, f1]),
    ([0, x_star], [y_star, y_star]),
):
    ax.plot(guide_x, guide_y, color='k', ls='--')

# Normal density with mean f1 and variance 0.005, evaluated on f1 +/- 0.2,
# scaled by 0.025 and offset by x* so that it is drawn along the vertical guide.
distr = st.multivariate_normal(mean=f1, cov=0.005)
yvals = np.linspace(f1 - 0.2, f1 + 0.2, 100)
xvals = distr.pdf(yvals) * 0.025 + x_star
ax.plot(xvals, yvals, color=def_colors[2], lw=3)

# Hide the upper and right spines.
for side in ('right', 'top'):
    ax.spines[side].set_color('none')

# Text annotations as (x, y, caption, colour); they take the place of the tick
# labels, which are removed below.
annotations = (
    (x_star - 0.02, -0.15, r'$x^*$', def_colors[2]),
    (-0.25, f1 - 0.03, r'$f_{\alpha, \beta}(x^*)$', def_colors[1]),
    (-0.08, y_star - 0.02, r'$y^*$', def_colors[2]),
    (1.01, w, r'$f_{\alpha, \beta}(x)$', def_colors[1]),
    (x_star + 0.05, y_star - 0.1, r'$g_{Y|X=x^*}$', def_colors[2]),
)
for text_x, text_y, caption, colour in annotations:
    ax.text(text_x, text_y, caption, color=colour)

plt.xticks([], [])
plt.yticks([], [])
ax.set_xlim([0, 1.1])
ax.set_ylim([-0.1, 0.6])

plt.savefig('../figures/erm/linreg.pdf', bbox_inches='tight')