In [2]:
import numpy as np
import tensorflow as tf

np.random.seed(1)
tf.set_random_seed(1)

  return f(*args, **kwds)


# Tensorflow

## Variables input et output
$$X = \quad
\begin{bmatrix} 
x_{1}^{(1)} & x_{1}^{(1)} & ... & x_{1}^{(m)} \\
x_{2}^{(1)} & x_{2}^{(2)} & ... & x_{2}^{(m)} \\
\vdots & \vdots & ... & \vdots\\
x_{n_0}^{(1)} & x_{n_0}^{(2)} & ... & x_{n_0}^{(m)}
\end{bmatrix}
\in \mathbb{R^{n_0 \times{} m}}
$$

$$y = \quad
\begin{bmatrix} 
0 & 1 & ... & 0 \\
1 & 0 & ... & 0 \\
\vdots & \vdots & ... & \vdots\\
0 & 0 & ... & 1
\end{bmatrix}
\in \mathbb{R^{n_2 \times{} m}}
$$

In [3]:
# Input and output
n0 = 3
n2 = 2
X = tf.placeholder(tf.float32, shape=[n0, None], name="x")
y = tf.placeholder(tf.float32, shape=[2, None], name="y")
X, y

(<tf.Tensor 'x:0' shape=(3, ?) dtype=float32>,
 <tf.Tensor 'y:0' shape=(2, ?) dtype=float32>)

## Hidden layer

$$z_{1} = W_{1}X + b_{1}$$
$$a_{1} = \sigma(z_{1})$$

In [4]:
# Hidden layer
n1 = 5
W1 = tf.Variable(tf.random_normal([n1, n0]), tf.float32, name="W1")
b1 = tf.Variable(tf.random_normal([n1, 1]), tf.float32, name="b1")
z1 = tf.matmul(W1, X) + b1
a1 = tf.sigmoid(z1)
a1

<tf.Tensor 'Sigmoid:0' shape=(5, ?) dtype=float32>

## Output layer

$$z_{2} = W_{2}X + b_{2}$$

In [5]:
# Output layer
W2 = tf.Variable(tf.random_normal([n2, n1]), dtype=tf.float32, name="W2")
b2 = tf.Variable(tf.random_normal([n2, 1]), dtype=tf.float32, name="b2")
z2 = tf.matmul(W2, a1) + b2
z2

<tf.Tensor 'add_1:0' shape=(2, ?) dtype=float32>

$$\hat{y} = softmax(z_{2}) \in \mathbb{R^{n_2}}$$
$$y_{pred} = argmax(\hat{y}) \in \mathbb{R}$$

In [6]:
# Softmax
y_hat = tf.nn.softmax(z2) # [0.3, 0.7]
y_pred = tf.argmax(y_hat) # 0 (A) ou 1 (B)
y_hat, y_pred

(<tf.Tensor 'Softmax:0' shape=(2, ?) dtype=float32>,
 <tf.Tensor 'ArgMax:0' shape=(?,) dtype=int64>)

## Loss

$$ \mathcal{L}(y^{(i)}, \hat{y}^{(i)}) = - \sum_{j}{y^{(j)} log(\hat{y}^{(j)})} $$

$$ L = \frac{1}{m} \sum_{i=1}^{m}{\mathcal{L}(y^{(i)}, \hat{y}^{(i)})} $$

In [7]:
# Cross-entropy loss
cross_entropy = -tf.reduce_sum(y * tf.log(y_hat), axis=0)
cross_entropy = tf.reduce_mean(cross_entropy) # sur toutes les données
cross_entropy

<tf.Tensor 'Mean:0' shape=() dtype=float32>

## Gradient Descent

In [8]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_step = optimizer.minimize(cross_entropy)
train_step

<tf.Operation 'GradientDescent' type=NoOp>

## Train Dataset

$$X,y = \{(x^{(1)}, y^{(1)}),(x^{(2)}, y^{(2)}), ..., (x^{(m)}, y^{(m)})\}$$

In [9]:
m = 5
X_train = np.random.randn(3, m) # m vecteurs random dans R^3
y_train = np.eye(m)[np.random.choice(m, 2)] # m vecteurs one-hot random dans R^2
print(X_train)
print(y_train)

[[ 1.62434536 -0.61175641 -0.52817175 -1.07296862  0.86540763]
 [-2.3015387   1.74481176 -0.7612069   0.3190391  -0.24937038]
 [ 1.46210794 -2.06014071 -0.3224172  -0.38405435  1.13376944]]
[[0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]]


## Learning

In [10]:
sess = tf.InteractiveSession()

In [11]:
# Initialise les tf.Variable()
sess.run(tf.global_variables_initializer())

# Tensorboard
tf.summary.FileWriter("../tmp/simple_nn_with_tf", sess.graph)

for epoch in range(10):

    # sess.run(cross_entropy) -> calcul de la valeur de la loss
    # sess.run(train_step) -> calcul d'une étape de la Gradient Descent et mise à jour des paramètres
    # feed_dict -> données réelles que prennent les placeholders X et y
    loss, _ = sess.run([cross_entropy, train_step], feed_dict={X: X_train, y: y_train})

    print("epoch {} \t loss: {}".format(epoch, loss))

epoch 0 	 loss: 0.7989217042922974
epoch 1 	 loss: 0.7985903024673462
epoch 2 	 loss: 0.7982592582702637
epoch 3 	 loss: 0.7979282140731812
epoch 4 	 loss: 0.7975974678993225
epoch 5 	 loss: 0.7972668409347534
epoch 6 	 loss: 0.7969365119934082
epoch 7 	 loss: 0.7966062426567078
epoch 8 	 loss: 0.796276330947876
epoch 9 	 loss: 0.795946478843689


## Testing

In [11]:
m_test = 10
X_test = np.random.randn(3, m_test) # m vecteurs random dans R^3
y_test = np.random.randint(0, 2, size=m_test) # m valeurs dans {0,1}

In [12]:
# Feed forward avec les données de test
y_eval = sess.run(y_pred, feed_dict={X: X_test})
y_test, y_eval

(array([0, 0, 0, 1, 1, 1, 1, 1, 0, 1]), array([1, 0, 1, 1, 1, 0, 1, 1, 1, 0]))

$$ accuracy = \frac{1}{m} \sum_{i=1}^{m_{test}}{\mathbb{1}{\{y^{(i)} = \hat{y}^{(i)}\}}} $$

In [13]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_eval)

0.5

# Tensorboard

`tensorboard --logdir=../tmp/simple_nn_with_tf`