From 1be0ade9aea23dd56bae9d6e6e0f3abbd7e75c20 Mon Sep 17 00:00:00 2001 From: Arsenii Ashukha Date: Sun, 27 Aug 2017 15:47:53 +0300 Subject: [PATCH] sem2 --- .DS_Store | Bin 0 -> 6148 bytes 2_ae_with_gaps.ipynb | 336 ---------- 3_vae_with_gaps.ipynb | 486 --------------- 4_ss_vae.ipynb | 559 ----------------- sem2-dnn4classification/mnist.py | 77 --- .../my_first_nn_lsagne.ipynb | 586 ------------------ utils.py | 115 ---- 7 files changed, 2159 deletions(-) create mode 100644 .DS_Store delete mode 100644 2_ae_with_gaps.ipynb delete mode 100644 3_vae_with_gaps.ipynb delete mode 100644 4_ss_vae.ipynb delete mode 100644 sem2-dnn4classification/mnist.py delete mode 100644 sem2-dnn4classification/my_first_nn_lsagne.ipynb delete mode 100644 utils.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..507cca7fa365ab8f38a192e00a1f83db5234bba4 GIT binary patch literal 6148 zcmeHK!AiqG5S?wSCWzRBRF8YnQ-!KX^&o_*2X8_}4=OdWQ3ElNB(-R*S0O0l-2fEZEq5BQ#DrAqC?h6e`C$Ttf&h zr1)c*1_o&FD!5xd+`}e}?q2{m@C0jci(?ULg8Q0*?{(7$FN{5m`|uvZAkAvEPf;kA zW@hIs+qUcl>#}(lcbl1;b<&2LoU2~Y_B~^t#Bn&ZUGFsTyRGulejH_P5c$c-^oz3~ zYRBD1+=<$$%5&^ycNnV{YLzR!-q!YRRo41d*{j!URk>5I_xqN$yt=V>bansq+%*w*tP=s0?{ZdT_;b`QR8DIv+87S&*nd<-H&-efFAnq{(%)q~5K$H%e2Mv5B zQ(Grqj%uw$y+S3SdAY`62pXys<6J7mE2v7)FUdf34CWfqgTfyIiUw|&fj?#73!nI0 A7XSbN literal 0 HcmV?d00001 diff --git a/2_ae_with_gaps.ipynb b/2_ae_with_gaps.ipynb deleted file mode 100644 index fd1c6fd..0000000 --- a/2_ae_with_gaps.ipynb +++ /dev/null @@ -1,336 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "# Автокодировщик" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "import numpy as np\n", - "import theano\n", - "import theano.tensor as T\n", - "import lasagne\n", - "\n", - "import matplotlib.pylab as plt\n", - "from utils import load_dataset, iterate_minibatches\n", - "%matplotlib inline\n", - "\n", - "BATCH_SIZE = 20\n", - "HIDDEN_DIM = 2\n", - "\n", - "num_epochs = 40\n", - "\n", - "X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "## Обучение модели\n", - "\n", - "tl;dr: Автокодировщик может быть использован для построения маломерных признаков данных без разметки.\n", - "\n", - "В процессе обучения строится пара отображений $E: \\mathbb R^D \\rightarrow R^d$ (кодировщик) и $D: \\mathbb R^d \\rightarrow R^D$ (декодировщик), чья композиция приближает тождественное отображение:\n", - "\n", - "$$ D(E(x)) \\approx x $$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "# Определим кодировщик и декодировщик с помощью пары полносвязных нейронных сетей\n", - "\n", - "def ae_encoder(input_var):\n", - " l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)\n", - " ######################################################################################\n", - " # Реализуйте некоторую несложную архитектуру кодировщика, возвращающую HIDDEN_DIM-мерный код #\n", - " # Какие функции активации можно поставить на выход сети? #\n", - " ######################################################################################\n", - " return l_out\n", - "\n", - "\n", - "def ae_decoder(input_var):\n", - " l_in = lasagne.layers.InputLayer(shape=(None, HIDDEN_DIM), input_var=input_var)\n", - " ##################################################################################################\n", - " # Реализуйте некоторую несложную архитектуру декодировщика, возвращающую батч объектов размера (1, 28, 28) #\n", - " ##################################################################################################\n", - " return l_out" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "# Инициализируем сеть\n", - "input_x = T.tensor4('input_x')\n", - " \n", - "encoder = ae_encoder(input_x)\n", - "decoder = ae_decoder(\n", - " lasagne.layers.get_output(encoder)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Для обучения автокодировщика будем использовать среднеквадратичную ошибку\n", - "\n", - "$$ L(X) = \\frac{1}{N}\\sum_{i=1}^{N} \\sum_{j=1}^{28^2} \\left( D(E(x_i))_j - x_{i,j} \\right)^2 = \\frac{1}{N}\\sum_{i=1}^{N} (D(E(x_i)) - x_i)^T (D(E(x_i)) - x_i) $$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "#####################################################################################\n", - "# Определите операцию для вычисления функции потерь, а также создайте список параметров модели #\n", - "# для передачи в оптимизатор #\n", - "loss = None\n", - "params = None\n", - "#####################################################################################\n", - "\n", - "updates = lasagne.updates.adam(loss, params)\n", - " \n", - "train = theano.function(\n", - " [input_x],\n", - " loss,\n", - " updates=updates\n", - ")\n", - "test_loss = theano.function(\n", - " [input_x],\n", - " loss\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Обучение, как и во многих других случаях, выполяется с помощью стохастического градиентного спуска" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "for epoch in range(num_epochs):\n", - " train_err = 0\n", - " train_batches = 0\n", - " start_time = time.time()\n", - " for batch in iterate_minibatches(X_train, batchsize=BATCH_SIZE):\n", - " train_err += train(batch)\n", - " train_batches += 1\n", - " \n", - " test_err = 0\n", - " test_batches = 0\n", - " for batch in iterate_minibatches(X_test, batchsize=BATCH_SIZE):\n", - " test_err += test_loss(batch)\n", - " test_batches += 1\n", - " \n", - " print(\"Epoch {} of {} took {:.3f}s\".format(\n", - " epoch + 1, num_epochs, time.time() - start_time))\n", - " print(\"Train error {}\".format(train_err/train_batches))\n", - " print(\"Test error {}\".format(test_err/test_batches))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "## Визуализация\n", - "\n", - "Модель с двумерными скрытыми переменными легко визуализировать. Определим две функции: одну для построения пропущенных через автокодировщик изображений, вторую для вычисления скрытых представлений по изображению" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "from utils import plot_reconstructions, plot_hidden_space\n", - "\n", - "reconstruct = theano.function(\n", - " [input_x],\n", - " lasagne.layers.get_output(decoder)\n", - ")\n", - "\n", - "encode = theano.function(\n", - " [input_x],\n", - " lasagne.layers.get_output(encoder)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Примеры изображений, пропущенных через автокодировщик: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "plot_reconstructions(X_test, reconstruct)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Визуализация признакового пространства. Насколько пространство простое? Везде ли оно плотно? Как выбрать точку в этом пространстве, которая будет соответствовать коду какого-то объекта?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "plot_hidden_space(X_test[:1000], encode)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Попробуйте погенерировать изображения по паре координат" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "input_z = T.matrix('input_z')\n", - "\n", - "decode_a_code = theano.function(\n", - " [input_z],\n", - " lasagne.layers.get_output(decoder, input_z),\n", - ")\n", - "\n", - "def generate_from_code(x, y):\n", - " img = decode_a_code([[x, y]]).reshape((28, 28))\n", - " plt.imshow(img, 'gray')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "generate_from_code(50., 20.)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/3_vae_with_gaps.ipynb b/3_vae_with_gaps.ipynb deleted file mode 100644 index 0fe51df..0000000 --- a/3_vae_with_gaps.ipynb +++ /dev/null @@ -1,486 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "# Вариационный автокодировщик\n", - "\n", - "tl;dr: Вместо тождественного отображения вариационны автокодировщик выучивает вероятностую модель данных. Стохастическия вычисления и априорное распределение кодов дополнительно регуляризуют модель." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "import numpy as np\n", - "import theano\n", - "import theano.tensor as T\n", - "import lasagne\n", - "\n", - "import matplotlib.pylab as plt\n", - "%matplotlib inline\n", - "\n", - "from utils import load_dataset, iterate_minibatches\n", - "from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams\n", - "\n", - "BATCH_SIZE = 20\n", - "HIDDEN_DIM = 2\n", - "\n", - "num_epochs = 10\n", - "\n", - "X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "## Кратко о вариационных автокодировщиках\n", - "\n", - "Рассмотрим вариационный автокодировщик для бинарных наблюдений. Вариационный автокодировщик состоит из генеративной модели наблюдений\n", - "\n", - "\\begin{align}\n", - "& p(x, z | \\theta) = p(x | z, \\theta) p(z) \\\\\n", - "& p(x | z, \\theta) = \\prod_{i = 1}^D p_i(z, \\theta)^{x_i} (1 - p_i(z, \\theta))^{1 - x_i} \\\\\n", - "& p(z) = \\mathcal N(z | 0, I)\n", - "\\end{align}\n", - "\n", - "и приближенного апостериорного распределения\n", - "\n", - "\\begin{equation}\n", - "q(z | x, \\phi) = \\mathcal N(z | \\mu(x, \\phi), \\operatorname{diag}(\\sigma^2(x, \\phi)))\n", - "\\end{equation}\n", - "\n", - "Для краткости все выкладки приводятся для одного наблюдения $x$, параметры распределений по возможности опускаются. Для набора данных при обучении используется среднее значение нижней оценки. Цель обучения - максимизировать нижнюю оценку на обоснованность\n", - "\n", - "$$ \\mathcal L(x, \\theta, \\phi) = \\mathbb E_{q(z | x, \\phi)} p(x | z, \\theta) - \\operatorname{KL}(q(z | x, \\phi) || p(z )) = \\mathbb E_{q(z | x, \\phi)} \\log \\frac{p(x | z, \\phi)p(z)}{q(z | x, \\theta)} \\rightarrow \\max_{\\theta, \\phi} $$\n", - "\n", - "Как было рассказано на лекции, на практике нижняя оценка приближается оценкой \n", - "\n", - "\\begin{align*}\n", - "&\\frac{1}{K} \\sum_{k=1}^K \\log \\frac{p(x | z_k)p(z_k)}{q(z_k | x)} \\\\\n", - "& \\\\\n", - "&z_k = \\mu(x, \\phi) + \\sigma^2(x, \\phi)^T \\varepsilon_k \\\\\n", - "&\\varepsilon_k \\sim \\mathcal N(0, I), iid\n", - "\\end{align*}\n", - "\n", - "с K=1, а затем максимизируется с помощью градиентного подъема.\n", - "\n", - "## Как это реализовать?\n", - "\n", - "Для вычисления приведенной выше нижней оценки необходимо уметь:\n", - "1. Вычислять логарифм плотности всех распределений ($p(x | z)$, $p(z)$, $q(z | x)$)\n", - "2. Сэмплировать из $q(z | x)$\n", - "\n", - "Следуя практике *tensorflow.distributions*, мы реализуем распределения как два класса с методами *log_prob()* и *sample()*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "class BinaryVector():\n", - " def __init__(self, logits, rng=None):\n", - " self.rng = rng if rng else RandomStreams(lasagne.random.get_rng().randint(1,2147462579))\n", - " self.logits = logits\n", - "\n", - " def log_prob(self, x):\n", - " # возвращает вектор вероятностей для каждого объекта в батче\n", - " pixelwise_log_probs = (\n", - " x * (self.logits - T.nnet.softplus(self.logits))\n", - " - (1 - x) * T.nnet.softplus(self.logits)\n", - " )\n", - " return T.sum(pixelwise_log_probs, axis=(1, 2, 3))\n", - " \n", - " def sample(self):\n", - " shape = self.logits.shape\n", - " return T.nnet.sigmoid(self.logits) >= self.rng.uniform(shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "class MultivariateNormalDiag():\n", - " def __init__(self, loc=None, scale=None, rng=None):\n", - " self.rng = rng if rng else RandomStreams(lasagne.random.get_rng().randint(1,2147462579))\n", - " self.loc= loc\n", - " self.scale = scale\n", - " \n", - " def log_prob(self, z):\n", - " normalization_constant = (\n", - " - 0.5 * np.log(2 * np.pi)\n", - " - T.log(self.scale)\n", - " )\n", - " square_term = -0.5 * ((z - self.loc) / self.scale) ** 2\n", - " log_prob_vec = normalization_constant + square_term\n", - " return T.sum(log_prob_vec, axis=1)\n", - " \n", - " def sample(self):\n", - " ######################################################################\n", - " # Сэмплирование из q(z | x) - ключевой момент в вариационном автокоидровщике #\n", - " # Пользуясь методом self.rng.normal() реализуйте её самостоятельно #\n", - " ######################################################################\n", - " return z" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Для параметров распределений построим две сети. Обратите внимание, что кодировщик теперь возвращает и код, и параметр масштаба" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "def vae_encoder_mlp(input_x):\n", - " l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),\n", - " input_var=input_x)\n", - " ######################################################################################\n", - " # Реализуйте некоторую несложную архитектуру кодировщика, возвращающую вектор среднего и вектор #\n", - " # стандартных отклонений. Их размерность должны быть HIDDEN_DIM. Какие функции активаций ну-#\n", - " # жно использовать? #\n", - " ######################################################################################\n", - " return l_out_loc, l_out_scale\n", - "\n", - "def vae_decoder_mlp(input_z):\n", - " l_in = lasagne.layers.InputLayer(shape=(None, 2),\n", - " input_var=input_z)\n", - " l_hid1 = lasagne.layers.DenseLayer(\n", - " l_in, num_units=64,\n", - " nonlinearity=lasagne.nonlinearities.rectify,\n", - " W=lasagne.init.GlorotUniform(),\n", - " name='d_hid1')\n", - " l_hid2 = lasagne.layers.DenseLayer(\n", - " l_hid1, num_units=128,\n", - " nonlinearity=lasagne.nonlinearities.rectify,\n", - " name='d_hid2')\n", - " l_out = lasagne.layers.DenseLayer(\n", - " l_hid2, num_units=28 ** 2,\n", - " nonlinearity=None,\n", - " name='d_out')\n", - " l_out = lasagne.layers.ReshapeLayer(l_out, shape=(-1, 1, 28, 28))\n", - " return l_out" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "## Строим граф вычислений \n", - "\n", - "Входы и модель вывода $q(z | x)$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "input_x = T.tensor4('inputs')\n", - "\n", - "encoder_mean, encoder_scale = vae_encoder_mlp(input_x)\n", - "qz_x = MultivariateNormalDiag(\n", - " lasagne.layers.get_output(encoder_mean), \n", - " lasagne.layers.get_output(encoder_scale)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Генеративная модель $p(x, z)$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "decoder_logits = vae_decoder_mlp(qz_x.sample())\n", - "pz = MultivariateNormalDiag(T.zeros((BATCH_SIZE, HIDDEN_DIM)),\n", - " T.ones((BATCH_SIZE, HIDDEN_DIM)))\n", - "px_z = BinaryVector(\n", - " lasagne.layers.get_output(decoder_logits)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "ELBO и правила для обновления весов" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "########################################################################################\n", - "# Пользуясь методами px_z, p_z, qz_x определите функцию потерь для вариационного автокодировщика #\n", - "# При обучении значение функции потерь должно принимать значения порядка -100 (от -150 и выше) #\n", - "# Создайте список параметров сети для передачи в оптимизатор #\n", - "elbo = None\n", - "params = None\n", - "########################################################################################\n", - "loss = -elbo\n", - "updates = lasagne.updates.adam(loss, params)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Определяем функции" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "train = theano.function(\n", - " [input_x],\n", - " elbo,\n", - " updates=updates\n", - ")\n", - "\n", - "elbo_at_test = theano.function(\n", - " [input_x],\n", - " elbo\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "И обучаем модель" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "for epoch in range(num_epochs):\n", - " train_elbo = 0\n", - " train_batches = 0\n", - " start_time = time.time()\n", - " for batch in iterate_minibatches(X_train, batchsize=BATCH_SIZE):\n", - " \"\"\"\n", - " Обратите внимание, что тут предложенна вероятностная модель для бинарных данных.\n", - " MNIST содержит черно-белые изображения с градациями серого.\n", - " На практике при обучении автокодировщика получают бинарные данные, всякий раз положив случайно значение пикселя равным 0 или 1\n", - " в зависимости от интенсивности пикселя в объекте из данных.\n", - " Такой прием называется динамическая бинаризация, он эффективно расширяет обучающую выборку и приводит к лучшим значениям \n", - " правдоподобия обученных моделей.\n", - " \"\"\"\n", - " batch = np.random.rand(*batch.shape) <= batch\n", - " train_elbo += train(batch)\n", - " train_batches += 1\n", - " \n", - " test_elbo = 0\n", - " test_batches = 0\n", - " for batch in iterate_minibatches(X_test, batchsize=BATCH_SIZE):\n", - " batch = np.random.rand(*batch.shape) <= batch\n", - " test_elbo += elbo_at_test(batch)\n", - " test_batches += 1\n", - " \n", - " print(\"Epoch {} of {} took {:.3f}s\".format(\n", - " epoch + 1, num_epochs, time.time() - start_time))\n", - " print(\"Train error {}\".format(train_elbo/train_batches))\n", - " print(\"Test error {}\".format(test_elbo/test_batches))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "## Что получается? Визуализации" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "from utils import plot_reconstructions, plot_hidden_space\n", - "\n", - "reconstruct = theano.function(\n", - " [input_x],\n", - " T.nnet.sigmoid(lasagne.layers.get_output(decoder_logits))\n", - ")\n", - "\n", - "encode = theano.function(\n", - " [input_x],\n", - " qz_x.sample(),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Визуализируем среднее распределения $p(x | z)$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "plot_reconstructions(X_test, reconstruct)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Чем отличается пространство представлений автокоидровщика от пространства представлений вариационного автокоидровщика? Почему возникло различие?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "plot_hidden_space(X_test[:1000], encode)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "# рисуем по 25 сэмплов кода для каждого объекта\n", - "x_test_repeated = np.repeat(X_test[:25], repeats=25, axis=0)\n", - "plot_hidden_space(x_test_repeated, encode)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/4_ss_vae.ipynb b/4_ss_vae.ipynb deleted file mode 100644 index 57a4345..0000000 --- a/4_ss_vae.ipynb +++ /dev/null @@ -1,559 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "# Обучение на частично размеченной выборке*\n", - "\n", - "Дополнительные материалы к семинару. По мотивам статьи [\"Semi-supervised Learning with\n", - "Deep Generative Models\"](https://arxiv.org/pdf/1406.5298.pdf)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "import time\n", - "\n", - "import numpy as np\n", - "import theano\n", - "import theano.tensor as T\n", - "import lasagne\n", - "import matplotlib.pylab as plt\n", - "%matplotlib inline\n", - "\n", - "from utils import load_dataset, iterate_minibatches\n", - "from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Для этого задания мы повысим размерность скрытых компонент, а также случайным образом \"выбросим\" приблизительно 95% меток классов." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "BATCH_SIZE = 20\n", - "HIDDEN_DIM = 16\n", - "NUMBER_OF_DIGITS = 10\n", - "\n", - "num_epochs = 40\n", - "\n", - "X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()\n", - "present = np.random.rand(X_train.shape[0]) < 0.05" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Классы для распределений" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "class BinaryVector():\n", - " def __init__(self, logits, rng=None):\n", - " self.rng = rng if rng else RandomStreams(lasagne.random.get_rng().randint(1,2147462579))\n", - " self.logits = logits\n", - "\n", - " def log_prob(self, x):\n", - " pixelwise_log_probs = (\n", - " x * (self.logits - T.nnet.softplus(self.logits))\n", - " - (1 - x) * T.nnet.softplus(self.logits)\n", - " )\n", - " return T.sum(pixelwise_log_probs, axis=(1, 2, 3))\n", - " \n", - " def sample(self):\n", - " shape = self.logits.shape\n", - " return T.nnet.sigmoid(self.logits) >= self.rng.uniform(shape)\n", - "\n", - "class MultivariateNormalDiag():\n", - " def __init__(self, loc=None, scale=None, rng=None):\n", - " self.rng = rng if rng else RandomStreams(lasagne.random.get_rng().randint(1,2147462579))\n", - " self.loc= loc\n", - " self.scale = scale\n", - " \n", - " def log_prob(self, z):\n", - " normalization_constant = (\n", - " - 0.5 * np.log(2 * np.pi)\n", - " - T.log(self.scale)\n", - " )\n", - " square_term = -0.5 * ((z - self.loc) / self.scale) ** 2\n", - " log_prob_vec = normalization_constant + square_term\n", - " return T.sum(log_prob_vec, axis=1)\n", - " \n", - " def sample(self):\n", - " shape = self.loc.shape\n", - " z = (self.loc + self.scale * self.rng.normal(shape))\n", - " return z" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "## Вероятностная модель данных" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "В отличие от вариационного автокодировщика, генеративная модель теперь будет также включать и метки классов $y$:\n", - "\n", - "\\begin{align*}\n", - "& p(x, y, z) = p(x | y, z) p(z) p(y) \\\\\n", - "& p(y) = Cat(y | \\pi), \\pi = (1/10, \\dots, 1/10) \\\\\n", - "& p(z) = \\mathcal N(z | 0, I) \\\\\n", - "& p(x | y, z) = \\prod_{i=1}^D p_i(y, z)^{x_i} (1 - p_i(y, z))^{1 - x_i}\n", - "\\end{align*}\n", - "\n", - "При обучении вариационного автокодировщика максимизируется маргинальное правдоподобие $\\log p(x)$ (нижняя оценка на него, если быть точным), а в данном случае мы будем максимизировать $\\log p(x,y)$ для объектов с метками и $\\log p(x)$ для объектов без метки. Обозначим за $P$ индексы объектов обучающей выборки с метками класса.\n", - "\n", - "Построим нижнюю оценку для\n", - "\n", - "\\begin{equation}\n", - "L(X, y) = \\sum_{i \\notin P} \\log p(x_i) + \\sum_{i \\in P} \\log p(x_i, y_i)\n", - "\\end{equation}\n", - "\n", - "Для этого определим следующее вариационное приближение:\n", - "\n", - "\\begin{align*}\n", - "& q(y, z | x) = q(y | x) q(z | y, x)\\\\\n", - "& \\\\\n", - "& q(y | x) = Cat(y | \\pi(x))\\\\\n", - "& q(z | y, x) = \\mathcal N(z | \\mu_\\phi(x, y), \\operatorname{diag}\\sigma^2(y, x))\n", - "\\end{align*}\n", - "\n", - "### Оценка для $i \\in P$\n", - "\n", - "Случай похож на модель для вариационного автокодировщика\n", - "\n", - "\\begin{equation}\n", - "\\log p(x, y) = \\log \\mathbb E_{p(z)} p(x, y | z) \\geq \\mathbb E_{q(z | y, x)} \\log \\frac{p(x, y|z) p(z)}{q(z | y, x)}\n", - "\\end{equation}\n", - "\n", - "### Оценка $i \\notin P$\n", - "\n", - "\\begin{equation}\n", - "\\log p(x) = \\log \\mathbb E_{p(y)} \\mathbb E_{p(z | y)} \\log p(x| z, y)\\geq \\mathbb E_{q(y | x)} \\mathbb E_{q(z | y, x)} \\log \\frac{p(x, y, z)}{q(z | y, x) q(y | x)}\n", - "\\end{equation}\n", - "\n", - "### Целевая функия\n", - "\n", - "\\begin{equation}\n", - "\\mathcal L(X, y) = \\sum_{i \\in P} \\mathbb E_{q(z_i | y_i, x_i)} \\log \\frac{p(x_i, y_i, z_i)}{q(z_i | y_i, x_i)} + \\sum_{i \\notin P} \\mathbb E_{q(y_i | x_i)} \\mathbb E_{q(z_i | y_i, x_i)} \\log \\frac{p(x_i, y_i, z_i)}{q(z_i | y_i, x_i) q(y_i | x_i)}\n", - "\\end{equation}\n", - "\n", - "Оценку для математического ожидания по $z$ будет получать с помощью *reparametrization trick*.\n", - "Пользуясь малым количеством классов, математическое ожидание по $y$ будем вычислять явно.\n", - "\n", - "# Как заставить модель все-таки обучаться?\n", - "\n", - "Максимизация нижней оценки на обоснованность на практике может не приводит к построению хорошей модели вывода $q(y | x)$.\n", - "\n", - "Естественно искать модели $q(y | x)$ среди тех, которые согласуются с размеченными объектами обучающей выборки $(x_i, y_i)$. В статье, в которой была впервые предложена описанная в семинаре модель, с весом $\\alpha$ добавляется дополнительное слагаемое к функции потерь:\n", - "\n", - "\\begin{equation}\n", - "\\frac{1}{|P|}\\sum_{i \\in P} y_i^T \\log q(y | x).\n", - "\\end{equation}\n", - "\n", - "Оно соответствует кросс-энтропии классификатора $q(y|x)$ на размеченных объектах." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "### Особенности реализации\n", - "В данной реализации мы передаем на вход кодировщика и декодировщика one-hot коды для $y$.\n", - "\n", - "Это находит свое отражение в размерах входов сетей:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "def classifier_mlp(input_x):\n", - " # takes x to produce posterior class assignment probabilities\n", - " l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),\n", - " input_var=input_x)\n", - " l_hid1 = lasagne.layers.DenseLayer(\n", - " l_in, num_units=256,\n", - " nonlinearity=lasagne.nonlinearities.rectify,\n", - " W=lasagne.init.GlorotUniform(),\n", - " name='cl_hid1')\n", - " l_out = lasagne.layers.DenseLayer(\n", - " l_hid1, num_units=10,\n", - " nonlinearity=lasagne.nonlinearities.softmax,\n", - " name='cl_out')\n", - " return l_out\n", - "\n", - "def vae_encoder_cond(input_xy):\n", - " l_in = lasagne.layers.InputLayer(shape=(None, 28 * 28 + NUMBER_OF_DIGITS),\n", - " input_var=input_xy)\n", - " l_hid1 = lasagne.layers.DenseLayer(\n", - " l_in, num_units=256,\n", - " nonlinearity=lasagne.nonlinearities.rectify,\n", - " W=lasagne.init.GlorotUniform(),\n", - " name='e_hid')\n", - " l_out_loc = lasagne.layers.DenseLayer(\n", - " l_hid1, num_units=HIDDEN_DIM,\n", - " nonlinearity=None,\n", - " name='e_mean')\n", - " l_out_scale = lasagne.layers.DenseLayer(\n", - " l_hid1, num_units=HIDDEN_DIM,\n", - " nonlinearity=lasagne.nonlinearities.softplus,\n", - " name='e_scale')\n", - " \n", - " return l_out_loc, l_out_scale\n", - " \n", - " \n", - "def vae_decoder_cond(input_zy):\n", - " l_in = lasagne.layers.InputLayer(shape=(None, HIDDEN_DIM + NUMBER_OF_DIGITS),\n", - " input_var=input_zy)\n", - " l_hid1 = lasagne.layers.DenseLayer(\n", - " l_in, num_units=256,\n", - " nonlinearity=lasagne.nonlinearities.rectify,\n", - " W=lasagne.init.GlorotUniform(),\n", - " name='d_hid1')\n", - " l_out = lasagne.layers.DenseLayer(\n", - " l_hid1, num_units=28 * 28,\n", - " nonlinearity=None,\n", - " name='d_out')\n", - " l_out = lasagne.layers.ReshapeLayer(l_out, shape=(-1, 1, 28, 28))\n", - " return l_out" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "При обучении мы будем вычислять выходы нейросети на всех возможных значениях $y$, все они нужны в 95% случаев для подсчета нижней оценки на обоснованность. Для этого здесь написаны две вспомонательные функции:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "input_x = T.tensor4('input_x')\n", - "input_y = T.ivector('input_y')\n", - "input_p = T.bvector('input_present')\n", - "\n", - "def add_all_possible_labels(input_x):\n", - " # создает десять копий объекта из батча и приписывает к каждой из них код для y\n", - " input_x = T.reshape(input_x, newshape=(BATCH_SIZE, -1))\n", - " input_x = T.repeat(input_x, repeats=NUMBER_OF_DIGITS, axis=0)\n", - " input_y = T.repeat(T.eye(NUMBER_OF_DIGITS), repeats=BATCH_SIZE, axis=0)\n", - " input_xy = T.concatenate([input_x, input_y], axis=1)\n", - " return input_xy\n", - "\n", - "def add_corresponding_labels(input_z):\n", - " # приписывает код n % 10 (остаток деления) для n объекта в батче\n", - " input_y = T.repeat(T.eye(NUMBER_OF_DIGITS), repeats=BATCH_SIZE, axis=0)\n", - " input_zy = T.concatenate([input_z, input_y], axis=1)\n", - " return input_zy" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Модель вывода" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "input_xy = add_all_possible_labels(input_x)\n", - "encoder_mean, encoder_scale = vae_encoder_cond(input_xy)\n", - "qz_xy = MultivariateNormalDiag(\n", - " lasagne.layers.get_output(encoder_mean), \n", - " lasagne.layers.get_output(encoder_scale)\n", - ")\n", - "\n", - "input_zy = add_corresponding_labels(qz_xy.sample())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Генеративная модель" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "decoder_logits = vae_decoder_cond(input_zy)\n", - "pz = MultivariateNormalDiag(T.zeros((NUMBER_OF_DIGITS * BATCH_SIZE, HIDDEN_DIM)),\n", - " T.ones((NUMBER_OF_DIGITS * BATCH_SIZE, HIDDEN_DIM)))\n", - "# здесь мы не стали реализовывать отдельный класс\n", - "p_y = -np.log(NUMBER_OF_DIGITS * np.ones([BATCH_SIZE * NUMBER_OF_DIGITS]))\n", - "\n", - "px_zy = BinaryVector(\n", - " lasagne.layers.get_output(decoder_logits)\n", - ")\n", - "\n", - "classifier = classifier_mlp(input_x)\n", - "qy_x_probs = lasagne.layers.get_output(classifier)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Функция потерь" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "alpha = 5.\n", - "\n", - "elbo_vec = (+ p_y\n", - " + px_zy.log_prob(T.repeat(input_x, repeats=NUMBER_OF_DIGITS, axis=0))\n", - " + pz.log_prob(qz_xy.sample())\n", - " - qz_xy.log_prob(qz_xy.sample()))\n", - "elbo_vec = T.reshape(elbo_vec, newshape=(BATCH_SIZE, NUMBER_OF_DIGITS))\n", - "\n", - "elbo_vec = (\n", - " input_p * T.sum(elbo_vec * lasagne.utils.one_hot(input_y, m=NUMBER_OF_DIGITS), axis=1)\n", - " + (1 - input_p) * T.sum(qy_x_probs * elbo_vec - qy_x_probs * T.log(qy_x_probs), axis=1)\n", - ")\n", - "\n", - "loss = T.mean(elbo_vec - alpha * input_p * lasagne.objectives.categorical_crossentropy(qy_x_probs, input_y))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "params = lasagne.layers.get_all_params(\n", - " [encoder_mean, encoder_scale, decoder_logits, classifier]\n", - ")\n", - "updates = lasagne.updates.adam(-loss, params)\n", - "\n", - "train_fn = theano.function([input_x, input_y, input_p], loss, updates=updates)\n", - "accuracy = theano.function(\n", - " [input_x, input_y],\n", - " T.mean(T.eq(T.argmax(qy_x_probs, axis=1), input_y), dtype=theano.config.floatX)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "for epoch in range(num_epochs):\n", - " # In each epoch, we do a full pass over the training data:\n", - " train_err = 0\n", - " train_batches = 0\n", - " start_time = time.time()\n", - " for batch in iterate_minibatches(X_train, y_train, present=present, batchsize=BATCH_SIZE):\n", - " inputs, targets, batch_present = batch\n", - " inputs = np.random.rand(*inputs.shape) < inputs\n", - " train_err += train_fn(inputs, targets, batch_present)\n", - " train_batches += 1\n", - " \n", - " test_accuracy = 0\n", - " test_batches = 0\n", - " for batch in iterate_minibatches(X_test, y_test, batchsize=BATCH_SIZE, shuffle=False):\n", - " inputs, targets = batch\n", - " inputs = np.random.rand(*inputs.shape) < inputs\n", - " test_accuracy += accuracy(inputs, targets)\n", - " test_batches += 1\n", - " \n", - " print(\"Epoch {} of {} took {:.3f}s\".format(\n", - " epoch + 1, num_epochs, time.time() - start_time))\n", - " print(\"Train elbo {}\".format(train_err/train_batches))\n", - " print(\"Test accuracy {}\".format(test_accuracy/test_batches))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "# Задание*\n", - "\n", - "Ниже приведен код, генерирующий случайные цифры из заданного класса.\n", - "\n", - "Эксперементируя с архитектурами сети и параметрами модели, попробуйте обучить модель, для которой успешно выполняется это сэмплирование (см. эксперементальные результаты статьи https://arxiv.org/pdf/1406.5298.pdf)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "input_z = T.matrix('input_z')\n", - "\n", - "decode_a_code = theano.function(\n", - " [input_z],\n", - " lasagne.layers.get_output(decoder_logits, input_z),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "digit_to_draw = 4\n", - "\n", - "z_samples = np.random.randn(64, HIDDEN_DIM)\n", - "y_samples = np.zeros((64, NUMBER_OF_DIGITS))\n", - "y_samples[:, digit_to_draw] = 1\n", - "zy_samples = np.concatenate([z_samples, y_samples], axis=1)\n", - "\n", - "decoded_images = decode_a_code(zy_samples)\n", - "\n", - "fig, axes = plt.subplots(8, 8, figsize=(8, 8),\n", - " subplot_kw={'xticks': [], 'yticks': []}\n", - ")\n", - "fig.subplots_adjust(hspace=0.04, wspace=0.02)\n", - "\n", - "for ax, i in zip(axes.flat, range(64)):\n", - " ax.imshow(decoded_images[i].reshape((28, 28)), cmap='gray')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sem2-dnn4classification/mnist.py b/sem2-dnn4classification/mnist.py deleted file mode 100644 index b1a6558..0000000 --- a/sem2-dnn4classification/mnist.py +++ /dev/null @@ -1,77 +0,0 @@ -import sys -import os -import time - -import numpy as np - - -def iterate_minibatches(inputs, targets, batchsize, shuffle=False): - assert len(inputs) == len(targets) - if shuffle: - indices = np.arange(len(inputs)) - np.random.shuffle(indices) - for start_idx in range(0, len(inputs) - batchsize + 1, batchsize): - if shuffle: - excerpt = indices[start_idx:start_idx + batchsize] - else: - excerpt = slice(start_idx, start_idx + batchsize) - yield inputs[excerpt], targets[excerpt] - - -__doc__="""taken from https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py""" - -def load_dataset(): - # We first define a download function, supporting both Python 2 and 3. - if sys.version_info[0] == 2: - from urllib import urlretrieve - else: - from urllib.request import urlretrieve - - def download(filename, source='http://yann.lecun.com/exdb/mnist/'): - print("Downloading %s" % filename) - urlretrieve(source + filename, filename) - - # We then define functions for loading MNIST images and labels. - # For convenience, they also download the requested files if needed. - import gzip - - def load_mnist_images(filename): - if not os.path.exists(filename): - download(filename) - # Read the inputs in Yann LeCun's binary format. - with gzip.open(filename, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=16) - # The inputs are vectors now, we reshape them to monochrome 2D images, - # following the shape convention: (examples, channels, rows, columns) - data = data.reshape(-1, 1, 28, 28) - # The inputs come as bytes, we convert them to float32 in range [0,1]. - # (Actually to range [0, 255/256], for compatibility to the version - # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.) - return data / np.float32(256) - - def load_mnist_labels(filename): - if not os.path.exists(filename): - download(filename) - # Read the labels in Yann LeCun's binary format. - with gzip.open(filename, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=8) - # The labels are vectors of integers now, that's exactly what we want. - return data - - # We can now download and read the training and test set images and labels. - X_train = load_mnist_images('train-images-idx3-ubyte.gz') - y_train = load_mnist_labels('train-labels-idx1-ubyte.gz') - X_test = load_mnist_images('t10k-images-idx3-ubyte.gz') - y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz') - - # We reserve the last 10000 training examples for validation. - X_train, X_val = X_train[:-10000], X_train[-10000:] - y_train, y_val = y_train[:-10000], y_train[-10000:] - - # We just return all the arrays in order, as expected in main(). - # (It doesn't matter how we do this as long as we can read them again.) - return X_train, y_train, X_val, y_val, X_test, y_test - - - - diff --git a/sem2-dnn4classification/my_first_nn_lsagne.ipynb b/sem2-dnn4classification/my_first_nn_lsagne.ipynb deleted file mode 100644 index 6a63132..0000000 --- a/sem2-dnn4classification/my_first_nn_lsagne.ipynb +++ /dev/null @@ -1,586 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "

Theano

" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```bash\n", - "pip install -U https://github.com/Theano/Theano/archive/master.zip\n", - "pip install -U https://github.com/Lasagne/Lasagne/archive/master.zip\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Разминка" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import theano\n", - "import theano.tensor as T\n", - "\n", - "%pylab inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### будущий параметр функции -- символьная переменная" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "N = T.scalar('a dimension', dtype='float32')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### рецепт получения квадрата -- орперации над символьными переменным" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "result = T.power(N, 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### theano.grad(cost, wrt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "grad_result = theano.grad(result, N) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### компиляция функции \"получения квадрата\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "sq_function = theano.function(inputs=[N], outputs=result)\n", - "gr_function = theano.function(inputs=[N], outputs=grad_result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### применение функции" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Заводим np.array x\n", - "xv = np.arange(-10, 10)\n", - "\n", - "# Применяем функцию к каждому x\n", - "val = map(float, [sq_function(x) for x in xv])\n", - "\n", - "# Посичтаем градиент в кажой точке\n", - "grad = map(float, [gr_function(x) for x in xv])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Что мы увидим если нарисуем функцию и градиент?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pylab.plot(xv, val, label='x*x')\n", - "pylab.plot(xv, grad, label='d x*x / dx')\n", - "pylab.legend()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "

Lasagne

\n", - "\n", - "* lasagne - это библиотека для написания нейронок произвольной формы на theano\n", - "* В качестве демо-задачи выберем то же распознавание чисел, но на большем масштабе задачи, картинки 28x28, 10 цифр" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from mnist import load_dataset\n", - "X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()\n", - "\n", - "print 'X размера', X_train.shape, 'y размера', y_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fig, axes = plt.subplots(nrows=1, ncols=7, figsize=(20, 20))\n", - "\n", - "for i, ax in enumerate(axes):\n", - " ax.imshow(X_train[i, 0], cmap='gray')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Давайте посмотрим на DenseLayer в lasagne\n", - "- http://lasagne.readthedocs.io/en/latest/modules/layers/dense.html\n", - "- https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/dense.py#L16-L124 \n", - "- Весь содаржательный код тут https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/dense.py#L121 " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import lasagne\n", - "from lasagne import init\n", - "from theano import tensor as T\n", - "from lasagne.nonlinearities import softmax\n", - "\n", - "X, y = T.tensor4('X'), T.vector('y', 'int32')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Так задаётся архитектура нейронки" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#входной слой (вспомогательный)\n", - "net = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=X)\n", - "\n", - "net = lasagne.layers.Conv2DLayer(net, 15, 28, pad='valid', W=init.Constant()) # сверточный слой\n", - "net = lasagne.layers.Conv2DLayer(net, 10, 2, pad='full', W=init.Constant()) # сверточный слой\n", - "\n", - "net = lasagne.layers.DenseLayer(net, num_units=500) # полносвязный слой\n", - "net = lasagne.layers.DropoutLayer(net, 1.0) # регуляризатор\n", - "net = lasagne.layers.DenseLayer(net, num_units=200) # полносвязный слой\n", - "\n", - "net = lasagne.layers.DenseLayer(net, num_units=10) # полносвязный слой" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#предсказание нейронки (theano-преобразование)\n", - "y_predicted = lasagne.layers.get_output(net)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "#все веса нейронки (shared-переменные)\n", - "all_weights = lasagne.layers.get_all_params(net)\n", - "print all_weights" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#функция ошибки и точности будет прямо внутри\n", - "loss = lasagne.objectives.categorical_accuracy(y_predicted, y).mean()\n", - "accuracy = lasagne.objectives.categorical_accuracy(y_predicted, y).mean()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#сразу посчитать словарь обновлённых значений с шагом по градиенту, как раньше\n", - "updates = lasagne.updates.momentum(loss, all_weights, learning_rate=1.0, momentum=1.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#функция, делает updates и возвращащет значение функции потерь и точности\n", - "train_fun = theano.function([X, y], [loss, accuracy], updates=updates)\n", - "accuracy_fun = theano.function([X, y], accuracy) # точность без обновления весов, для теста" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Процесс обучения" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "scrolled": false - }, - "outputs": [], - "source": [ - "import time \n", - "from mnist import iterate_minibatches\n", - "\n", - "num_epochs = 10 #количество проходов по данным\n", - "batch_size = 50 #размер мини-батча\n", - "\n", - "for epoch in range(num_epochs):\n", - " train_err, train_acc, train_batches, start_time = 0, 0, 0, time.time()\n", - " for inputs, targets in iterate_minibatches(X_train, y_train, batch_size):\n", - " train_err_batch, train_acc_batch = train_fun(inputs, targets)\n", - " train_err += train_err_batch\n", - " train_acc += train_acc_batch\n", - " train_batches += 1\n", - "\n", - " val_acc, val_batches = 0, 0\n", - " for inputs, targets in iterate_minibatches(X_test, y_test, batch_size):\n", - " val_acc += accuracy_fun(inputs, targets)\n", - " val_batches += 1\n", - "\n", - " \n", - " print \"Epoch %s of %s took %.3f s\" % (epoch + 1, num_epochs, time.time() - start_time)\n", - " print \" train loss:\\t %.3f\" % (train_err / train_batches)\n", - " print \" train acc:\\t %.3f\" % (train_acc * 100 / train_batches), '%'\n", - " print \" test acc:\\t %.3f\" % (val_acc * 100 / val_batches), '%'\n", - " print" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "test_acc = 0\n", - "test_batches = 0\n", - "for batch in iterate_minibatches(X_test, y_test, 500):\n", - " inputs, targets = batch\n", - " acc = accuracy_fun(inputs, targets)\n", - " test_acc += acc\n", - " test_batches += 1\n", - "print(\"Final results: \\n test accuracy:\\t\\t{:.2f} %\".format(test_acc / test_batches * 100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Ансамблирование с DropOut" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#предсказание нейронки (theano-преобразование)\n", - "y_predicted = T.mean([lasagne.layers.get_output(net, deterministic=False) for i in range(10)], axis=0)\n", - "accuracy = lasagne.objectives.categorical_accuracy(y_predicted, y).mean()\n", - "accuracy_fun = theano.function([X, y], accuracy) # точность без обновления весов, для теста" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "test_acc = 0\n", - "test_batches = 0\n", - "for batch in iterate_minibatches(X_test, y_test, 500):\n", - " inputs, targets = batch\n", - " acc = accuracy_fun(inputs, targets)\n", - " test_acc += acc\n", - " test_batches += 1\n", - "print(\"Final results: \\n test accuracy:\\t\\t{:.2f} %\".format(test_acc / test_batches * 100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Оценка неопределенности с DropOut" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "y_predicted = lasagne.layers.get_output(net, deterministic=False)\n", - "predict_fun = theano.function([X], y_predicted) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "ans = np.zeros((y_test.shape[0], 10))\n", - "for i in range(10):\n", - " print i\n", - " for i, batch in enumerate(iterate_minibatches(X_test, y_test, 500)):\n", - " ans[i*500:(i+1)*500] += predict_fun(batch[0])\n", - " \n", - "ans /= 10" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from scipy.stats import entropy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "ent = np.array([entropy(a) for a in ans])\n", - "pre = np.array([np.argmax(a) for a in ans])\n", - "right = np.where(pre == y_test)\n", - "wrong = np.where(pre != y_test)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "a = pylab.hist(ent[right], normed=True)\n", - "a = pylab.hist(ent[wrong], normed=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "print ent[right].mean()\n", - "print ent[wrong].mean()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "y_predicted = lasagne.layers.get_output(net, deterministic=True)\n", - "predict_fun = theano.function([X], y_predicted) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "ans = np.zeros((y_test.shape[0], 10))\n", - "\n", - "for i, batch in enumerate(iterate_minibatches(X_test, y_test, 500)):\n", - " ans[i*500:(i+1)*500] += predict_fun(batch[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "ent = np.array([entropy(a) for a in ans])\n", - "pre = np.array([np.argmax(a) for a in ans])\n", - "right = np.where(pre == y_test)\n", - "wrong = np.where(pre != y_test)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "a = pylab.hist(ent[right], normed=True)\n", - "a = pylab.hist(ent[wrong], normed=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "print ent[right].mean()\n", - "print ent[wrong].mean()" - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/utils.py b/utils.py deleted file mode 100644 index fdaa8ab..0000000 --- a/utils.py +++ /dev/null @@ -1,115 +0,0 @@ -import sys -import os - -import numpy as np - -import matplotlib.pylab as plt -from matplotlib.offsetbox import OffsetImage, AnnotationBbox - -def load_dataset(): - if sys.version_info[0] == 2: - from urllib import urlretrieve - else: - from urllib.request import urlretrieve - - def download(filename, source='http://yann.lecun.com/exdb/mnist/'): - print("Downloading %s" % filename) - urlretrieve(source + filename, filename) - - import gzip - - def load_mnist_images(filename): - if not os.path.exists(filename): - download(filename) - with gzip.open(filename, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=16) - data = data.reshape(-1, 1, 28, 28) - return data / np.float32(256) - - def load_mnist_labels(filename): - if not os.path.exists(filename): - download(filename) - with gzip.open(filename, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=8) - return data - - X = {} - y = {} - - X_train= load_mnist_images('train-images-idx3-ubyte.gz') - y_train = load_mnist_labels('train-labels-idx1-ubyte.gz') - X_test = load_mnist_images('t10k-images-idx3-ubyte.gz') - y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz') - - X_train, X_val = X_train[:-10000], X_train[-10000:] - y_train, y_val = y_train[:-10000], y_train[-10000:] - return X_train, y_train, X_val, y_val, X_test, y_test - -def iterate_minibatches(inputs, targets=None, batchsize=20, present=None, shuffle=True): - if shuffle: - indices = np.arange(len(inputs)) - np.random.shuffle(indices) - for start_idx in range(0, len(inputs) - batchsize + 1, batchsize): - if shuffle: - excerpt = indices[start_idx:start_idx + batchsize] - else: - excerpt = slice(start_idx, start_idx + batchsize) - if targets is None: - yield inputs[excerpt] - elif present is None: - yield inputs[excerpt], targets[excerpt] - else: - yield inputs[excerpt], targets[excerpt], present[excerpt] - -def plot_reconstructions(x_test, reconstruction_func): - decoded_imgs = reconstruction_func(x_test) - - indices = np.random.choice(x_test.shape[0], 64) - - n = x_test.shape[0] # how many digits we will display - - fig, axes = plt.subplots(8, 16, figsize=(16, 8), - subplot_kw={'xticks': [], 'yticks': []} - ) - fig.subplots_adjust(hspace=0.04, wspace=0.02) - - for ax, i in zip(axes[:, :8].flat, indices): - ax.imshow(x_test[i].reshape((28, 28)), cmap='gray') - - for ax, i in zip(axes[:, 8:].flat, indices): - ax.imshow(decoded_imgs[i].reshape((28, 28)), cmap='gray') - - plt.show() - - -def imscatter(x, y, image, ax=None, zoom=1): - if ax is None: - ax = plt.gca() - #im = OffsetImage(image.reshape((-1, 28, 28)), zoom=zoom) - #x, y = np.atleast_1d(x, y) - artists = [] - #assert len(x) == len(y) == len(image) - n = len(x) - for i in range(n): - im = OffsetImage(image[i], zoom=zoom, cmap='gray') - ab = AnnotationBbox(im, (x[i], y[i]), xycoords='data', frameon=False) - artists.append(ax.add_artist(ab)) - ax.update_datalim(np.column_stack([x, y])) - ax.autoscale() - return artists - -def plot_hidden_space(x_test, encode_func, zoom=0.5): - encoded = encode_func(x_test) - - fig, ax = plt.subplots(figsize=(11, 11)) - imscatter(encoded[:, 0], encoded[:, 1], x_test.reshape((-1, 28, 28)), zoom=zoom, ax=ax) - - ax.spines['left'].set_position('center') - ax.spines['bottom'].set_position('center') - ax.spines['right'].set_color('none') - ax.spines['top'].set_color('none') - ax.xaxis.set_ticks_position('bottom') - ax.yaxis.set_ticks_position('left') - - plt.gray() - plt.show() \ No newline at end of file