notebook cleanup (#75)
Notebook cleanup.
amygdala committed Sep 28, 2017
1 parent 49d5cce commit c62cfa3
Showing 1 changed file with 72 additions and 101 deletions.
173 changes: 72 additions & 101 deletions workshop_sections/mnist_series/mnist_estimator.ipynb
@@ -30,48 +30,64 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow.examples.tutorials.mnist import input_data\n",
"import numpy as np\n",
"import os\n",
"import time\n",
"\n",
"print(tf.__version__)\n",
"\n",
"print(tf.__version__)"
"# define a utility function for generating a new directory in which to save \n",
"# model information, so multiple training runs don't stomp on each other.\n",
"def get_new_path(name=\"\"):\n",
" base=\"/tmp/tfmodels/mnist_estimators\"\n",
" logpath = os.path.join(base, name + \"_\" + str(int(time.time())))\n",
" print(\"Logging to {}\".format(logpath))\n",
" return logpath"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"# Train and evaluate\n",
"# Train\n",
"\n",
"DATA_SETS = input_data.read_data_sets(\n",
" \"/tmp/fashion-mnist\")\n",
" \"/tmp/MNIST_data\")\n",
"\n",
"feature_columns = [tf.feature_column.numeric_column(\n",
" \"pixels\", shape=784)]\n",
"\n",
"m = tf.estimator.LinearClassifier(\n",
" feature_columns=feature_columns, \n",
" n_classes=10,\n",
" model_dir=\"/tmp/tfmodels/mnist_estimators/fashion_linear\")\n",
" model_dir=get_new_path(\"linear\")\n",
" )\n",
"\n",
"train_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
" x={'pixels': DATA_SETS.train.images},\n",
" y=DATA_SETS.train.labels.astype(np.int64),\n",
" batch_size=100,\n",
" num_epochs=3,\n",
" shuffle=True)\n",
"m.train(input_fn=train_input_fn)\n",
"\n",
"m.train(input_fn=train_input_fn)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Evaluate\n",
"eval_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
" x={'pixels': DATA_SETS.test.images},\n",
" y=DATA_SETS.test.labels.astype(np.int64),\n",
@@ -80,14 +96,13 @@
" shuffle=False)\n",
"results = m.evaluate(input_fn=eval_input_fn)\n",
"\n",
"print(results)\n"
"print(results)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
@@ -112,7 +127,6 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
@@ -158,7 +172,7 @@
"from tensorflow.examples.tutorials.mnist import input_data\n",
"\n",
"# comment out for less info during the training runs.\n",
"tf.logging.set_verbosity(tf.logging.WARN)\n",
"tf.logging.set_verbosity(tf.logging.INFO)\n",
"\n",
"# confirm what version of TensorFlow you are running\n",
"print('Running TensorFlow version {}'.format(tf.__version__))"
@@ -172,20 +186,11 @@
"source": [
"# Set locations of data files\n",
"MNIST_DATA_DIR = \"/tmp/MNIST_data\"\n",
"FASHION_DATA_DIR = \"/tmp/fashion-mnist\"\n",
"# Select your choice of dataset\n",
"DATA_DIR = FASHION_DATA_DIR\n",
"# edit the following to reflect where you put the Fashion-MNIST local dir\n",
"FASHION_DATA_DIR = \"your-fashion-mnist-data\" \n",
"\n",
"# read in data, downloading first as necessary\n",
"DATA_SETS = input_data.read_data_sets(DATA_DIR)\n",
"\n",
"# define a utility function for generating a new directory in which to save \n",
"# model information, so multiple training runs don't stomp on each other.\n",
"def get_new_path(name=\"\"):\n",
" base=\"/tmp/tfmodels/mnist_estimators\"\n",
" logpath = os.path.join(base, name + \"_\" + str(int(time.time())))\n",
" print(\"Logging to {}\".format(logpath))\n",
" return logpath"
"DATA_SETS = input_data.read_data_sets(MNIST_DATA_DIR)"
]
},
{
@@ -227,7 +232,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def define_and_run_linear_classifier(num_steps, \n",
@@ -247,7 +254,9 @@
" DATA_SETS.train, \n",
" epochs=3, \n",
" shuffle=True,\n",
" batch_size=batch_size))\n",
" batch_size=batch_size),\n",
" steps=num_steps\n",
" )\n",
" \n",
" print(\"Finished training.\")\n",
" \n",
@@ -302,14 +311,16 @@
" DATA_SETS.train, \n",
" epochs=3, \n",
" shuffle=True,\n",
" batch_size=batch_size))\n",
" batch_size=batch_size),\n",
" steps=num_steps)\n",
"\n",
" print(\"Finished running the deep training via the train() method\")\n",
" \n",
" accuracy_score = classifier.evaluate(input_fn=generate_input_fn(\n",
" DATA_SETS.test, batch_size=batch_size, shuffle=False, epochs=1))['accuracy']\n",
"\n",
" print('DNN Classifier Accuracy: {0:f}'.format(accuracy_score))"
" print('DNN Classifier Accuracy: {0:f}'.format(accuracy_score))\n",
" return classifier"
]
},
{
@@ -336,6 +347,13 @@
"# With 1000 steps and a batch size of 40, we see accuracy of approx 91% for MNIST"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can point your model to an existing checkpoint directory rather than having it start from scratch:"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -344,9 +362,9 @@
},
"outputs": [],
"source": [
"model_path = \"/tmp/tfmodels/mnist_estimators/linear_1505895781\" # This is an example\n",
"for i in range(0,5):\n",
" define_and_run_linear_classifier(500, model_path, batch_size=40)"
"# model_path = \"/tmp/tfmodels/mnist_estimators/linear_1505895781\" # This is an example\n",
"# for i in range(0,5):\n",
"# define_and_run_linear_classifier(500, model_path, batch_size=40)"
]
},
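The commented-out lines above illustrate this with the linear helper function. As a rough sketch of the underlying pattern — assuming a placeholder checkpoint directory, since the real path would be one printed by `get_new_path()` during an earlier run — pointing `model_dir` at a directory that already holds checkpoints makes the Estimator restore the latest one, so later `train()` calls continue from where the previous run stopped:

```python
# Sketch only: reuse an existing checkpoint directory so training resumes
# rather than starting from scratch. The directory name below is a made-up
# placeholder -- substitute one produced by get_new_path() in an earlier run.
import tensorflow as tf

existing_model_dir = "/tmp/tfmodels/mnist_estimators/linear_example"  # hypothetical path

feature_columns = [tf.feature_column.numeric_column("pixels", shape=784)]

m = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    n_classes=10,
    model_dir=existing_model_dir)

# Each m.train(input_fn=...) call now restores the latest checkpoint found in
# existing_model_dir (if one exists) and continues training from that point.
```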
{
@@ -365,8 +383,8 @@
"outputs": [],
"source": [
"print(\"Running DNN classifier with .1 learning rate...\")\n",
"define_and_run_dnn_classifier(\n",
" num_steps=500, \n",
"classifier = define_and_run_dnn_classifier(\n",
" num_steps=2000, \n",
" logdir=get_new_path(\"deep01\"), \n",
" lr=.1)\n",
"# With 2000 steps and a batch size of 40, we see accuracy of approx 95% on MNIST"
@@ -376,28 +394,21 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's loop through the training-eval loop a couple of times, so we get more accuracy readings. Make a for-loop and provide a stable path for your model, which will allow continuous training-eval loops."
"If you downloaded Fashion-MNIST, let's see what MNIST and Fashion-MNIST results look like side by side. Change the path of the DATA_DIR to point to your fashion-mnist dataset, and run the training again. Be sure to change your model path."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"model_path = \"/tmp/tfmodels/mnist_estimators/deep01_1505896039\" # This is an example\n",
"for i in range(0,5):\n",
" define_and_run_dnn_classifier(500, model_path, lr=.01)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"Let's see what MNIST and fashion-mnist look like side by side. Change the path of the DATA_DIR to point to your fashion-mnist dataset, and run the training again. Be sure to change your model path."
"DATA_SETS = input_data.read_data_sets(FASHION_DATA_DIR)\n",
"print(\"Running DNN classifier with Fashion-MNIST data and a .1 learning rate...\")\n",
"fclassifier = define_and_run_dnn_classifier(\n",
" num_steps=2000, \n",
" logdir=get_new_path(\"deep01f\"), \n",
" lr=.1)"
]
},
{
@@ -411,16 +422,14 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"print(\"Running DNN classifier with .5 learning rate...\")\n",
"classifier = define_and_run_dnn_classifier(2000, \n",
" get_new_path(\"deep05\"), \n",
" lr=.5)\n",
"# With 2000 steps and a batch size of 40, we see accuracy of approx 91%, though sometimes it does not converge at all."
"classifier5 = define_and_run_dnn_classifier(2000, \n",
" get_new_path(\"deep05\"), \n",
" lr=.5)\n"
]
},
{
@@ -432,6 +441,8 @@
"```sh\n",
"$ tensorboard --logdir=/tmp/tfmodels/mnist_estimators\n",
"```\n",
"Look for it at localhost:6006\n",
"\n",
"Or run the following (select Kernel --> Interrupt from the menu when you're done):"
]
},
@@ -447,24 +458,10 @@
]
},
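The cell that starts TensorBoard inline is collapsed in this diff. As a rough sketch of one way to launch it from a notebook — an assumption about the approach, not the hidden cell's actual code — you could run the `tensorboard` command as a child process:

```python
# Sketch only: start TensorBoard as a subprocess pointed at the estimator log
# directory, then shut it down when finished. Assumes the `tensorboard`
# command-line tool is installed and on PATH.
import subprocess

logdir = "/tmp/tfmodels/mnist_estimators"
tb_proc = subprocess.Popen(["tensorboard", "--logdir", logdir])
print("TensorBoard should be serving at http://localhost:6006")

# ...when done exploring the runs:
# tb_proc.terminate()
```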
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"cell_type": "markdown",
"metadata": {},
"source": [
"def generate_input_fn(dataset, batch_size=BATCH_SIZE, \n",
" shuffle=False, epochs=None):\n",
" X = dataset.images\n",
" Y = dataset.labels.astype(numpy.int64)\n",
" return tf.estimator.inputs.numpy_input_fn(\n",
" x={'pixels': X},\n",
" y=Y,\n",
" batch_size=batch_size,\n",
" num_epochs=epochs,\n",
" shuffle=shuffle\n",
" )"
"We can again make some predictions using our trained models. Assuming you ran the Fashion-MNIST training, `DATA_SETS` now points to that dataset."
]
},
{
Expand All @@ -482,25 +479,16 @@
"metadata": {},
"outputs": [],
"source": [
"model_path = \"/tmp/tfmodels/mnist_estimators/deep01_1506395529\" # This is an example\n",
"\n",
"feature_columns = [tf.feature_column.numeric_column(\n",
" \"pixels\", shape=784)]\n",
"classifier = tf.estimator.DNNClassifier(\n",
" feature_columns=feature_columns, \n",
" n_classes=10,\n",
" hidden_units=[200, 100, 50],\n",
" optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=0.1),\n",
" model_dir=model_path)\n",
" \n",
"X = DATA_SETS.test.images[5000:5005]\n",
"predict_input_fn = tf.estimator.inputs.numpy_input_fn( \n",
" x={'pixels': X},\n",
" batch_size=1,\n",
" num_epochs=1,\n",
" shuffle=False)\n",
"\n",
"predictions = classifier.predict(input_fn=predict_input_fn)\n",
"# if you did not run the Fashion-MNIST training, edit the following to point\n",
"# to 'classifier' instead (the 'regular' MNIST model).\n",
"predictions = fclassifier.predict(input_fn=predict_input_fn)\n",
"\n",
"for prediction in predictions:\n",
" print(\"Predictions: {} with probabilities {}\\n\".format(prediction[\"classes\"], prediction[\"probabilities\"]))"
@@ -544,23 +532,6 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 1