<a href="https://colab.research.google.com/github/fenwickslab/fenwicks/blob/master/tutorials/tutorial2_cifar10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Show which GPU (if any) is attached to this runtime before training starts.
!nvidia-smi

Tue May 14 17:40:44 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.56       Driver Version: 410.79       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P8    18W /  70W |      0MiB / 15079MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
# cufflinks is never imported directly in this notebook; presumably it is a
# plotting dependency of the fenwicks helpers used below -- TODO confirm.
# NOTE(review): the install is unpinned (-U upgrades to the newest release),
# so the environment can differ from one run to the next.
!pip install -qq -U cufflinks

In [0]:
import numpy as np
import tensorflow as tf
import os

In [4]:
# Always start from a fresh checkout: remove any existing ./fenwicks directory,
# then clone the repository again.
if tf.io.gfile.exists('./fenwicks'):
  tf.io.gfile.rmtree('./fenwicks')
!git clone https://github.com/fenwickslab/fenwicks.git

# Imported here rather than with the other imports because the package is only
# available once the clone above has completed.
import fenwicks as fw

Cloning into 'fenwicks'...
remote: Enumerating objects: 25, done.[K
remote: Counting objects:   4% (1/25)   [Kremote: Counting objects:   8% (2/25)   [Kremote: Counting objects:  12% (3/25)   [Kremote: Counting objects:  16% (4/25)   [Kremote: Counting objects:  20% (5/25)   [Kremote: Counting objects:  24% (6/25)   [Kremote: Counting objects:  28% (7/25)   [Kremote: Counting objects:  32% (8/25)   [Kremote: Counting objects:  36% (9/25)   [Kremote: Counting objects:  40% (10/25)   [Kremote: Counting objects:  44% (11/25)   [Kremote: Counting objects:  48% (12/25)   [Kremote: Counting objects:  52% (13/25)   [Kremote: Counting objects:  56% (14/25)   [Kremote: Counting objects:  60% (15/25)   [Kremote: Counting objects:  64% (16/25)   [Kremote: Counting objects:  68% (17/25)   [Kremote: Counting objects:  72% (18/25)   [Kremote: Counting objects:  76% (19/25)   [Kremote: Counting objects:  80% (20/25)   [Kremote: Counting objects:  84% (21/25)  

In [5]:
# Training hyper-parameters. The trailing `#@param` annotations are Colab form
# markers and have to stay on the same line as the value they describe.
BATCH_SIZE = 512 #@param ["512", "256", "128"] {type:"raw"}
MOMENTUM = 0.9 #@param ["0.9", "0.95", "0.975"] {type:"raw"}
# Multiplied by BATCH_SIZE when it is handed to the optimizer.
WEIGHT_DECAY = 0.0005 #@param ["0.000125", "0.00025", "0.0005"] {type:"raw"}
# Divided by BATCH_SIZE when the learning-rate schedule is built.
LEARNING_RATE = 0.4 #@param ["0.4", "0.2", "0.1"] {type:"raw"}
EPOCHS = 24 #@param {type:"slider", min:0, max:100, step:1}
# Warm-up length in epochs; converted to optimizer steps further down.
WARMUP = 5 #@param {type:"slider", min:0, max:24, step:1}
# Root location and project name passed to fw.io.get_project_dirs.
BUCKET = '.'
PROJECT = 'tutorial2'
# Batch size used for evaluation.
VALID_BATCH_SIZE = 1000

In [6]:
# Resolve the data and working directories for this project from BUCKET / PROJECT.
# (The recorded training log shows the work dir as ./work/tutorial2/<timestamp>.)
data_dir, work_dir = fw.io.get_project_dirs(BUCKET, PROJECT)

In [7]:
# Load the CIFAR-10 train/test arrays through Keras.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Dataset dimensions reused by the rest of the notebook.
n_train = len(X_train)
n_test = len(X_test)
# Side length taken from axis 1; the same value is later used for both height and width.
img_size = X_train.shape[1]
# Labels start at 0, so the largest label plus one is the number of classes.
n_classes = np.max(y_train) + 1

In [8]:
# Pie chart of the training-label distribution (labels flattened to 1-D first).
fw.plt.plot_counts_pie(y_train.ravel())

In [9]:
# Per-channel standardisation, using statistics from the training set only
# (the test set is scaled with the training mean/std as well).
#
# The integer-dtype guard makes this cell safe to re-run: once X_train has
# been converted to float, recomputing the statistics would give ~0 / ~1 and
# silently corrupt X_train_mean / X_train_std, which are needed again later to
# map images back to pixel values for display. Reloading the raw data
# (integer dtype again) re-enables the computation.
if np.issubdtype(X_train.dtype, np.integer):
  X_train_mean = np.mean(X_train, axis=(0,1,2))
  X_train_std = np.std(X_train, axis=(0,1,2))
  X_train = (X_train - X_train_mean) / X_train_std
  X_test = (X_test - X_train_mean) / X_train_std

In [10]:
# Serialise each split to its own TFRecord file under data_dir.
# NOTE(review): the recorded output shows the writer skipping files that
# already exist, so records from an earlier run may be reused -- confirm this
# is intended whenever the preprocessing above changes.

# Training split.
train_fn = os.path.join(data_dir, "train.tfrec")
fw.data.numpy_tfrecord(train_fn, X_train, y_train)

# Test split.
test_fn = os.path.join(data_dir, "test.tfrec")
fw.data.numpy_tfrecord(test_fn, X_test, y_test)

INFO:tensorflow:Output file already exists. Skipping.
INFO:tensorflow:Output file already exists. Skipping.


In [11]:
def parser_train(tfexample):
  """Decode one training record and run it through the augmentation pipeline.

  The record is parsed into an image (requested as `img_size` x `img_size`)
  and its label; the image then goes through random pad-and-crop, random flip
  and cutout, in that order.

  Args:
    tfexample: one serialized example read from the training TFRecord file.

  Returns:
    A tuple of the augmented image and its unchanged label.
  """
  image, label = fw.data.tfexample_numpy_image_parser(tfexample, img_size, img_size)
  augmentations = (
    lambda im: fw.transform.random_pad_crop(im, 4),
    fw.transform.random_flip,
    lambda im: fw.transform.cutout(im, 8, 8),
  )
  for augment in augmentations:
    image = augment(image)
  return image, label

def parser_test(x):
  """Decode one evaluation record into an (image, label) pair, with no augmentation."""
  return fw.data.tfexample_numpy_image_parser(x, img_size, img_size)

In [12]:
def train_input_func(params):
  """Training input function: records from `train_fn`, parsed with `parser_train`.

  Args:
    params: mapping supplied by the caller; only `params['batch_size']` is read.
  """
  return fw.data.tfrecord_ds(train_fn, parser_train, batch_size=params['batch_size'], training=True)


def eval_input_func(params):
  """Evaluation input function: records from `test_fn`, parsed with `parser_test`.

  Args:
    params: mapping supplied by the caller; only `params['batch_size']` is read.
  """
  return fw.data.tfrecord_ds(test_fn, parser_test, batch_size=params['batch_size'], training=False)

In [13]:
# Preview what the eval pipeline produces. The images were standardised
# earlier, so a converter built from the training mean/std is supplied to map
# them back for display.
# NOTE(review): the /255 presumably rescales the statistics so the restored
# pixels land in [0, 1] -- confirm against fw.transform.tfm_reverse_standard_scaler.
converter = fw.transform.tfm_reverse_standard_scaler(X_train_mean/255, X_train_std/255)
fw.anim.show_input_func(eval_input_func, converter=converter)

In [14]:
def build_nn(c=64, weight=0.125):
  """Assemble the image classifier as a `fw.Sequential` stack.

  Layer order: ConvBN(c) -> ConvResBlk(2c) -> ConvBlk(4c) -> ConvResBlk(8c)
  -> global max pooling -> Classifier(n_classes).

  Args:
    c: base width; the four convolutional stages use c, 2c, 4c and 8c.
    weight: forwarded unchanged to `fw.layers.Classifier`.

  Returns:
    The assembled `fw.Sequential` model.
  """
  conv_kwargs = fw.layers.PYTORCH_PARAMS  # shared by every convolutional stage
  net = fw.Sequential()
  push = net.add

  # Convolutional trunk; the width doubles at every stage.
  push(fw.layers.ConvBN(c, **conv_kwargs))
  push(fw.layers.ConvResBlk(c*2, res_convs=2, **conv_kwargs))
  push(fw.layers.ConvBlk(c*4, **conv_kwargs))
  push(fw.layers.ConvResBlk(c*8, res_convs=2, **conv_kwargs))

  # Head: pool over the spatial dimensions, then classify.
  push(tf.keras.layers.GlobalMaxPool2D())
  push(fw.layers.Classifier(n_classes, kernel_initializer=fw.layers.init_pytorch, weight=weight))
  return net

In [15]:
# Optimizer steps per pass over the training set; the floor division leaves
# any final partial batch out of the count.
steps_per_epoch = n_train // BATCH_SIZE
# Length of the whole run and of the warm-up phase, both in optimizer steps.
total_steps, warmup_steps = EPOCHS * steps_per_epoch, WARMUP * steps_per_epoch

In [16]:
# Build the learning-rate schedule from the warm-up length, total length and a
# linear decay, then plot it over the whole run.
# The rate passed in is LEARNING_RATE / BATCH_SIZE, i.e. a per-example rate:
# the model function below sums (rather than averages) the loss over the
# batch, and the weight decay is multiplied by BATCH_SIZE to match.
# NOTE(review): presumably the schedule ramps up over `warmup_steps` and then
# decays linearly until `total_steps` -- confirm against fw.train.one_cycle_lr.
lr_decay = fw.train.linear_decay()
lr_func = fw.train.one_cycle_lr(LEARNING_RATE/BATCH_SIZE, total_steps, warmup_steps, lr_decay)
fw.plt.plot_lr_func(lr_func, total_steps)

Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [17]:
# SGD with momentum driven by the schedule above. Weight decay is multiplied
# by BATCH_SIZE because the learning rate was divided by BATCH_SIZE.
opt_func = fw.train.sgd_optimizer(lr_func, mom=MOMENTUM, wd=WEIGHT_DECAY*BATCH_SIZE)
# Classification model function; the loss is summed over the batch
# (Reduction.SUM), which is what the per-example learning rate assumes.
model_func = fw.train.get_clf_model_func(build_nn, opt_func, reduction=tf.losses.Reduction.SUM)

In [18]:
# Build the estimator (the recorded log shows work_dir as its model directory)
# and train for the full schedule.
# NOTE(review): despite the "tpu" in the helper name, the recorded outputs
# show a GPU runtime and "Running eval on CPU".
est = fw.train.get_tpu_estimator(steps_per_epoch, model_func, work_dir, trn_bs=BATCH_SIZE, val_bs=VALID_BATCH_SIZE)
est.train(train_input_func, steps=total_steps)

INFO:tensorflow:Using config: {'_model_dir': './work/tutorial2/2019-05-14-17:41:05', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f33a85cc208>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=97, num_shards=None, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed

<tensorflow.contrib.tpu.python.tpu.tpu_estimator.TPUEstimator at 0x7f33a8873588>

In [19]:
# Evaluate on the test set for n_test // VALID_BATCH_SIZE steps (10 in the
# recorded run). If n_test were not a multiple of VALID_BATCH_SIZE, the
# remainder would not be covered by this step count.
result = est.evaluate(eval_input_func, steps=n_test // VALID_BATCH_SIZE)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running eval on CPU
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-05-14T17:50:13Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./work/tutorial2/2019-05-14-17:41:05/model.ckpt-2328
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/10]
INFO:tensorflow:Evaluation [2/10]
INFO:tensorflow:Evaluation [3/10]
INFO:tensorflow:Evaluation [4/10]
INFO:tensorflow:Evaluation [5/10]
INFO:tensorflow:Evaluation [6/10]
INFO:tensorflow:Evaluation [7/10]
INFO:tensorflow:Evaluation [8/10]
INFO:tensorflow:Evaluation [9/10]
INFO:tensorflow:Evaluation [10/10]
INFO:tensorflow:Finished evaluation at 2019-05-14-17:50:18
INFO:tensorflow:Saving dict for global step 2328: accuracy = 0.9409, global_step = 2328, loss = 180.38521
INFO:tensorflow:Saving 

In [20]:
# Report the final test metrics with two decimals. A plain `.2f` spec is used:
# a space in front of the precision is the "space" sign flag, which pads
# positive numbers with a leading blank and would print e.g. "accuracy= 94.09%".
print(f'Test results: accuracy={result["accuracy"] * 100:.2f}%, loss={result["loss"]:.2f}.')

Test results: accuracy= 94.09%, loss= 180.39.


In [21]:
# Clean up after the run.
# NOTE(review): presumably this empties/recreates work_dir, discarding the
# checkpoints written during training -- confirm, and skip this cell if the
# trained model should be kept.
fw.io.create_clean_dir(work_dir)