In [1]:
# yacht_reg_cntk.py
# regression on the Yacht Hull dataset
# CNTK 2.4, Python 3.5.2

import numpy as np
import cntk as C

# ------------------------------------------------------------

In [2]:
def file_len(fname):
 f = open(fname)
 for (i, l) in enumerate(f): pass
 f.close()
 return i+1

def create_reader(path, input_dim, output_dim, rnd_order, sweeps):
  x_strm = C.io.StreamDef(field='predictors', shape=input_dim, is_sparse=False)
  y_strm = C.io.StreamDef(field='resistance', shape=output_dim, is_sparse=False)
  streams = C.io.StreamDefs(x_src=x_strm, y_src=y_strm)
  deserial = C.io.CTFDeserializer(path, streams)
  mb_src = C.io.MinibatchSource(deserial, randomize=rnd_order, max_sweeps=sweeps)
  return mb_src

def mb_accuracy(mb, x_var, y_var, model, delta_pct):
  num_correct = 0
  num_wrong = 0

  x_mat = mb[x_var].asarray()  # batch_size x 1 x features_dim
  y_mat = mb[y_var].asarray()  # batch_size x 1 x 1

  # for i in range(mb[x_var].shape[0]):  # each item in the batch
  for i in range(len(mb[x_var])):
    v = model.eval(x_mat[i])           # 1 x 1 predicted value
    y = y_mat[i]                       # 1 x 1 actual value
    if np.abs(v[0,0] - y[0,0]) <  delta_pct * y:  # close enough?
      num_correct += 1
    else:
      num_wrong += 1
  return (num_correct * 100.0) / (num_correct + num_wrong)

def mb_mean_sq_error(mb, x_var, y_var, model):
  x_mat = mb[x_var].asarray()  # batch_size x 1 x features_dim
  y_mat = mb[y_var].asarray()  # batch_size x 1 x 1

  sum = 0.0
  for i in range(len(mb[x_var])):
    v = model.eval(x_mat[i])           # 1 x 1 predicted value
    y = y_mat[i]                       # 1 x 1 actual value
    sum += (v[0,0] - y[0,0]) * (v[0,0] - y[0,0])
  return sum / len(mb[x_var])

# ------------------------------------------------------------

In [3]:
# 0. get started
print("\nYacht Hull regression example using CNTK \n")
np.random.seed(1)

cv = C.__version__
print("Using CNTK: ", cv, "\n")

train_file = ".\\Data\\yacht_train_mm_tab_cntk.txt"  # 277 items
test_file = ".\\Data\\yacht_test_mm_tab_cntk.txt"  # 31 items
input_dim = 6; hidden_dim = 10; output_dim = 1

X = C.ops.input_variable(input_dim, np.float32)
Y = C.ops.input_variable(output_dim)


Yacht Hull regression example using CNTK 

Using CNTK:  2.4 



In [4]:
# 1. prepare data reader
# |predictors	 0.560000   0.228571 . . 0.692308  |resistance  0.117930
#   [0]          [1]       [2]          [6]         [7]         [8]
rdr = create_reader(train_file, input_dim, output_dim, rnd_order=True, sweeps=C.io.INFINITELY_REPEAT)
yacht_map = {
X : rdr.streams.x_src,
Y : rdr.streams.y_src
}

In [5]:
# 2. define 6-(10-10)-1 deep NN model
my_init = C.initializer.glorot_uniform(seed=1)
model = C.layers.Sequential([
C.layers.Dense(hidden_dim, activation=C.ops.tanh, name='hidLayer1', init=my_init),
C.layers.Dense(hidden_dim, activation=C.ops.tanh, name='hidLayer2', init=my_init),
C.layers.Dense(output_dim, activation=None, name='outLayer', init=my_init)])(X)

In [6]:
# 3. (no explicit compilation in CNTK)

In [9]:
# 4. train model
max_iter = 10000
batch_size = 16
basic_sgd = C.learners.sgd(model.parameters, lr=0.05)
tr_loss = C.squared_error(model, Y)
trainer = C.Trainer(model, (tr_loss, None), [basic_sgd]) 

for i in range(0, max_iter):
    curr_batch = rdr.next_minibatch(batch_size, input_map=yacht_map)
    trainer.train_minibatch(curr_batch)
    if i % int(max_iter/10) == 0:
      mcee = trainer.previous_minibatch_loss_average 
      acc = mb_accuracy(curr_batch, X, Y, model, delta_pct=0.15)
      print("batch %6d: mean squared error = %8.4f  accuracy = %5.2f%%" % (i, mcee, acc))

batch      0: mean squared error =   0.0005  accuracy = 43.75%
batch   1000: mean squared error =   0.0005  accuracy = 31.25%
batch   2000: mean squared error =   0.0003  accuracy = 37.50%
batch   3000: mean squared error =   0.0012  accuracy = 43.75%
batch   4000: mean squared error =   0.0004  accuracy = 50.00%
batch   5000: mean squared error =   0.0015  accuracy = 50.00%
batch   6000: mean squared error =   0.0003  accuracy = 43.75%
batch   7000: mean squared error =   0.0001  accuracy = 43.75%
batch   8000: mean squared error =   0.0003  accuracy = 37.50%
batch   9000: mean squared error =   0.0003  accuracy = 31.25%


In [10]:
# 5. evaluate model
rdr = create_reader(train_file, input_dim, output_dim, rnd_order=False, sweeps=1)

yacht_map = {
X : rdr.streams.x_src,
Y : rdr.streams.y_src
}

num_test = file_len(test_file)  # 31
all_test = rdr.next_minibatch(num_test, input_map=yacht_map)
acc = mb_accuracy(all_test, X, Y, model, delta_pct=0.15)
mse = mb_mean_sq_error(all_test, X, Y, model)
print("\nFinal overall accuracy on test data  = %0.2f%% " % acc) 
print("Final overall loss (mse) on test data = %0.6f" % mse)  


Final overall accuracy on test data  = 35.48% 
Final overall loss (mse) on test data = 0.000238


In [11]:
# 6. save model
mp = ".\\Models\\yacht_model.cntk"
model.save(mp)

In [12]:
# 7. make a prediction for previously unseen data
inpts = np.array([[0.5611, 0.2285, 0.9511, 0.0011, 0.4615, 0.9999]], dtype=np.float32)
pred = model.eval(inpts)
np.set_printoptions(precision=4, suppress=True)
print("\nMaking prediction for (mm-normalized) previously unseen inputs of: ")
print(inpts)
print("\nPredicted (mm-normalized) hull resistance is: ")
print(pred)

print("\nEnd Yacht Hull demo")


Making prediction for (mm-normalized) previously unseen inputs of: 
[[0.5611 0.2285 0.9511 0.0011 0.4615 0.9999]]

Predicted (mm-normalized) hull resistance is: 
[[0.8253]]

End Yacht Hull demo
