In [19]:
# trainData_cntk.txt - 120 items (40 each class) 
# testData_cntk.txt - remaining 30 items

import numpy as np
import cntk as C
from cntk import Trainer  # to train the NN
from cntk.learners import sgd, learning_rate_schedule, \
  UnitType 
from cntk.ops import *  # input_variable() def
from cntk.logging import ProgressPrinter 
from cntk.initializer import glorot_uniform 
from cntk.layers import default_options, Dense
from cntk.io import CTFDeserializer, MinibatchSource, \
  StreamDef, StreamDefs, INFINITELY_REPEAT
  

In [20]:
np.random.seed(0)

In [21]:
def my_print(arr, dec):
  # print an array of float/double with dec decimals
  fmt = "%." + str(dec) + "f" # like %.4f
  for i in range(0, len(arr)):
    print(fmt % arr[i] + '  ', end='')
  print("\n")

In [22]:
def create_reader(path, is_training, input_dim, output_dim):
  return MinibatchSource(CTFDeserializer(path, StreamDefs(
    features = StreamDef(field='attribs', shape=input_dim,
      is_sparse=False),
    labels = StreamDef(field='species', shape=output_dim,
      is_sparse=False)
  )), randomize = is_training,
    max_sweeps = INFINITELY_REPEAT if is_training else 1)


In [5]:
def save_weights(fn, ihWeights, hBiases,
  hoWeights, oBiases):
  f = open(fn, 'w')
  for vals in ihWeights:
    for v in vals:
      f.write("%s\n" % v)
  for v in hBiases:
    f.write("%s\n" % v)
  for vals in hoWeights:
    for v in vals:
      f.write("%s\n" % v)
  for v in oBiases:
    f.write("%s\n" % v)
  f.close()

What does this data look like then?

In [17]:
%%bash 
cat "trainData_cntk.txt" | head

E r r o r :   0 x 8 0 0 7 0 0 5 7   
 

<img src="iris-model.jpg">

In [7]:
# create NN, train, test, predict
input_dim = 4
hidden_dim = 2
output_dim = 3

train_file = "trainData_cntk.txt"
test_file = "testData_cntk.txt"

input_Var = C.ops.input(input_dim, np.float32)
label_Var = C.ops.input(output_dim, np.float32)

print("Type of input_Var is: ")
print(type(input_Var))

print("Creating a 4-2-3 tanh softmax NN for Iris data ") 
with default_options(init = glorot_uniform()):
    hLayer = C.layers.Dense(hidden_dim, activation=C.ops.tanh, name='hidLayer')(input_Var)
    oLayer = Dense(output_dim, activation=C.ops.softmax, name='outLayer')(hLayer)
    nnet = oLayer

Type of input_Var is: 
<class 'cntk.variables.Variable'>
Creating a 4-2-3 tanh softmax NN for Iris data 


In [8]:

print("Creating a cross entropy mini-batch Trainer \n")
ce = C.cross_entropy_with_softmax(nnet, label_Var)
pe = C.classification_error(nnet, label_Var)

fixed_lr = 0.05
lr_per_batch = learning_rate_schedule(fixed_lr,UnitType.minibatch)
learner = C.sgd(nnet.parameters, lr_per_batch)
trainer = C.Trainer(nnet, (ce, pe), [learner])

max_iter = 5000  # 5000 maximum training iterations
batch_size = 5   # mini-batch size  5
progress_freq = 1000  # print error every n minibatches



Creating a cross entropy mini-batch Trainer 



How many epochs are we doing? (120 is the number of training examples)

In [9]:
max_iter/120

41.666666666666664

Create a reader and input map, then start training

In [10]:
reader_train = create_reader(train_file, True, input_dim, output_dim)
my_input_map = {
input_Var : reader_train.streams.features,
label_Var : reader_train.streams.labels
}
pp = ProgressPrinter(progress_freq)

print("Starting training \n")
for i in range(0, max_iter):
    currBatch = reader_train.next_minibatch(batch_size, input_map = my_input_map)
    trainer.train_minibatch(currBatch)
    pp.update_with_trainer(trainer)
print("\nTraining complete")

Starting training 

 Minibatch[   1-1000]: loss = 1.108116 * 5000;
 Minibatch[1001-2000]: loss = 1.099623 * 5000;
 Minibatch[2001-3000]: loss = 0.972342 * 5000;
 Minibatch[3001-4000]: loss = 0.807777 * 5000;
 Minibatch[4001-5000]: loss = 0.768413 * 5000;

Training complete


In [11]:
print("\nEvaluating test data \n")
reader_test = create_reader(test_file, False, input_dim, output_dim)

numTestItems = 30

allTest = reader_test.next_minibatch(numTestItems, input_map = my_input_map) 
test_error = trainer.test_minibatch(allTest)

print("Classification error on the 30 test items = %f" % test_error)


Evaluating test data 

Classification error on the 30 test items = 0.066667


In [12]:
test_file

'testData_cntk.txt'

In [23]:
# make a prediction for an unknown flower
# first train versicolor = 7.0,3.2,4.7,1.4,0,1,0
unknown = np.array([[6.9, 3.1, 4.6, 1.3]], dtype=np.float32) 
print("\nPredicting Iris species for input features: ")
my_print(unknown[0], 1)  # 1 decimal

predicted = nnet.eval( {input_Var: unknown} ) 
print("Prediction is: ")
my_print(predicted[0], 3)  # 3 decimals


Predicting Iris species for input features: 
6.9  3.1  4.6  1.3  

Prediction is: 
0.263  0.682  0.055  



In [14]:
print("\nTrained model input-to-hidden weights: \n")
print(hLayer.hidLayer.W.value)
print("\nTrained model hidden node biases: \n")
print(hLayer.hidLayer.b.value)

print("\nTrained model hidden-to-output weights: \n")
print(oLayer.outLayer.W.value)
print("\nTrained model output node biases: \n")
print(oLayer.outLayer.b.value)

save_weights("weights.txt", hLayer.hidLayer.W.value, 
    hLayer.hidLayer.b.value, oLayer.outLayer.W.value, 
    oLayer.outLayer.b.value)


Trained model input-to-hidden weights: 

[[ 0.61001551  0.92742485]
 [ 0.71527183  0.95194459]
 [-1.08553445 -0.52599716]
 [-1.06876552 -0.72444987]]

Trained model hidden node biases: 

[ 0.14688115  0.03603337]

Trained model hidden-to-output weights: 

[[ 3.22006011 -0.73119122 -4.19448519]
 [-0.85459208  0.35532627  0.0244552 ]]

Trained model output node biases: 

[ 0.18597203  0.67358345 -0.8595528 ]
