import argparse

import mxnet as mx

from data import get_iterator
import train_model
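
# NOTE: mx.symbol.CaffeOp and mx.symbol.CaffeLoss come from MXNet's Caffe
# plugin, so this script only runs against an MXNet build with that plugin
# enabled (typically USE_CAFFE = 1 in config.mk; an assumption about your
# build). Each CaffeOp embeds a Caffe layer described by its `prototxt`
# string; `num_weight` is the number of learnable blobs the layer carries
# (2 for InnerProduct/Convolution: weights plus bias). The module-level
# flag `use_caffe_loss` referenced inside the network builders below is
# set in the __main__ block before they are called.
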
def get_mlp():
    """
    Multi-layer perceptron.
    """
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1',
                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
    act1 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
    fc2 = mx.symbol.CaffeOp(data_0=act1, num_weight=2, name='fc2',
                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
    act2 = mx.symbol.CaffeOp(data_0=fc2, prototxt="layer{type:\"TanH\"}")
    fc3 = mx.symbol.CaffeOp(data_0=act2, num_weight=2, name='fc3',
                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10} }")
    if use_caffe_loss:
        label = mx.symbol.Variable('softmax_label')
        mlp = mx.symbol.CaffeLoss(data=fc3, label=label, grad_scale=1, name='softmax',
                                  prototxt="layer{type:\"SoftmaxWithLoss\"}")
    else:
        mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
    return mlp
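
# For reference, the CaffeOp layers above mirror native MXNet symbols: a
# Caffe InnerProduct layer corresponds to mx.symbol.FullyConnected, and a
# TanH layer to mx.symbol.Activation(act_type='tanh'); CaffeOp simply
# delegates the forward/backward computation to the embedded Caffe layer.
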
def get_lenet():
    """
    LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick
    Haffner. "Gradient-based learning applied to document recognition."
    Proceedings of the IEEE (1998).
    """
    data = mx.symbol.Variable('data')
    # first conv
    conv1 = mx.symbol.CaffeOp(data_0=data, num_weight=2,
                              prototxt="layer{type:\"Convolution\" convolution_param { num_output: 20 kernel_size: 5 stride: 1} }")
    act1 = mx.symbol.CaffeOp(data_0=conv1, prototxt="layer{type:\"TanH\"}")
    pool1 = mx.symbol.CaffeOp(data_0=act1,
                              prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
    # second conv
    conv2 = mx.symbol.CaffeOp(data_0=pool1, num_weight=2,
                              prototxt="layer{type:\"Convolution\" convolution_param { num_output: 50 kernel_size: 5 stride: 1} }")
    act2 = mx.symbol.CaffeOp(data_0=conv2, prototxt="layer{type:\"TanH\"}")
    pool2 = mx.symbol.CaffeOp(data_0=act2,
                              prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
    # first fullc
    fc1 = mx.symbol.CaffeOp(data_0=pool2, num_weight=2,
                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 500} }")
    act3 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
    # second fullc
    fc2 = mx.symbol.CaffeOp(data_0=act3, num_weight=2,
                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10} }")
    if use_caffe_loss:
        label = mx.symbol.Variable('softmax_label')
        lenet = mx.symbol.CaffeLoss(data=fc2, label=label, grad_scale=1, name='softmax',
                                    prototxt="layer{type:\"SoftmaxWithLoss\"}")
    else:
        lenet = mx.symbol.SoftmaxOutput(data=fc2, name='softmax')
    return lenet
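
# Sanity-check sketch (the batch size 64 is an arbitrary assumption): once
# `use_caffe_loss` has been set, the symbol's shapes can be inspected with
# infer_shape, e.g.
#   _, out_shapes, _ = get_lenet().infer_shape(data=(64, 1, 28, 28))
# which should report [(64, 10)] for the 10-way classifier head.
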
def get_network_from_json_file(file_name):
    network = mx.sym.load(file_name)
    return network
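
# The JSON file loaded above is the serialized form of an MXNet symbol.
# Any symbol built in this file could be exported for later reloading,
# e.g. (hypothetical file name):
#   get_lenet().save('lenet-caffe.json')
# and then trained via `--network lenet-caffe.json`.
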
def parse_args():
    parser = argparse.ArgumentParser(description='train an image classifier on MNIST')
    parser.add_argument('--network', type=str, default='lenet',
                        help='the network to use (mlp | lenet | <path to network json file>)')
    parser.add_argument('--caffe-loss', type=int, default=0,
                        help='use the CaffeLoss symbol (0 | 1)')
    parser.add_argument('--caffe-data', action='store_true',
                        help='use the Caffe input-data layer')
    parser.add_argument('--data-dir', type=str, default='mnist/',
                        help='the input data directory')
    parser.add_argument('--gpus', type=str,
                        help='the gpus to use, e.g. "0,1,2,3"')
    parser.add_argument('--num-examples', type=int, default=60000,
                        help='the number of training examples')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='the batch size')
    parser.add_argument('--lr', type=float, default=.1,
                        help='the initial learning rate')
    parser.add_argument('--model-prefix', type=str,
                        help='the prefix of the model to load/save')
    parser.add_argument('--save-model-prefix', type=str,
                        help='the prefix of the model to save')
    parser.add_argument('--num-epochs', type=int, default=10,
                        help='the number of training epochs')
    parser.add_argument('--load-epoch', type=int,
                        help='load the model saved at the given epoch under model-prefix')
    parser.add_argument('--kv-store', type=str, default='local',
                        help='the kvstore type')
    parser.add_argument('--lr-factor', type=float, default=1,
                        help='multiply the learning rate by this factor every lr-factor-epoch epochs')
    parser.add_argument('--lr-factor-epoch', type=float, default=1,
                        help='the epoch interval at which to apply lr-factor; may be fractional, e.g. .5')
    return parser.parse_args()
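
# Example invocations (a sketch; the file name caffe_net.py is an
# assumption, substitute the actual script name):
#   python caffe_net.py --network lenet --gpus 0
#   python caffe_net.py --network mlp --caffe-loss 1 --caffe-data
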
if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data

    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        net = get_network_from_json_file(args.network)

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))