debug g2c
ZiyueHuang committed Nov 17, 2017
1 parent 77734d5 commit caabdc5
Showing 3 changed files with 48 additions and 5 deletions.
7 changes: 5 additions & 2 deletions example/sparse/matrix_fact_parallel_model.py
@@ -32,9 +32,12 @@ def matrix_fact_model_parallel_net(factor_size, num_hidden, max_user, max_item):
    item_weight = mx.symbol.Variable('item_weight', stype='row_sparse')
    item = mx.symbol.contrib.SparseEmbedding(data=item, weight=item_weight,
                                             input_dim=max_item, output_dim=factor_size)
    # set ctx_group attribute to 'dev2' for the symbols created in this scope;
    # the symbols will be bound to the context that 'dev2' maps to in group2ctxs
    with mx.AttrScope(ctx_group='dev2'):
        weight = mx.symbol.Variable('ufcweight')
        bias = mx.symbol.Variable('ufcbias')
        user = mx.symbol.FullyConnected(data=user, weight=weight, bias=bias, num_hidden=num_hidden)
    # predict by the inner product, which is element-wise product and then sum
    pred = user * item
    pred = mx.symbol.sum(data=pred, axis=1)
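For context, ctx_group is an ordinary symbol attribute: every symbol created inside an mx.AttrScope(ctx_group=...) block carries it, and the label is only resolved to a real device when the module is bound with a matching group2ctxs entry. A minimal sketch of the attribute side, with hypothetical names ('dev1', 'fc_weight', 'fc') rather than the ones in this example:

import mxnet as mx

# hypothetical names, for illustration only
user = mx.symbol.Variable('user')
with mx.AttrScope(ctx_group='dev1'):
    # every symbol created in this scope carries the 'ctx_group' attribute
    fc_weight = mx.symbol.Variable('fc_weight')
    user = mx.symbol.FullyConnected(data=user, weight=fc_weight, no_bias=True,
                                    num_hidden=8, name='fc')

print(user.attr('ctx_group'))       # 'dev1'
print(fc_weight.attr('ctx_group'))  # 'dev1'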
2 changes: 1 addition & 1 deletion example/sparse/matrix_factorization_model_parallel.py
@@ -83,7 +83,7 @@

# initialize the module
# map the ctx_group attribute to the context assignment
group2ctxs={'dev1':mx.cpu(), 'dev2':[mx.gpu(i) for i in range(num_gpus)]}
group2ctxs={'dev1':mx.cpu(), 'dev2':[mx.cpu(i) for i in range(num_gpus)]}
mod = mx.module.Module(symbol=net, context=[mx.cpu()]*num_gpus, data_names=['user', 'item'],
label_names=['score'], group2ctxs=group2ctxs)
mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
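The hunk above swaps mx.gpu(i) for mx.cpu(i) in the 'dev2' group (the first group2ctxs line is the previous version, the second the committed one), presumably so the context-assignment path can be exercised on a machine without GPUs; CPU contexts with different device ids share host memory, so this is a debugging aid rather than real model parallelism. A minimal end-to-end sketch of the bind flow under that assumption, using a made-up two-layer network and shapes instead of the matrix-factorization model:

import mxnet as mx

# hypothetical toy network; 'dev1'/'dev2' mirror the example's group names
data = mx.symbol.Variable('data')
with mx.AttrScope(ctx_group='dev1'):
    fc1 = mx.symbol.FullyConnected(data=data, num_hidden=16, name='fc1')
with mx.AttrScope(ctx_group='dev2'):
    fc2 = mx.symbol.FullyConnected(data=fc1, num_hidden=4, name='fc2')
net = mx.symbol.SoftmaxOutput(data=fc2, name='softmax')

num_devs = 2
# map each ctx_group label to real contexts; a list gives one context per data-parallel copy
group2ctxs = {'dev1': mx.cpu(), 'dev2': [mx.cpu(i) for i in range(num_devs)]}
mod = mx.module.Module(symbol=net, context=[mx.cpu()] * num_devs,
                       data_names=['data'], label_names=['softmax_label'],
                       group2ctxs=group2ctxs)
mod.bind(data_shapes=[('data', (8, 32))],
         label_shapes=[('softmax_label', (8,))])
mod.init_params()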
44 changes: 42 additions & 2 deletions src/executor/graph_executor.cc
@@ -362,6 +362,7 @@ Graph AssignContext(Graph g,

  // loop through all the rest of input nodes not specified
  // in the ctx_map and populate maps and lists
  LOG(INFO) << "args context";
  size_t arg_top = 0, aux_top = 0;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
@@ -380,10 +381,22 @@ Graph AssignContext(Graph g,
      ctx_list.push_back(ctx);  // save the current ctx in the list
    }
    device[nid] = ctx2id.at(ctx);  // assign device id to the current node
    LOG(INFO) << "nid: " << nid << " ctx.dev_id " << ctx.dev_id;
  }

  LOG(INFO) << "=====================";
  LOG(INFO) << num_forward_outputs << " num_forward_outputs";
  LOG(INFO) << g.outputs.size() << " g.outputs.size()";
  LOG(INFO) << arg_grad_ctxes.size() << " arg_grad_ctxes.size()";

  // loop through backward input nodes and populate maps and lists
  // the backward input nodes are the gradients of the loss wrt the outputs
  LOG(INFO) << "arg grads contexts";
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i) {
    const uint32_t nid = idx.outputs()[i].node_id;
    Context ctx = arg_grad_ctxes[i - num_forward_outputs];
    LOG(INFO) << "nid " << nid << " ctx " << ctx.dev_id;
  }
  LOG(INFO) << "=====================";
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i) {
    const uint32_t nid = idx.outputs()[i].node_id;
    Context ctx = arg_grad_ctxes[i - num_forward_outputs];
@@ -393,7 +406,34 @@ Graph AssignContext(Graph g,
    }
    int devid = ctx2id.at(ctx);
    if (device[nid] != -1) {
      CHECK_EQ(device[nid], devid) << "device of same output not equal to each other";
      LOG(INFO) << "fail nid " << nid << " ctx " << ctx.dev_id;
      const nnvm::IndexedGraph::Node fail_node = idx[nid];
      // print the graph structure
      const auto& ret = g;
      const auto& idx = ret.indexed_graph();
      uint32_t node_start = 0, node_end = idx.num_nodes();
      if (ret.attrs.count("node_range")) {
        const auto& range = ret.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
        node_start = range.first;
        node_end = range.second;
      }
      for (uint32_t nid = node_start; nid < node_end; ++nid) {
        const auto& inode = idx[nid];
        if (inode.source->is_variable()) {
          LOG(INFO) << "node " << nid << " var " << inode.source->attrs.name;
        } else {
          LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name;
          for (const auto& e : inode.inputs) {
            auto eid = idx.entry_id(e);
            LOG(INFO) << "\t\tinput " << eid << " (entry id)";
          }
          for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
            uint32_t eid = idx.entry_id(nid, index);
            LOG(INFO) << "\t\toutput " << eid << " (entry id)";
          }
        }
      }  // end of the print
      CHECK_EQ(device[nid], devid) << fail_node.source->attrs.name << " device of same output not equal to each other";
    } else {
      device[nid] = devid;
    }
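The new block dumps every node of the indexed graph (variables, operators, and flat entry ids) and replaces the bare CHECK_EQ with one that also names the failing node, which makes it easier to see which output was assigned two different devices. For reference, a rough Python-side counterpart, assuming the symbol comes from matrix_fact_parallel_model.py; it prints (node id, output index) pairs rather than the flat entry ids used by the C++ log:

import json

def dump_graph(sym):
    """Print each node of a symbol's graph, mirroring the debug dump above."""
    nodes = json.loads(sym.tojson())['nodes']
    for nid, node in enumerate(nodes):
        if node['op'] == 'null':          # variables are stored as 'null' ops
            print('node %d var %s' % (nid, node['name']))
        else:
            print('node %d %s' % (nid, node['op']))
            for entry in node['inputs']:  # entry = [src node id, output index, ...]
                print('\tinput node %d output %d' % (entry[0], entry[1]))

# e.g. dump_graph(matrix_fact_model_parallel_net(factor_size, num_hidden, max_user, max_item))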
