Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,27 @@ static void AddInitializerAsInput(onnxruntime::Graph& dst_graph,
}
}

// Reports whether `initializer_name` is consumed as a quantization parameter,
// i.e. as the scale (input 1) or the zero point (input 2) of any
// DequantizeLinear / QuantizeLinear node found in `src_graph`.
// Returns true on the first match, false when no Q/DQ node references it in
// either of those two slots.
static bool IsQuantizationParameter(const std::string& initializer_name,
                                    const onnxruntime::GraphViewer& src_graph) {
  // Input slots inspected on each Q/DQ node; slot 0 (the data tensor) is skipped.
  constexpr size_t kScaleSlot = 1;
  constexpr size_t kZeroPointSlot = 2;

  for (const auto& index : src_graph.GetNodesInTopologicalOrder()) {
    const auto* candidate = src_graph.GetNode(index);
    const auto& op_type = candidate->OpType();

    const bool is_qdq_node = (op_type == "DequantizeLinear") || (op_type == "QuantizeLinear");
    if (!is_qdq_node) {
      continue;
    }

    const auto& inputs = candidate->InputDefs();
    // The zero point may be absent, so never index past the inputs actually present.
    for (size_t slot = kScaleSlot; slot <= kZeroPointSlot && slot < inputs.size(); ++slot) {
      if (inputs[slot]->Name() == initializer_name) {
        return true;  // Used as scale or zero point.
      }
    }
  }

  return false;
}

// Creates a new model without the DQ/Q operators in the src graph.
Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
const logging::Logger& logger,
Expand Down Expand Up @@ -845,19 +866,31 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
if (!init_with_data &&
utils::HasExternalData(initializer_tensor) &&
enable_ovep_weight_sharing) {
insert_metadata(initializer_tensor);

// Add initializer with external data as input
AddInitializerAsInput(dst_graph, accumulated_inputs, src_graph, name);
} else {
// Add as an initialized tensor if it does not have external data
if (initializers_to_keep.count(name) > 0) {
if (init_with_data) {
dst_graph.AddInitializedTensor(*init_with_data);
// Only convert to input if it's not a quantization parameter
bool is_quant_param = IsQuantizationParameter(name, src_graph);

if (!is_quant_param) {
// This is actual weight data - so to convert to input for weight sharing
insert_metadata(initializer_tensor);
AddInitializerAsInput(dst_graph, accumulated_inputs, src_graph, name);
} else {
dst_graph.AddInitializedTensor(initializer_tensor);
// This is a quantization parameter - keep as initializer even if external

if (initializers_to_keep.count(name) > 0) {

dst_graph.AddInitializedTensor(initializer_tensor);
}
}
} else {
// Add as an initialized tensor if it does not have external data
if (initializers_to_keep.count(name) > 0) {
if (init_with_data) {
dst_graph.AddInitializedTensor(*init_with_data);
} else {
dst_graph.AddInitializedTensor(initializer_tensor);
}
}
}
}

current_scope_initializer_set.insert(name);
Expand Down
Loading