[BenchmarkGen] support generating CNN benchmarks without considering the last dense layer (#15)
hanchenye committed Jan 19, 2021
1 parent 42ffa43 commit 63043d3
Showing 5 changed files with 183 additions and 34 deletions.
config/cnn-config.ini: 9 changes (8 additions & 1 deletion)
@@ -1,6 +1,13 @@
 [config]
 # input and output information
-inputChannel=3
 inputHeight=224
 inputWidth=224
+inputChannel=3
 outputChannel=1000
+
+# Generation rules
+batchSize=1
+minChannel=64
+maxChannel=512
+poolingNumber=5
+bypassNumber=0
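
For reference, these keys are read through the inih INIReader API that benchmark-gen.cpp uses below. A standalone sketch, assuming the same library and the config path above (not code from this commit):

// Standalone sketch: load two of the generation rules with inih's INIReader.
// Defaults mirror those used by BenchmarkGenerator::genCNN below.
#include "INIReader.h"
#include <cstdio>

int main() {
  INIReader config("config/cnn-config.ini");
  if (config.ParseError())
    return 1;
  long maxChannel = config.GetInteger("config", "maxChannel", 512);
  long poolingNumber = config.GetInteger("config", "poolingNumber", 5);
  std::printf("maxChannel=%ld poolingNumber=%ld\n", maxChannel, poolingNumber);
  return 0;
}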
include/Dialect/HLSKernel/CNNOps.td: 10 changes (4 additions & 6 deletions)
@@ -36,10 +36,8 @@ def ConvOp : HLSKernelOp<"conv", [HLSKernelOpInterface]> {

 Strides and padding are integer attributes of the same rank as the number of
 window dimensions. The padding attribute specifies the amount of zero
-padding to be applied to the base area, which is a n-d array of (low, high)
-padding. Each pair has the low padding as the first element and the high
-padding as the second element. Using padding is equivalent to inserting
-those same zero values into the input before the convolution.
+padding to be applied to the base area, which is a 4-d array of (top,
+bottom, left, right) padding.
 }];
 
 let arguments = (ins
@@ -48,7 +46,7 @@ def ConvOp : HLSKernelOp<"conv", [HLSKernelOpInterface]> {
 AnyMemRef:$B,
 AnyMemRef:$O,
 OptionalAttr<I64ArrayAttr>:$strides,
-OptionalAttr<I64ElementsAttr>:$padding
+OptionalAttr<I64ArrayAttr>:$padding
 );
}

@@ -67,7 +65,7 @@ def MaxPoolOp : HLSKernelOp<"maxpool", [HLSKernelOpInterface]> {
 AnyMemRef:$O,
 OptionalAttr<I64ArrayAttr>:$kernel_shape,
 OptionalAttr<I64ArrayAttr>:$strides,
-OptionalAttr<I64ElementsAttr>:$padding
+OptionalAttr<I64ArrayAttr>:$padding
 );
}
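
For intuition on the new layout: with (top, bottom, left, right) padding, the output size per spatial dimension follows the usual convolution formula. A minimal sketch of the implied shape arithmetic (an illustration, not code from this repository):

// Per dimension: output = (input + padLow + padHigh - kernel) / stride + 1.
#include <cstdint>

int64_t convOutputDim(int64_t input, int64_t kernel, int64_t stride,
                      int64_t padLow, int64_t padHigh) {
  return (input + padLow + padHigh - kernel) / stride + 1;
}

// Example: a 5x5 kernel with stride 1 and padding (2, 2, 2, 2) keeps a 32x32
// feature map at 32x32, as in test_conv.mlir below: (32 + 2 + 2 - 5) / 1 + 1 = 32.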

lib/Conversion/HLSKernelToAffine/HLSKernelToAffine.cpp: 18 changes (9 additions & 9 deletions)
@@ -45,15 +45,15 @@ class HLSKernelVisitor : public HLSKernelVisitorBase<HLSKernelVisitor, bool> {
OpBuilder &builder;
Location loc;

-// Helpers for creating loops.
-// Constant upper and lower bound.
+/// Helpers for creating loops.
+/// Constant upper and lower bound.
Value createLoop(int64_t lower, int64_t upper, int64_t step = 1) {
auto loop = builder.create<mlir::AffineForOp>(loc, lower, upper, step);
builder.setInsertionPointToStart(&loop.getLoopBody().front());
return loop.getInductionVar();
}

-// General case.
+/// General case loop boundary.
Value createLoop(std::initializer_list<Value> lower, AffineMap lowerMap,
std::initializer_list<Value> upper, AffineMap upperMap,
int64_t step = 1) {
@@ -80,7 +80,7 @@ class HLSKernelVisitor : public HLSKernelVisitorBase<HLSKernelVisitor, bool> {
return createLoop({}, lowerMap, {upper}, upperMap);
}

-// Helpers for creating constant, loads, stores and binary operations.
+/// Helpers for creating constant, loads, stores and binary operations.
Value createConst(int64_t val, Type valType) {
if (valType.isa<IntegerType>())
return builder.create<mlir::ConstantOp>(
@@ -170,8 +170,8 @@ bool HLSKernelVisitor::visitOp(DenseOp op) {
/// Padding and strides have not been supported.
bool HLSKernelVisitor::visitOp(ConvOp op) {
SmallVector<int64_t, 4> padding;
-  for (auto pad : op.getAttrOfType<DenseIntElementsAttr>("padding"))
-    padding.push_back(pad.getSExtValue());
+  for (auto pad : op.getAttrOfType<ArrayAttr>("padding"))
+    padding.push_back(pad.cast<IntegerAttr>().getInt());

auto I = op.getOperand(0);
auto K = op.getOperand(1);
@@ -257,8 +257,8 @@ bool HLSKernelVisitor::visitOp(ConvOp op) {
return true;
}

-// Padding and strides has not been suppored. Only support when kernel size is
-// equal to stride size.
+/// Padding and strides have not been supported. Only supported when kernel
+/// size is equal to stride size.
bool HLSKernelVisitor::visitOp(MaxPoolOp op) {
SmallVector<int64_t, 2> kernelShape;
for (auto shape : op.getAttrOfType<ArrayAttr>("kernel_shape"))
@@ -394,7 +394,7 @@ bool HLSKernelVisitor::visitOp(MergeOp op) {
// BLASOps Handler
//===----------------------------------------------------------------------===//

-// Only default attributes configuration are supported.
+/// Only the default attribute configuration is supported.
bool HLSKernelVisitor::visitOp(GemmOp op) {
auto alpha = op.getOperand(0);
auto beta = op.getOperand(1);
test/Conversion/HLSKernelToAffine/test_conv.mlir: 2 changes (1 addition & 1 deletion)
@@ -2,6 +2,6 @@

// CHECK: module {
func @test_conv(%I: memref<10x3x32x32xf32>, %K: memref<16x3x5x5xf32>, %B: memref<16xf32>, %O: memref<10x16x32x32xf32>) -> () {
"hlskernel.conv" (%I, %K, %B, %O) {padding=dense<[[2, 2], [2, 2]]>: tensor<2x2xi64>} : (memref<10x3x32x32xf32>, memref<16x3x5x5xf32>, memref<16xf32>, memref<10x16x32x32xf32>) -> ()
"hlskernel.conv" (%I, %K, %B, %O) {padding=[2, 2, 2, 2]} : (memref<10x3x32x32xf32>, memref<16x3x5x5xf32>, memref<16xf32>, memref<10x16x32x32xf32>) -> ()
return
}
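
For readers unfamiliar with the op's semantics, the following self-contained C++ reference computes what this test's convolution describes: 10x3x32x32 input, 16x3x5x5 kernel, zero padding of 2 on every side, unit strides, and (assuming $B is a per-output-channel bias) a 10x16x32x32 output. A behavioral sketch only, not code from the repository; note the lowering above still marks padding/stride support as incomplete.

// Naive reference: O[n][co][h][w] = B[co] + sum over ci, kh, kw of
// I[n][ci][h+kh-2][w+kw-2] * K[co][ci][kh][kw], with zero outside the input.
void convReference(const float *I, const float *K, const float *B, float *O) {
  const int N = 10, CI = 3, CO = 16, H = 32, W = 32, KH = 5, KW = 5, P = 2;
  for (int n = 0; n < N; ++n)
    for (int co = 0; co < CO; ++co)
      for (int h = 0; h < H; ++h)
        for (int w = 0; w < W; ++w) {
          float acc = B[co];
          for (int ci = 0; ci < CI; ++ci)
            for (int kh = 0; kh < KH; ++kh)
              for (int kw = 0; kw < KW; ++kw) {
                const int ih = h + kh - P, iw = w + kw - P;
                if (ih >= 0 && ih < H && iw >= 0 && iw < W)
                  acc += I[((n * CI + ci) * H + ih) * W + iw] *
                         K[((co * CI + ci) * KH + kh) * KW + kw];
              }
          O[((n * CO + co) * H + h) * W + w] = acc;
        }
}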
tools/benchmark-gen/benchmark-gen.cpp: 178 changes (161 additions & 17 deletions)
@@ -26,7 +26,7 @@ using namespace hlskernel;

static llvm::cl::opt<std::string>
benchmarkType("type", llvm::cl::desc("Benchmark type"),
llvm::cl::value_desc("cnn/image"), llvm::cl::init("cnn"));
llvm::cl::value_desc("cnn/blas/isp"), llvm::cl::init("cnn"));

static llvm::cl::opt<std::string>
configFilename("config", llvm::cl::desc("Configuration filename"),
@@ -41,31 +41,174 @@ static llvm::cl::opt<std::string>
outputFilename("o", llvm::cl::desc("Output filename"),
llvm::cl::value_desc("filename"), llvm::cl::init("-"));

-static LogicalResult benchmarkGen(raw_ostream &os) {
namespace {
/// Class for automatically generating benchmarks.
class BenchmarkGenerator {
public:
explicit BenchmarkGenerator(raw_ostream &os, ModuleOp &module)
: os(os), module(module) {}

raw_ostream &os;
ModuleOp &module;

/// Methods for generating various types of benchmarks.
LogicalResult genCNN(INIReader config);
LogicalResult genBLAS(INIReader config) { return failure(); }
LogicalResult genISP(INIReader config) { return failure(); }
};
} // namespace

/// Currently bypass has not been supported.
LogicalResult BenchmarkGenerator::genCNN(INIReader config) {
// Parse configuration file.
if (config.ParseError())
llvm::outs() << "error: cnn configuration file parse failed\n";

const auto inputChannel = config.GetInteger("config", "inputChannel", 3);
const auto inputHeight = config.GetInteger("config", "inputHeight", 224);
const auto inputWidth = config.GetInteger("config", "inputWidth", 224);
const auto outputChannel = config.GetInteger("config", "outputChannel", 1000);

const auto batchSize = config.GetInteger("config", "batchSize", 1);
const auto minChannel = config.GetInteger("config", "minChannel", 64);
const auto maxChannel = config.GetInteger("config", "maxChannel", 512);
const auto poolingNumber = config.GetInteger("config", "poolingNumber", 5);
// const auto bypassNumber = config.GetInteger("config", "bypassNumber", 0);

// Create a new builder in the target module.
OpBuilder builder(module.getBodyRegion());
auto loc = module.getLoc();
std::srand(std::time(nullptr));

// Helpers.
auto getMemType = [&](std::initializer_list<int64_t> shape) {
return MemRefType::get(shape, builder.getF32Type());
};

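// std::rand() % 3 yields 0, 1, or 2, so kernels are 3x3, 5x5, or 7x7; odd
// sizes keep (kernelShape - 1) / 2 valid as "same" padding below.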
auto getKernelShape = [&]() { return std::rand() % 3 * 2 + 3; };

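// With probability 1/4, double the channel count until maxChannel is reached.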
auto getChannel = [&](int current) {
if (std::rand() % 4 == 0 && current < maxChannel)
return current * 2;
else
return current;
};

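// Decide whether to pool: probability 1/4, forced while no pooling layer has
// been placed yet (current == 0), and capped at poolingNumber in total.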
auto getPoolingFlag = [&](int current) {
if ((std::rand() % 4 == 0 || current == 0) && current < poolingNumber)
return true;
else
return false;
};

// Generate function signature and create a new function.
SmallVector<mlir::Type, 2> inputTypes;
inputTypes.push_back(
getMemType({batchSize, inputChannel, inputHeight, inputWidth}));
inputTypes.push_back(getMemType({outputChannel}));
SmallVector<mlir::Type, 2> outputTypes;

auto func = builder.create<FuncOp>(
loc, "auto_gen_cnn", builder.getFunctionType(inputTypes, outputTypes));
func.addEntryBlock();
builder.setInsertionPointToStart(&func.front());

// Initialize status registers.
int poolingCount = 0;
bool poolingFlag = getPoolingFlag(poolingCount);
int kernelShape = getKernelShape();
int padding = (kernelShape - 1) / 2;

int topChannel = inputChannel;
int topHeight = inputHeight;
int topWidth = inputWidth;

int btmChannel = minChannel;
int btmHeight = poolingFlag ? topHeight / 2 : topHeight;
int btmWidth = poolingFlag ? topWidth / 2 : topWidth;

// Memory references.
SmallVector<mlir::Value, 32> fmaps;
SmallVector<mlir::Value, 32> kernels;
SmallVector<mlir::Value, 32> biases;
fmaps.push_back(func.getArgument(0));

// Generate CNN model.
while (poolingCount < poolingNumber || btmChannel < maxChannel) {
// Create convolutional layer.
fmaps.push_back(builder.create<mlir::AllocOp>(
loc, getMemType({batchSize, btmChannel, topHeight, topWidth})));
kernels.push_back(builder.create<mlir::AllocOp>(
loc, getMemType({btmChannel, topChannel, kernelShape, kernelShape})));
biases.push_back(
builder.create<mlir::AllocOp>(loc, getMemType({btmChannel})));

builder.create<ConvOp>(
loc, *std::prev(fmaps.end(), 2), kernels.back(), biases.back(),
fmaps.back(), builder.getI64ArrayAttr({1, 1}),
builder.getI64ArrayAttr({padding, padding, padding, padding}));

// Create ReLU layer.
fmaps.push_back(builder.create<mlir::AllocOp>(
loc, getMemType({batchSize, btmChannel, topHeight, topWidth})));
builder.create<ReluOp>(loc, *std::prev(fmaps.end(), 2), fmaps.back());

// Create max pooling layer if applied.
if (poolingFlag) {
fmaps.push_back(builder.create<mlir::AllocOp>(
loc, getMemType({batchSize, btmChannel, btmHeight, btmWidth})));
builder.create<MaxPoolOp>(loc, *std::prev(fmaps.end(), 2), fmaps.back(),
builder.getI64ArrayAttr({2, 2}),
builder.getI64ArrayAttr({2, 2}),
builder.getI64ArrayAttr({0, 0, 0, 0}));
}

// Update status registers.
poolingCount = poolingFlag ? poolingCount + 1 : poolingCount;
poolingFlag = getPoolingFlag(poolingCount);
kernelShape = getKernelShape();
padding = (kernelShape - 1) / 2;

topChannel = btmChannel;
topHeight = btmHeight;
topWidth = btmWidth;

btmChannel = getChannel(topChannel);
btmHeight = poolingFlag ? topHeight / 2 : topHeight;
btmWidth = poolingFlag ? topWidth / 2 : topWidth;
}

// TODO: Create the last dense layer.

builder.create<mlir::ReturnOp>(loc);

os << module << "\n";
return success();
}
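
With the defaults from cnn-config.ini above, the generation loop runs until five poolings and three channel doublings have happened; a worked example of the resulting shapes (batchSize 1):

input feature map : 1 x 3 x 224 x 224
channels          : 64 -> 128 -> 256 -> 512  (three doublings, capped at maxChannel)
spatial dims      : 224 / 2^5 = 7            (five stride-2 pooling layers)
final feature map : 1 x 512 x 7 x 7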

static LogicalResult processBenchmarkGen(raw_ostream &os) {
// Create a new MLIR context and module.
MLIRContext context;
-  context.loadDialect<HLSKernelDialect>();
+  context.loadDialect<StandardOpsDialect, HLSKernelDialect>();
auto module = ModuleOp::create(UnknownLoc::get(&context));
-  OpBuilder builder(module.getBodyRegion());
+  BenchmarkGenerator generator(os, module);

+  // Generate corresponding benchmark.
   if (benchmarkType == "cnn") {
-    INIReader cnnConfig(configFilename);
-    if (cnnConfig.ParseError())
-      llvm::outs() << "error: cnn configuration file parse fail\n";
+    INIReader config(configFilename);
+    return generator.genCNN(config);
 
-    auto inputHeight = cnnConfig.GetInteger("config", "inputHeight", 224);
-    llvm::outs() << inputHeight << "\n";
+  } else if (benchmarkType == "blas") {
+    INIReader config(configFilename);
+    return generator.genBLAS(config);
 
-    SmallVector<mlir::Type, 4> types;
-    builder.create<FuncOp>(module.getLoc(), "new_func",
-                           builder.getFunctionType(types, types));
+  } else if (benchmarkType == "isp") {
+    INIReader config(configFilename);
+    return generator.genISP(config);
 
-    module.print(os);
-    os << "\n\n";
-  } else if (benchmarkType == "image") {
+  } else {
+    return failure();
   }
return success();
}
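
Given the cl::opt declarations above, a typical invocation might look like the following (the binary name benchmark-gen is assumed from the tool's directory; it is not spelled out in this diff):

benchmark-gen -type cnn -config config/cnn-config.ini -o cnn_benchmark.mlir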

@@ -87,7 +230,8 @@ int main(int argc, char **argv) {
exit(1);
}

-  if (failed(benchmarkGen(output->os()))) {
+  // Process benchmark generation.
+  if (failed(processBenchmarkGen(output->os()))) {
return 1;
}
