Skip to content

Commit

Permalink
Merge pull request #143 from alanvgreen/avg11
Browse files Browse the repository at this point in the history
Avg11
  • Loading branch information
tcal-x committed Mar 9, 2021
2 parents 7cf2de8 + 7247443 commit 541d47f
Show file tree
Hide file tree
Showing 11 changed files with 602 additions and 52 deletions.
4 changes: 2 additions & 2 deletions common/src/menu.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ void menu_run(struct Menu *menu)
}
else
{
puts("\n");
printf("\nRunning %s\n", item->description );
item->fn();
puts("\n---");
puts("---");
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion proj/mnv2_first/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
export DEFINES :=

# Uncomment this line to use software defined CFU functions in software_cfu.cc
#DEFINES += CFU_SOFTWARE_DEFINED
DEFINES += CFU_SOFTWARE_DEFINED

# Uncomment this line to skip debug code (large effect on performance)
DEFINES += NDEBUG
Expand Down
70 changes: 56 additions & 14 deletions proj/mnv2_first/gateware/mnv2_cfu.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,75 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from nmigen_cfu import InstructionBase, Cfu
from .post_process import SRDHMInstruction, RoundingDividebyPOTInstruction
from .param_store import ParamStoreSetter
from nmigen_cfu import Cfu
from .getset import GetSetInstruction, RegisterSetter

OUTPUT_CHANNEL_PARAM_DEPTH = 512

class TemplateInstruction(InstructionBase):
"""Template instruction
"""

class Mnv2RegisterInstruction(GetSetInstruction):
def __init__(self):
# Create one setter object per value this instruction exposes, then hand
# the id -> setter table to the GetSetInstruction base class.
#
# Plain single-value setters.
self.set_input_depth = RegisterSetter()
self.set_output_depth = RegisterSetter()
self.set_input_offset = RegisterSetter()
self.set_output_offset = RegisterSetter()
self.set_activation_min = RegisterSetter()
self.set_activation_max = RegisterSetter()
self.set_output_batch_size = RegisterSetter()
# Parameter stores: each is built 32 bits wide with room for
# OUTPUT_CHANNEL_PARAM_DEPTH entries.
# NOTE(review): the attribute/key is spelled "mutiplier" (sic); it is used
# consistently in this class, so renaming must update every user.
self.store_output_mutiplier = ParamStoreSetter(
width=32, depth=OUTPUT_CHANNEL_PARAM_DEPTH)
self.store_output_shift = ParamStoreSetter(
width=32, depth=OUTPUT_CHANNEL_PARAM_DEPTH)
self.store_output_bias = ParamStoreSetter(
width=32, depth=OUTPUT_CHANNEL_PARAM_DEPTH)
# Integer id -> setter. Ids 10-15 cover the depth/offset/activation
# setters, 20-23 the batch size and the three parameter stores.
# NOTE(review): presumably the id is the register number selected by the
# instruction's function code - confirm against GetSetInstruction.
xetters = {
10: self.set_input_depth,
11: self.set_output_depth,
12: self.set_input_offset,
13: self.set_output_offset,
14: self.set_activation_min,
15: self.set_activation_max,
20: self.set_output_batch_size,
21: self.store_output_mutiplier,
22: self.store_output_shift,
23: self.store_output_bias,
}
super().__init__(xetters)

def register_xetters(self, m):
    """Attach every setter owned by this instruction to ``m.submodules``.

    Each setter is registered under a key equal to its attribute name, in
    the order the names are listed below.
    """
    xetter_names = (
        'set_input_depth',
        'set_output_depth',
        'set_input_offset',
        'set_output_offset',
        'set_activation_min',
        'set_activation_max',
        'set_output_batch_size',
        'store_output_mutiplier',
        'store_output_shift',
        'store_output_bias',
    )
    for xetter_name in xetter_names:
        m.submodules[xetter_name] = getattr(self, xetter_name)

def elab(self, m):
with m.If(self.start):
m.d.sync += self.output.eq(self.in0 + self.in1)
m.d.sync += self.done.eq(1)
with m.Else():
m.d.sync += self.done.eq(0)
self.register_xetters(m)


class Mnv2Cfu(Cfu):
"""Simple CFU for Mnv2.
Most functionality is provided through a single set of registers.
"""

def __init__(self):
super().__init__({
0: TemplateInstruction(),
0: Mnv2RegisterInstruction(),
6: RoundingDividebyPOTInstruction(),
7: SRDHMInstruction(),
})

def elab(self, m):
super().elab(m)


def make_cfu():
return Cfu({
# Add instructions here...
0: TemplateInstruction(),
})
return Mnv2Cfu()
114 changes: 114 additions & 0 deletions proj/mnv2_first/gateware/post_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/bin/env python
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from cfu import InstructionBase
from nmigen_cfu import Mux, Signal, signed

from nmigen_cfu import SimpleElaboratable


INT32_MIN = 0x8000_0000
INT32_MAX = 0x7fff_ffff


class SRDHM(SimpleElaboratable):
    """Implements gemmlowp::SaturatingRoundingDoublingHighMul.

    It multiplies two 32 bit numbers, then returns bits 62 to 31 of the
    64 bit result. This is 2x the high word (allowing for saturating and
    rounding).

    Implemented as a pipeline so that results are always available 3
    cycles after setting inputs.

    Note that there is a bug to be investigated here. This implementation
    matches the behavior of the compiled source, however, "nudge" may be
    one of two values.

    Public Interface
    ----------------
    a: Signal(signed(32)) input
        First operand
    b: Signal(signed(32)) input
        Second operand
    result: Signal(signed(32)) output
        The rounded, doubled high word of a*b, saturated to INT32_MAX when
        both operands are the most negative 32 bit value.
    """

    def __init__(self):
        self.a = Signal(signed(32))
        self.b = Signal(signed(32))
        self.result = Signal(signed(32))

    def elab(self, m):
        areg = Signal.like(self.a)
        breg = Signal.like(self.b)
        ab = Signal(signed(64))
        overflow = Signal()

        # The module-level INT32_MIN is written as 0x8000_0000, which is a
        # positive integer. Comparisons against a signed(32) signal are
        # numeric, so that constant can never match and the saturation path
        # would be dead. Compare against the real numeric minimum instead.
        int32_min = -(1 << 31)

        # for some reason negative nudge is not used
        nudge = 1 << 30

        # cycle 0, register a and b
        m.d.sync += [
            areg.eq(self.a),
            breg.eq(self.b),
        ]
        # cycle 1, decide if this is an overflow and multiply
        m.d.sync += [
            overflow.eq((areg == int32_min) & (breg == int32_min)),
            ab.eq(areg * breg),
        ]
        # cycle 2, apply nudge and determine result; the slice drops the low
        # 31 bits and the assignment keeps the 32 bits that fit in result
        m.d.sync += [
            self.result.eq(Mux(overflow, INT32_MAX, (ab + nudge)[31:])),
        ]


class SRDHMInstruction(InstructionBase):
    """CFU instruction wrapping the pipelined SRDHM multiplier.

    in0 and in1 feed the multiplier and its result drives output, all
    combinatorially. A small down-counter is loaded with 2 when start is
    seen and done is asserted while that counter reads 0; the counter then
    parks at -1 until the next start.

    NOTE(review): the counter relies on the default Signal reset value; if
    that is 0, done also reads high straight after reset - confirm this is
    acceptable to the enclosing Cfu.
    """

    def elab(self, m):
        srdhm = SRDHM()
        m.submodules['srdhm'] = srdhm

        # Straight-through wiring of operands and result.
        m.d.comb += srdhm.a.eq(self.in0)
        m.d.comb += srdhm.b.eq(self.in1)
        m.d.comb += self.output.eq(srdhm.result)

        countdown = Signal(signed(3))
        m.d.comb += self.done.eq(countdown == 0)

        with m.If(self.start):
            m.d.sync += countdown.eq(2)
        with m.Elif(countdown != -1):
            # Keep counting down until the idle value (-1) is reached; once
            # there the register simply holds.
            m.d.sync += countdown.eq(countdown - 1)


def rounding_divide_by_pot(x, exponent):
    """Implements gemmlowp::RoundingDivideByPOT.

    Builds the expression for x divided by 2**exponent, rounded to the
    nearest whole number. The rounding threshold is half of the discarded
    range, raised by one when bit 31 of x (the sign bit of a 32 bit value)
    is set.
    """
    low_bits = (1 << exponent) - 1
    sign_bit = x[31]
    round_up = (x & low_bits) > ((low_bits >> 1) + sign_bit)
    return (x >> exponent) + Mux(round_up, 1, 0)


class RoundingDividebyPOTInstruction(InstructionBase):
    """CFU instruction computing a rounding divide by a power of two.

    Purely combinatorial: output is rounding_divide_by_pot applied to in0s
    with the low five bits of in1 as the exponent, and done is always
    asserted.
    """

    def elab(self, m):
        exponent = self.in1[:5]
        quotient = rounding_divide_by_pot(self.in0s, exponent)
        m.d.comb += self.output.eq(quotient)
        m.d.comb += self.done.eq(1)
59 changes: 59 additions & 0 deletions proj/mnv2_first/src/cpp_math.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright 2021 The CFU-Playground Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cpp_math.h"

#include "cfu.h"
#include "tensorflow/lite/kernels/internal/common.h"

int32_t cpp_math_mul_by_quantized_mul_software(int32_t x,
int32_t quantized_multiplier,
int shift) {
return tflite::MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

// Expanded source of tflite::MultiplyByQuantizedMultiplier, with the
// saturating rounding doubling high multiply performed by CFU instruction 7
// and the final rounding divide performed in software by gemmlowp.
//
//   x                    - value to scale
//   quantized_multiplier - multiplier passed as the second CFU operand
//   shift                - when > 0, x is scaled up by 2^shift before the
//                          multiply; otherwise the product is divided by
//                          2^(-shift) with rounding
int32_t cpp_math_mul_by_quantized_mul_gateware1(int32_t x,
                                                int32_t quantized_multiplier,
                                                int shift) {
  using gemmlowp::RoundingDivideByPOT;
  int left_shift = shift > 0 ? shift : 0;
  int right_shift = shift > 0 ? 0 : -shift;
  // The product is evaluated in 32 bit arithmetic and consumed as a 32 bit
  // CFU operand, so hold it in an int32_t (matching the gateware2 variant)
  // rather than suggesting a 64 bit intermediate.
  int32_t left_shifted = x * (1 << left_shift);
  int32_t srdhm = cfu_op7_hw(0, left_shifted, quantized_multiplier);
  return RoundingDivideByPOT(srdhm, right_shift);
}

// Expanded source of tflite::MultiplyByQuantizedMultiplier, with both steps
// performed by the CFU: instruction 7 for the saturating rounding doubling
// high multiply and instruction 6 for the rounding divide by a power of two.
//
//   x                    - value to scale
//   quantized_multiplier - multiplier passed as the second CFU operand
//   shift                - when > 0, x is scaled up by 2^shift before the
//                          multiply; otherwise the product is divided by
//                          2^(-shift) with rounding
int32_t cpp_math_mul_by_quantized_mul_gateware2(int32_t x,
                                                int32_t quantized_multiplier,
                                                int shift) {
  int left_shift = shift > 0 ? shift : 0;
  int right_shift = shift > 0 ? 0 : -shift;
  // Scale with a multiply rather than `x << left_shift`: left-shifting a
  // negative signed value is undefined before C++20, and this keeps the
  // expression identical to the gateware1 variant.
  int32_t left_shifted = x * (1 << left_shift);
  int32_t srdhm = cfu_op7_hw(0, left_shifted, quantized_multiplier);
  return cfu_op6_hw(0, srdhm, right_shift);
}


int32_t cpp_math_srdhm_software(int32_t a, int32_t b) {
return gemmlowp::SaturatingRoundingDoublingHighMul(a, b);
}


int32_t cpp_math_rdbpot_software(int32_t value, int shift) {
return gemmlowp::RoundingDivideByPOT(value, shift);
}
44 changes: 44 additions & 0 deletions proj/mnv2_first/src/cpp_math.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2021 The CFU-Playground Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _CPP_MATH_H
#define _CPP_MATH_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// C++ (and other) math functions for C
//
// Everything below is declared with C linkage (see the extern "C" guard
// around these declarations) so that C sources can call the C++
// implementations in cpp_math.cc.

// Software reference: calls tflite::MultiplyByQuantizedMultiplier.
int32_t cpp_math_mul_by_quantized_mul_software(int32_t x, int32_t quantized_multiplier,
int shift);

// Same calculation, with the doubling high multiply done by CFU instruction 7
// and the rounding divide done in software (gemmlowp::RoundingDivideByPOT).
int32_t cpp_math_mul_by_quantized_mul_gateware1(int32_t x, int32_t quantized_multiplier,
int shift);

// Same calculation, with the doubling high multiply done by CFU instruction 7
// and the rounding divide done by CFU instruction 6.
int32_t cpp_math_mul_by_quantized_mul_gateware2(int32_t x, int32_t quantized_multiplier,
int shift);

// Saturating rounding doubling high multiply
// (software, gemmlowp::SaturatingRoundingDoublingHighMul).
int32_t cpp_math_srdhm_software(int32_t a, int32_t b);

// Rounding divide by power of two (software, gemmlowp::RoundingDivideByPOT).
int32_t cpp_math_rdbpot_software(int32_t value, int shift);

#ifdef __cplusplus
}
#endif
#endif // _CPP_MATH_H
12 changes: 8 additions & 4 deletions proj/mnv2_first/src/golden_op_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1329,14 +1329,18 @@ void golden_op_run_1x1conv(void) {
puts("");

// Check results are ok
bool ok = true;
int fails = 0;
for (size_t i = 0; i < 2400; i++) {
if (actual_output[i] != expected_output[i]) {
printf("FAIL - output tensor mismatch at %u\n", i);
ok = false;
if (!fails) {
printf("FAIL - first output tensor mismatch at %u\n", i);
}
fails++;
}
}
if (ok) {
if (fails) {
printf("FAIL - %d fails\n", fails);
} else {
puts("OK - output tensor matches");
}
}

0 comments on commit 541d47f

Please sign in to comment.