-
Notifications
You must be signed in to change notification settings - Fork 0
/
div.py
894 lines (722 loc) · 34.5 KB
/
div.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
"""Soft-core divider components."""
from amaranth import Elaboratable, Module, Signal, signed, C, unsigned
from amaranth.lib.data import StructLayout
from amaranth.lib.wiring import Signature, In, Out, Component
from amaranth.lib.enum import Enum, auto
class Sign(Enum):
"""Indicate the type of divide to be performed.
* ``UNSIGNED``: Both inputs ``n`` and ``d`` are unsigned.
The output is unsigned.
* ``SIGNED``: Both inputs ``n`` and ``d`` are unsigned.
The quotient and remainder are signed. The remainder takes
the sign of the dividend.
"""
UNSIGNED = auto()
SIGNED = auto()
class Inputs(StructLayout):
r"""Tagged union representing the signedness of divider inputs.
* When :attr:`sign` is ``UNSIGNED``, both :attr:`n` and :attr:`d` are
treated by the divider as :class:`~amaranth:amaranth.hdl.Value`\ s
with an :data:`~amaranth:amaranth.hdl.unsigned`
:class:`~amaranth:amaranth.hdl.Shape`.
* When ``sign`` is ``SIGNED``, both ``n`` and ``d`` are treated as Values
with a :data:`~amaranth:amaranth.hdl.signed` Shape.
Parameters
----------
width : int
Width in bits of both inputs ``n`` and ``d``. For signed divides, this
includes the sign bit.
Attributes
----------
sign: Sign
Controls the interpretation of the bit patterns of ``n`` and ``d``
during division.
n: Signal(width)
The dividend; i.e. the ':math:`n`' in :math:`n / d`.
d: Signal(width)
The divisor; i.e. the ':math:`d`' in :math:`n / d`.
"""
def __init__(self, width):
super().__init__({
"sign": Sign,
"n": unsigned(width),
"d": unsigned(width),
})
class Outputs(StructLayout):
r"""Tagged union representing the signedness of divider outputs.
* When :attr:`sign` is ``UNSIGNED``, :attr:`q` and :attr:`r` should be
treated as :class:`~amaranth:amaranth.hdl.Value`\ s with an
:class:`~amaranth:amaranth.hdl.as_unsigned`
:class:`~amaranth:amaranth.hdl.Shape`.
* When ``sign`` is ``SIGNED``, ``q`` and ``r`` should be treated as Values
with a :class:`~amaranth:amaranth.hdl.as_signed` Shape.
Parameters
----------
width : int
Width in bits of the outputs ``q`` and ``r``. For signed dividers, this
includes the sign bit.
Attributes
----------
sign: Sign
Indicates whether the divider that produced this quotient/remainder
was signed or unsigned.
q: Signal(width)
The quotient of :math:`n / d`.
r: Signal(width)
The remainder of :math:`n / d`, i.e. :math:`n \bmod d`.
"""
def __init__(self, width):
super().__init__({
"sign": Sign,
"q": unsigned(width),
"r": unsigned(width)
})
def divider_input_signature(width):
"""Create a parametric divider input port.
This function returns a :class:`~amaranth:amaranth.lib.wiring.Signature`
that's usable as a transfer initiator to a divider. A divider starts
on the current cycle when both ``valid`` and ``rdy`` are asserted.
Parameters
----------
width : int
Width in bits of the inputs :attr:`n` and :attr:`d`. For signed
divides, this includes the sign bit.
Returns
-------
:class:`~amaranth:amaranth.lib.wiring.Signature`
:class:`~amaranth:amaranth.lib.wiring.Signature` containing the
following members:
.. attribute:: .payload
:type: Out(Inputs)
:noindex:
Data input to divider.
.. attribute:: .ready
:type: In(1)
:noindex:
When ``1``, indicates that divider is ready.
.. attribute:: .valid
:type: Out(1)
:noindex:
When ``1``, indicates that divider data input is valid.
"""
return Signature({
"payload": Out(Inputs(width)),
"ready": In(1),
"valid": Out(1)
})
def divider_output_signature(width):
"""Create a parametric divider output port.
This function returns a :class:`~amaranth:amaranth.lib.wiring.Signature`
that's usable as a transfer initiator **from** a divider.
.. note:: For a core responding **to** a divider, which is the typical
use case, you will want to use this Signature with the
:data:`~amaranth:amaranth.lib.wiring.In` flow, like so:
.. doctest::
>>> from smolarith.div import divider_output_signature
>>> from amaranth.lib.wiring import Signature, In
>>> my_receiver_sig = Signature({
... "inp": In(divider_output_signature(width=8))
... })
Parameters
----------
width : int
Width in bits of the outputs ``q`` and ``r``. For signed divides, this
includes the sign bit.
Returns
-------
:class:`~amaranth:amaranth.lib.wiring.Signature`
:class:`~amaranth:amaranth.lib.wiring.Signature` containing the
following members:
.. attribute:: .payload
:type: Out(Outputs)
:noindex:
Data output **from** divider.
.. attribute:: .ready
:type: In(1)
:noindex:
When ``1``, indicates that responder is ready to receive results
from divider.
.. attribute:: .valid
:type: Out(1)
:noindex:
When ``1``, indicates that divider output data input is valid.
"""
return Signature({
"payload": Out(Outputs(width)),
"ready": In(1),
"valid": Out(1)
})
class _Quadrant(StructLayout):
"""Store sign information about divider inputs."""
def __init__(self): # noqa: DOC
super().__init__({
"n": unsigned(1),
"d": unsigned(1),
})
class _Negate(Component):
def __init__(self, width=8): # noqa: DOC
self.width = width
super().__init__({
"inp": In(self.width),
"outp": Out(self.width)
})
def elaborate(self, plat):
m = Module()
m.d.comb += self.outp.eq((-self.inp.as_signed())[:-1])
return m
class Impl(Enum):
"""Indicate the division algorithm to use inside a dividing component.
* ``RESTORING``: Use :ref:`restoring division <derive-res>` for the
internal divider. *This is a good default*.
* ``NON_RESTORING``: Use :ref:`non-restoring division <derive-nr>` for the
internal divider.
"""
RESTORING = auto()
NON_RESTORING = auto()
class MulticycleDiv(Component):
# FIXME: I can't be .. _latency label to work, even with :ref:`latency`...
# always "undefined label"... huh?!
r"""Restoring/Non-restoring divider soft-core.
This divider core is a gateware implementation of
:ref:`restoring division <derive-res>` or
:ref:`non-restoring division <derive-nr>`. It works for both signed and
unsigned divides, and should be preferred to :class:`LongDivider` in almost
all circumstances due to better resource usage.
* A divide starts on the current cycle when both ``inp.valid`` and
``inp.ready`` are asserted.
* A divide result is available when ``outp.valid`` is asserted. The
result is read/overwritten once a downstream core asserts ``outp.ready``.
* Latency: Divide Results for a restoring divider will be available
``width + 3`` clock cycles after assertion of both ``inp.valid`` and
``inp.ready``. For a non-restoring divider, results are available after
``width + 4`` clock cycles.
* Throughput: One divide maximum is finished every ``width + 3``
clock cycles for a restoring divider, and every ``width + 4`` for a
non-restoring divider.
Parameters
----------
width : int
Width in bits of both inputs ``n`` and ``d``. For signed
multiplies, this includes the sign bit. Outputs ``q`` and ``r`` width
will be the same width.
impl : Impl
Choose whether to use a restoring divider or non-restoring divider
internally. The default of ``RESTORING`` is fine for most cases.
Attributes
----------
width : int
Bit width of the inputs ``n`` and ``d``, and outputs ``q`` and ``r``.
inp : In(divider_input_signature(width))
Input interface to the divider.
outp : Out(divider_output_signature(width))
Output interface of the divider.
Notes
-----
* This divider is implemented using either restoring or non-restoring
division. It is basically a gateware translation of the
:ref:`Python <resdiv-py>` :ref:`implementations <nrdiv-py>` shown in
:ref:`impl`.
* Unlike the multipliers, where I could eyeball approximate storage element
usage, divider size was benchmarked using
`yosys <https://yosyshq.net/yosys/>`_\ 's ``synth_ice40`` pass. For an
:math:`n`-bit divide:
* ``impl=Impl.RESTORING`` requires approximately :math:`O(7*n)`
storage elements and :math:`O(12.75*n)` LUT4s.
* ``impl=Impl.NON_RESTORING`` requires approximately :math:`O(8*n)`
storage elements and :math:`O(14.5*n)` LUT4s.
* Internally, the divider converts its operands from signed to unsigned
if necessary, performs the division, and the converts back from unsigned
to signed if necessary. I ran into some :ref:`issues <signedness>` with
making a signed-aware non-restoring divider such that eating the
conversion cost latency is probably justifiable for implementation
simplicity of both a restoring and non-restoring divider.
.. _latency:
The combination of signed-to-unsigned conversion, using a cycle to
latching inputs to the internal divider, and unsigned-to-signed
conversion adds three cycles of latency beyond the expected ``width``
number of cycles to perform a division.
Additionally, when using a non-restoring divider, the quotient and
remainder require a possible :ref:`final restore step <nrdiv-restore>`.
Remainder restore is implemented as shown in the
:ref:`Python code <nrdiv-py>`. Quotient restore is implemented by
subtracting the ones complement of the raw quotient from the raw quotient
itself.
* The quotient and remainder share a backing store; new bits are shifted
into the lower-half quotient portion as the remainder is shifted into the
upper half. This works because each iteration of the
restoring/non-restoring loops check the sign of the remainder, and the
lower quotient bits won't affect the sign bit.
* This divider is compliant with RISC-V semantics for divide-by-zero and
overflow when ``width=32`` or ``width=64``\ [rv]_. Specifically:
* Signed/unsigned divide-by-zero returns "all bits set" for the
quotient and the dividend as the remainder.
* Signed overflow (:math:`-2^{\{31,63\}}/-1`) returns
:math:`-2^{\{31,63\}}` for the quotient and :math:`0` as the
remainder.
Future Directions
-----------------
* It may be worthwhile to make a pipelined divider?
* Current latency is worse than the :class:`LongDivider`, which is just
about the only advantage to ``LongDivider``. We can provide an option to
reduce latency/resources required due to the signed/unsigned and restore
stages at the cost of some timing closure.
"""
def __init__(self, width=8, impl=Impl.RESTORING):
self.width = width
if impl == Impl.RESTORING:
self.div_impl = _RestoringDiv(width)
elif impl == Impl.NON_RESTORING:
self.div_impl = _NonRestoringDiv(width)
self.negate_nq = _Negate(width)
self.negate_dr = _Negate(width)
super().__init__({
"inp": In(divider_input_signature(self.width)),
"outp": Out(divider_output_signature(self.width))
})
def elaborate(self, plat): # noqa: D102
m = Module()
m.submodules.div_impl = self.div_impl
# Time-division multiplex negation units... in a multicycle div, only
# n/d or q/r need to be negated at once.
m.submodules.negate_nq = self.negate_nq
m.submodules.negate_dr = self.negate_dr
quad = Signal(_Quadrant())
div_inputs = Signal(Inputs(self.width))
m.d.comb += self.div_impl.inp.payload.eq(div_inputs)
with m.FSM():
with m.State("IDLE"):
m.d.comb += [
self.inp.ready.eq(1),
self.negate_nq.inp.eq(self.inp.payload.n),
self.negate_dr.inp.eq(self.inp.payload.d)
]
m.d.comb += self.inp.ready.eq(1)
with m.If(self.inp.valid):
m.next = "DIVIDE_IN"
# Re: (self.inp.payload.d != 0)
# For RISCV compliance when dividing by zero, we need to
# suppress signed-unsigned conversion. Treating the input
# bit pattern as-is will result in the correct behavior of
# "all bits set in divisor" and remainder unchanged,
# regardless of sign.
m.d.sync += [
quad.n.eq(self.inp.payload.n[-1] &
(self.inp.payload.d != 0)),
quad.d.eq(self.inp.payload.d[-1]),
div_inputs.eq(self.inp.payload)
]
with m.If(self.inp.payload.sign == Sign.SIGNED):
with m.If(self.inp.payload.n[-1] &
(self.inp.payload.d != 0)):
m.d.sync += div_inputs.n.eq(self.negate_nq.outp)
with m.If(self.inp.payload.d[-1]):
m.d.sync += div_inputs.d.eq(self.negate_dr.outp)
with m.State("DIVIDE_IN"):
m.d.comb += self.div_impl.inp.valid.eq(1)
with m.If(self.div_impl.inp.ready):
m.next = "DIVIDE_LOOP"
with m.State("DIVIDE_LOOP"):
m.d.comb += [
self.div_impl.outp.ready.eq(1),
self.negate_nq.inp.eq(self.div_impl.outp.payload.q),
self.negate_dr.inp.eq(self.div_impl.outp.payload.r)
]
with m.If(self.div_impl.outp.valid):
m.next = "DIVIDE_OUT"
m.d.sync += [
self.outp.payload.eq(self.div_impl.outp.payload),
]
with m.If((div_inputs.sign == Sign.SIGNED) &
quad.n & ~quad.d):
m.d.sync += [
self.outp.payload.q.eq(self.negate_nq.outp),
self.outp.payload.r.eq(self.negate_dr.outp)
]
with m.If((div_inputs.sign == Sign.SIGNED) &
~quad.n & quad.d):
m.d.sync += self.outp.payload.q.eq(self.negate_nq.outp)
with m.If((div_inputs.sign == Sign.SIGNED) &
quad.n & quad.d):
m.d.sync += self.outp.payload.r.eq(self.negate_dr.outp)
with m.State("DIVIDE_OUT"):
m.d.comb += self.outp.valid.eq(1)
with m.If(self.outp.ready):
m.next = "IDLE"
return m
class _RestoringDiv(Component):
"""Unsigned-only restoring divider."""
def __init__(self, width=8): # noqa: DOC
self.width = width
# sign Signals are unused- Sign.UNSIGNED is implicit.
super().__init__({
"inp": In(divider_input_signature(self.width)),
"outp": Out(divider_output_signature(self.width))
})
def elaborate(self, platform):
m = Module()
# Need extra bit because we need to subtract up to 2^width, which
# should remain negative.
intermediate = Signal(2*self.width + 1)
iters_left = Signal(range(self.inp.payload.n.shape().width + 1))
in_progress = Signal()
s_copy = Signal(Sign)
d_copy = Signal(self.inp.payload.d.shape())
m.d.comb += self.inp.ready.eq((self.outp.ready & self.outp.valid) |
~in_progress)
m.d.comb += in_progress.eq(iters_left != 0)
with m.If(self.outp.valid & self.outp.ready):
m.d.sync += self.outp.valid.eq(0)
with m.If(self.inp.ready & self.inp.valid):
m.d.sync += [
s_copy.eq(self.inp.payload.sign),
d_copy.eq(self.inp.payload.d),
iters_left.eq(self.inp.payload.n.shape().width),
intermediate.eq(self.inp.payload.n)
]
with m.If(in_progress):
# State Control
m.d.sync += iters_left.eq(iters_left - 1)
with m.If((iters_left - 1) == 0):
m.d.sync += self.outp.valid.eq(1)
# Loop iter
# S = (S << 1) - D
with m.If(((intermediate << 1) - (d_copy << self.width)) < 0):
# S += D
m.d.sync += intermediate.eq((intermediate << 1)) # noqa: E501
with m.Else():
# q <<=1 and then q |= 1
# (Docs do q |= 1 and then q <<= 1, requiring a >> at the end.)
m.d.sync += intermediate.eq(((intermediate << 1) -
(d_copy << self.width)) | 1) # noqa: E501
m.d.comb += [
self.outp.payload.q.eq(intermediate[:self.width]),
self.outp.payload.r.eq(intermediate[self.width:]),
self.outp.payload.sign.eq(s_copy)
]
return m
class _NonRestoringDiv(Component):
"""Unsigned-only non-restoring divider."""
def __init__(self, width=8): # noqa: DOC
self.width = width
# sign Signals are unused- Sign.UNSIGNED is implicit.
super().__init__({
"inp": In(divider_input_signature(self.width)),
"outp": Out(divider_output_signature(self.width))
})
def elaborate(self, platform):
m = Module()
# Need extra bit because we need to subtract up to 2^width, which
# should remain negative.
intermediate = Signal(2*self.width + 1)
iters_left = Signal(range(self.inp.payload.n.shape().width + 1))
in_progress = Signal()
restore_step = Signal()
s_copy = Signal(Sign)
d_copy = Signal(self.inp.payload.d.shape())
r = Signal(self.width)
m.d.comb += self.inp.ready.eq((self.outp.ready & self.outp.valid) |
~in_progress)
m.d.comb += in_progress.eq((iters_left != 0) | restore_step)
with m.If(self.outp.valid & self.outp.ready):
m.d.sync += self.outp.valid.eq(0)
with m.If(self.inp.ready & self.inp.valid):
m.d.sync += [
s_copy.eq(self.inp.payload.sign),
d_copy.eq(self.inp.payload.d),
iters_left.eq(self.inp.payload.n.shape().width),
intermediate.eq(self.inp.payload.n)
]
with m.If(in_progress & ~restore_step):
# State Control
m.d.sync += iters_left.eq(iters_left - 1)
with m.If((iters_left - 1) == 0):
m.d.sync += restore_step.eq(1)
# Loop iter
with m.If(intermediate[-1]):
# S = (S << 1) + D, encoded in top half.
# q = -1, encoded as 0, encoded in bottom half.
m.d.sync += intermediate.eq(((intermediate << 1) +
(d_copy << self.width))) # noqa: E501
with m.Else():
# S = (S << 1) - D, encoded in top half.
# q = 1, encoded as 1, encoded in bottom half.
m.d.sync += intermediate.eq(((intermediate << 1) -
(d_copy << self.width)) | 1) # noqa: E501
with m.If(restore_step):
m.d.sync += [
restore_step.eq(0),
self.outp.valid.eq(1)
]
# Extract encoded negative powers of two and then subtract as if
# they were positive powers of two (so no twos complement
# conversion needed).
# Better LUT usage if "r" has its own register.
m.d.sync += [
intermediate[:self.width].eq(
intermediate[:self.width] - ~intermediate[:self.width]),
r.eq(intermediate[self.width:])
]
with m.If(intermediate[-1]):
# S += D
# q -= 1
m.d.sync += [
intermediate[:self.width].eq(
intermediate[:self.width] -
~intermediate[:self.width] -
1),
r.eq(intermediate[self.width:] + d_copy)
]
m.d.comb += [
self.outp.payload.q.eq(intermediate[:self.width]),
self.outp.payload.r.eq(r),
self.outp.payload.sign.eq(s_copy)
]
return m
class LongDivider(Component):
r"""Long-division soft-core, used as a reference.
This divider core is a gateware implementation of classic long division.
* A divide starts on the current cycle when both ``inp.valid`` and
``inp.ready`` are asserted.
* A divide result is available when ``outp.valid`` is asserted. The
result is read/overwritten once a downstream core asserts ``outp.ready``.
.. warning::
This core is not intended to be used in user designs due to poor
resource usage. It is mainly kept as a known-to-work reference design
for possible equivalence checking later. Use :class:`MulticycleDiv`
instead.
* Latency: Divide Results for a will be available ``width`` clock cycles
after assertion of both ``inp.valid`` and ``inp.ready``.
* Throughput: One divide maximum is finished every ``width``
clock cycles.
Parameters
----------
width : int
Width in bits of both inputs ``n`` and ``d``. For signed
multiplies, this includes the sign bit. Outputs ``q`` and ``r`` width
will be the same width.
Attributes
----------
width : int
Bit width of the inputs ``n`` and ``d``, and outputs ``q`` and ``r``.
inp : In(divider_input_signature(width))
Input interface to the divider.
outp : Out(divider_output_signature(width))
Output interface of the divider.
Notes
-----
* This divider is a naive long-division implementation, similar to how
pen-and-pad division is done in base-2/10.
* Internally, the divider is aware of the sign of its inputs as well as
the sign of the divide operation.
The divider will dispatch to one of the 4 possible combinations of signs
during the division loop. The four combinations are:
* Add shifted copies of **positive**/**negative** powers of two
together to form the quotient.
* **Add**/**subtract** shifted copies of the divisor from the dividend
**towards zero** to form the remainder.
* This divider is compliant with RISC-V semantics for divide-by-zero and
overflow when ``width=32`` or ``width=64``\ [rv]_. Specifically:
* Signed/unsigned divide-by-zero returns "all bits set" for the
quotient and the dividend as the remainder.
* Signed overflow (:math:`-2^{\{31,63\}}/-1`) returns
:math:`-2^{\{31,63\}}` for the quotient and :math:`0` as the
remainder.
.. [rv] https://github.com/riscv/riscv-isa-manual/releases/tag/Ratified-IMAFDQC
"""
def __init__(self, width=8):
self.width = width
super().__init__({
"inp": In(divider_input_signature(self.width)),
"outp": Out(divider_output_signature(self.width))
})
def elaborate(self, platform): # noqa: D102
m = Module()
m.submodules.mag = mag = _MagnitudeComparator(2*self.width)
quotient = Signal(2*self.width)
remainder = Signal(2*self.width)
iters_left = Signal(range(self.width))
in_progress = Signal()
s_copy = Signal(Sign)
a_sign = Signal()
n_sign = Signal()
# Reduce latency by 1 cycle by preempting output being read.
m.d.comb += self.inp.ready.eq((self.outp.ready & self.outp.valid) |
~in_progress)
m.d.comb += in_progress.eq(iters_left != 0)
m.d.comb += [
self.outp.payload.q.eq(quotient),
self.outp.payload.r.eq(remainder),
self.outp.payload.sign.eq(s_copy),
]
with m.If(self.outp.ready & self.outp.valid):
m.d.sync += self.outp.valid.eq(0)
with m.If(self.inp.ready & self.inp.valid):
m.d.sync += [
# We handle first cycle using shift_amt mux.
iters_left.eq(self.width - 1),
a_sign.eq(self.inp.payload.n[-1]),
n_sign.eq(self.inp.payload.d[-1]),
s_copy.eq(self.inp.payload.sign)
]
# When dividing by 0, for RISCV compliance, we need the division to
# return -1. Without redirecting quotient calculation to the
# "both dividend/divisor positive" case, dividing a negative
# number by 0 returns 1. Note that the sign-bit processing doesn't
# need to be special-cased, I do it anyway in case I see some
# patterns I can refactor out.
with m.If((self.inp.payload.sign == Sign.SIGNED) &
(self.inp.payload.d == 0) &
self.inp.payload.n[-1]):
m.d.sync += a_sign.eq(0)
m.d.sync += n_sign.eq(0)
# We are committed to do a calc at this point, so might as well
# start it now.
#
# Division quick refresh:
#
# Dividend / Divisor = Quotient
# Dividend % Divisor = Remainder (takes the sign of the Dividend)
#
# Quotient
# Divisor)Dividend
# Remainder
#
# We're doing binary long division in hardware.
# If starting with a positive dividend, we want to subtract
# positive numbers from the dividend until we get as close to 0
# without going below.
# If '' negative dividend '' subtract negative numbers '' as close
# to 0 without going above. This becomes the remainder.
#
# We create the numbers to subtract from the dividend by
# multiplying the divisor by either positive or negative powers
# of two (shifting), depending on the sign of both the dividend
# and divisor. When we add powers all of these two together, we
# get the quotient.
#
# Either way, we can only subtract a shifted divisor from the
# dividend if:
#
# * divisor*2**n <= dividend if dividend is positive.
# * -divisor*2**n >= dividend if dividend is negative.
#
# We can use a magnitude comparator for this.
shift_amt = 2**(self.width - 1)
with m.If(self.inp.payload.sign == Sign.SIGNED):
m.d.comb += [
mag.divisor.eq((self.inp.payload.d.as_signed() * shift_amt).as_signed()), # noqa: E501
mag.dividend.eq(self.inp.payload.n.as_signed())
]
with m.Else():
m.d.comb += [
mag.divisor.eq(self.inp.payload.d.as_unsigned() *
shift_amt),
mag.dividend.eq(self.inp.payload.n.as_unsigned())
]
with m.If(mag.o):
# If dividend/divisor are positive, subtract a positive
# shifted divisor from dividend.
with m.If((self.inp.payload.sign == Sign.UNSIGNED) |
(~self.inp.payload.n[-1] & ~self.inp.payload.d[-1]) |
(self.inp.payload.n[-1] & self.inp.payload.d == 0)):
m.d.sync += quotient.eq(C(1) * shift_amt)
with m.If(self.inp.payload.sign == Sign.SIGNED):
# If high bit is set, and, a signed div,
# we want to sign-extend.
m.d.sync += remainder.eq(self.inp.payload.n.as_signed() - # noqa: E501
(self.inp.payload.d.as_signed() * C(1) * shift_amt).as_signed()) # noqa: E501
with m.Else():
# Otherwise, zero-extend.
m.d.sync += remainder.eq(self.inp.payload.n.as_unsigned() - # noqa: E501
(self.inp.payload.d.as_unsigned() * C(1) * shift_amt)) # noqa: E501
# If dividend is negative, but divisor is positive, create a
# negative shifted divisor and subtract from the dividend.
with m.If((self.inp.payload.sign == Sign.SIGNED) &
self.inp.payload.n[-1] & ~self.inp.payload.d[-1] &
~(self.inp.payload.d == 0 & self.inp.payload.n[-1])):
m.d.sync += quotient.eq(C(-1) * shift_amt)
m.d.sync += remainder.eq(self.inp.payload.n.as_signed() -
(self.inp.payload.d.as_signed() * C(-1) * shift_amt).as_signed()) # noqa: E501
# If dividend is positive, but divisor is negative, create a
# positive shifted divisor and subtract from the dividend.
with m.If((self.inp.payload.sign == Sign.SIGNED) &
~self.inp.payload.n[-1] & self.inp.payload.d[-1]):
m.d.sync += quotient.eq(C(-1) * shift_amt)
m.d.sync += remainder.eq(self.inp.payload.n.as_signed() -
(self.inp.payload.d.as_signed() * C(-1) * shift_amt).as_signed()) # noqa: E501
# If dividend/divisor is negative, subtract a negative
# shifted divisor and subtract from the dividend.
with m.If((self.inp.payload.sign == Sign.SIGNED) &
self.inp.payload.n[-1] & self.inp.payload.d[-1]):
m.d.sync += quotient.eq(C(1) * shift_amt)
m.d.sync += remainder.eq(self.inp.payload.n.as_signed() -
(self.inp.payload.d.as_signed() * C(1) * shift_amt).as_signed()) # noqa: E501
with m.Else():
m.d.sync += quotient.eq(0)
with m.If(self.inp.payload.sign == Sign.SIGNED):
# If high bit is set, and, a signed div,
# we want to sign-extend.
m.d.sync += remainder.eq(self.inp.payload.n.as_signed())
with m.Else():
# Otherwise, zero-extend.
m.d.sync += remainder.eq(self.inp.payload.n.as_unsigned())
# Main division loop.
with m.If(in_progress):
m.d.sync += iters_left.eq(iters_left - 1)
shift_amt = (1 << (iters_left - 1).as_unsigned())
with m.If(self.inp.payload.sign == Sign.SIGNED):
m.d.comb += [
mag.divisor.eq(self.inp.payload.d.as_signed() * shift_amt),
mag.dividend.eq(remainder.as_signed())
]
with m.Else():
m.d.comb += [
mag.divisor.eq(self.inp.payload.d.as_unsigned() * shift_amt), # noqa: E501
mag.dividend.eq(remainder)
]
with m.If(mag.o):
# If dividend/divisor are positive, subtract a positive
# shifted divisor from dividend.
with m.If((s_copy == Sign.UNSIGNED) | (~a_sign & ~n_sign)):
m.d.sync += quotient.eq(quotient + C(1) * shift_amt)
with m.If(s_copy == Sign.SIGNED):
# If high bit is set, and, a signed div,
# we want to sign-extend.
m.d.sync += remainder.eq(remainder -
(self.inp.payload.d.as_signed() * C(1) * shift_amt).as_signed()) # noqa: E501
with m.Else():
# Otherwise, zero-extend.
m.d.sync += remainder.eq(remainder -
(self.inp.payload.d.as_unsigned() * C(1) * shift_amt)) # noqa: E501
# If dividend is negative, but divisor is positive, create a
# negative shifted divisor and subtract from the dividend.
with m.If((s_copy == Sign.SIGNED) & a_sign & ~n_sign):
m.d.sync += quotient.eq(quotient + C(-1) * shift_amt)
m.d.sync += remainder.eq(remainder -
(self.inp.payload.d.as_signed() * C(-1) * shift_amt).as_signed()) # noqa: E501
# If dividend is positive, but divisor is negative, create a
# positive shifted divisor and subtract from the dividend.
with m.If((s_copy == Sign.SIGNED) & ~a_sign & n_sign):
m.d.sync += quotient.eq(quotient + C(-1) * shift_amt)
m.d.sync += remainder.eq(remainder -
(self.inp.payload.d.as_signed() * C(-1) * shift_amt).as_signed()) # noqa: E501
# If dividend/divisor are negative, subtract a negative
# shifted divisor from dividend.
with m.If((s_copy == Sign.SIGNED) & a_sign & n_sign):
m.d.sync += quotient.eq(quotient + C(1) * shift_amt)
m.d.sync += remainder.eq(remainder -
(self.inp.payload.d.as_signed() * C(1) * shift_amt).as_signed()) # noqa: E501
with m.If(iters_left - 1 == 0):
m.d.sync += self.outp.valid.eq(1)
return m
class _MagnitudeComparator(Elaboratable):
"""Compare the magnitude of two signed numbers."""
def __init__(self, width=8): # noqa: DOC
self.width = width
self.dividend = Signal(signed(self.width))
self.divisor = Signal(signed(self.width))
self.o = Signal()
def elaborate(self, platform):
m = Module()
m.d.comb += self.o.eq(abs(self.divisor) <= abs(self.dividend))
return m