From 4c9909f1109f1556f9459f3dbeadfd46e1cc18c6 Mon Sep 17 00:00:00 2001 From: wangkui Date: Sun, 10 Mar 2019 00:05:14 +0800 Subject: [PATCH] fix bugs in testbench --- rtl/R5FP_add_mul.v | 84 ++++-- rtl/R5FP_div.v | 603 ++++++++++++++++++++-------------------- rtl/R5FP_int_div_sqrt.v | 270 +++++++++--------- rtl/R5FP_postproc.v | 108 ++++--- rtl/R5FP_sqrt.v | 426 ++++++++++++++-------------- tb/tb_tf_fadd.v | 74 ++++- tb/tb_tf_fdiv.v | 16 +- tb/tb_tf_fmac.v | 39 +-- tb/tb_tf_fmul.v | 15 +- tb/tb_tf_fsqrt.v | 5 + 10 files changed, 906 insertions(+), 734 deletions(-) diff --git a/rtl/R5FP_add_mul.v b/rtl/R5FP_add_mul.v index 56bf49b..04b0f6a 100644 --- a/rtl/R5FP_add_mul.v +++ b/rtl/R5FP_add_mul.v @@ -9,6 +9,7 @@ module R5FP_add #( input [EXP_W+SIG_W:0] a, b, output [EXP_W-1:0] zExp, + output reg [EXP_W-1:0] tailZeroCnt, output [6-1:0] zStatus, output [SIG_W+4-1:0] zSig, output zSign); @@ -30,6 +31,13 @@ R5FP_add_inner #( .zStatus(zStatus), .zSig(zSig), .zSign(zSign)); + +always @(*) begin + tailZeroCnt=0; + if(zExp>=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W) && zExp<=`EXP_DENORMAL_MAX(EXP_W-1)) begin + tailZeroCnt=1+(`EXP_DENORMAL_MAX(EXP_W-1)-zExp); + end +end endmodule module R5FP_add_inner #( @@ -41,12 +49,11 @@ module R5FP_add_inner #( input [2:0] rnd, output [EXP_W-1:0] zExp, - output [6-1:0] zStatus, + output reg [6-1:0] zStatus, output [SIG_W+4-1:0] zSig, output zSign); wire sign, isNaN, isINF, useA, useB, isZero0, signalNaN, isInvalid; -wire a_s; R5FP_add_special_cases #( .EXP_W(EXP_W), .SIG_W(SIG_W)) sp ( @@ -67,7 +74,6 @@ R5FP_add_core #( .SIG_W(SIG_W)) add ( .a(a), .b(b), .GRT(GRT), .isZero(isZero), .z(z_tmp)); -reg [4:0] zStatus; always @(*) begin zStatus=0; zStatus[`INVALID]=isInvalid; @@ -183,8 +189,8 @@ wire [SIG_W+3:0] bSig={2'b01,b[SIG_W-1:0],2'b0}; wire [EXP_W-1:0] aExp=a[EXP_W+SIG_W-1:SIG_W]; wire [EXP_W-1:0] bExp=b[EXP_W+SIG_W-1:SIG_W]; -parameter width = SIG_W+1; -parameter addr_width = $clog2(width); +localparam width = SIG_W+1; +localparam addr_width = $clog2(width); /* verilator lint_off UNUSED */ /* verilator lint_off WIDTH */ `include "DW_lza_function.inc" @@ -211,9 +217,9 @@ reg [SIG_W+3:0] largerSig, smallerSig, smallerSigSh; reg [SIG_W+3:0] smallerSigSh1; /* verilator lint_on UNUSED */ reg [SIG_W+3:0] zSigExt; -parameter [1:0] NORMAL_ADD=0; -parameter [1:0] NORMAL_SUB=1; -parameter [1:0] SPECIAL_SUB=2; +localparam [1:0] NORMAL_ADD=0; +localparam [1:0] NORMAL_SUB=1; +localparam [1:0] SPECIAL_SUB=2; always @(*) begin lzCount=0; @@ -290,11 +296,13 @@ always @(*) begin end //`DEBUG("Here3 a:%b b:%b zExp:%b (%b) zSigExt:%b sticky:%b",a,b,zExp,`EXP_NORMAL_MAX(EXP_W-1),zSigExt,sticky); +//synopsys translate_off if(zSigExt[SIG_W+3:SIG_W+2]!=2'b01 && zSigExt!=0) begin $display("zSigExt has wrong leading 1 bit!! %d-- a:%b b:%b largerSig:%b smallerSig:%b smallerSigSh:%b zSigExt:%b", opType, a,b, largerSig, smallerSig, smallerSigSh, zSigExt); $finish(); end +//synopsys translate_on zSig=zSigExt[SIG_W+1:2]; GRT={zSigExt[1:0],sticky}; end @@ -306,8 +314,8 @@ module R5FP_mul #( parameter SIG_W=10) ( input [EXP_W+SIG_W:0] a, b, - output [EXP_W-1:0] zExp, - output [6-1:0] zStatus, + output [EXP_W-1:0] zExp,tailZeroCnt, + output reg [6-1:0] zStatus, output [SIG_W*2+2:0] zSig, output toInf, output zSign); @@ -328,10 +336,9 @@ R5FP_mul_core #( .EXP_W(EXP_W), .SIG_W(SIG_W)) mul ( .a(a), .b(b), .z(z_tmp), - .toInf(toInfPre)); + .toInf(toInfPre), .tailZeroCnt(tailZeroCnt)); assign toInf=toInfPre&&(!isINF)&&(!isNaN); -reg [5:0] zStatus; always @(*) begin zStatus=0; zStatus[`INVALID]=isInvalid; @@ -416,12 +423,40 @@ always@(*) begin end endmodule +module R5FP_mul_by_1 #( + parameter EXP_W=5, + parameter SIG_W=10) ( + input [EXP_W+SIG_W:0] a, + output [EXP_W-1:0] zExp, + output reg [6-1:0] zStatus, + output [SIG_W*2+2:0] zSig, + output zSign); + +wire [EXP_W-1:0] bExp=(1<=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W) && zExp[EXP_W-1:0]<=`EXP_DENORMAL_MAX(EXP_W-1)) begin + tailZeroCnt=1+(`EXP_DENORMAL_MAX(EXP_W-1)-zExp[EXP_W-1:0]); + end zSig=zSigExt[SIG_W*2:0]; end @@ -481,6 +522,7 @@ module R5FP_acc #( output zToInf, output reg specialTiny, output [EXP_W-1:0] zExp, + output reg [EXP_W-1:0] tailZeroCnt, output reg [6-1:0] zStatus, output [SIG_W*2+4:0] zSig, output zSign); @@ -517,6 +559,13 @@ R5FP_add_inner #( .zSig(zSig), .zSign(zSign)); +always @(*) begin + tailZeroCnt=0; + if(zExp>=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W) && zExp<=`EXP_DENORMAL_MAX(EXP_W-1)) begin + tailZeroCnt=1+(`EXP_DENORMAL_MAX(EXP_W-1)-zExp); + end +end + assign zStatus={zStatusPre[`INVALID]|dStatus[`INVALID], zStatusPre[4:0]}; logic cSign; @@ -527,14 +576,15 @@ wire cIsInfOrNaN=(&cExp)==1; assign zToInf=toInf&&!cIsInfOrNaN; always @(*) begin - reg tooSmall,diffSign,sameSignCross,cMarginal; + reg dSmall,cSmall,diffSign,sameSignCross,cMarginal; specialTiny=0; if(rnd==`RND_NEAREST_EVEN||rnd==`RND_NEAREST_UP) begin - tooSmall=(dExp<=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)); + dSmall=(dExp==`EXP_DENORMAL_MAX(EXP_W-1) || dExp==`EXP_DENORMAL_MAX(EXP_W-1)-1); cMarginal=(cExp==`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)&&cSig==0); diffSign=(!dStatus[`Z_IS_ZERO])&&(dSign!=cSign); - sameSignCross=(!dStatus[`Z_IS_ZERO])&&(cExp<=`EXP_DENORMAL_MAX(EXP_W-1)&&cSig!=0)&&(dSign==cSign); - if(tooSmall&&(diffSign||sameSignCross)) + cSmall=(cExp==`EXP_DENORMAL_MAX(EXP_W-1) || cExp==`EXP_DENORMAL_MAX(EXP_W-1)-1); + sameSignCross=(!dStatus[`Z_IS_ZERO])&&cSmall&&(dSign==cSign); + if(dSmall||(diffSign||sameSignCross)) specialTiny=1; if(cMarginal) specialTiny=1; diff --git a/rtl/R5FP_div.v b/rtl/R5FP_div.v index 847fc31..2fab78d 100644 --- a/rtl/R5FP_div.v +++ b/rtl/R5FP_div.v @@ -1,297 +1,306 @@ - -`include "R5FP_inc.vh" - -module R5FP_div_special_cases #( - parameter EXP_W=5, - parameter SIG_W=10) ( - input [EXP_W+SIG_W:0] a, b, - output reg sign, isNaN, newNaN, isINF, isInvalid, isZero, isDivByZero, isOne, useA, useB); - -localparam E_MAX=((1<bSig_r; -always @(*) begin - toInf=0; - xExp = {1'b0,aExp_r} - {1'b0,bExp_r} + { {EXP_W{1'b0}}, rightShiftA }; - if(isOne) begin - xExp = xExp + ((1 << (EXP_W-1)) - 1); - end - else begin - xExp = xExp + ((1 << (EXP_W-1)) - 1) - 1; - end - if(xExp<`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)-3) begin - xExp=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)-3; - end - else if(xExp>`EXP_NORMAL_MAX(EXP_W-1)) begin - xExp=`EXP_NORMAL_MAX(EXP_W-1)+1; - if(aSig_r>=bSig_r) toInf=1; - end -end - -always @(*) begin - logic [EXP_W-1:0] tmpExp; - logic awayFromInf; - status_fast = 0; - status_fast[`Z_INVALID] = isInvalid; - status_fast[`Z_IS_INF] = isINF; - status_fast[`Z_IS_ZERO] = isZero; - status_fast[`Z_DIV_BY_0] = isDivByZero; - x_fast = {sign, {EXP_W{1'b1}}, 1'b1, {(SIG_W-1){1'b0}}}; //INF - - if(isZero) x_fast = {sign, {EXP_W{1'b0}}, {SIG_W{1'b0}}}; - else if(isINF) x_fast = {sign, {EXP_W{1'b1}}, {SIG_W{1'b0}}}; - else if(isNaN&&useA) x_fast = {sign, aExp_r, aSig_r}; - else if(isNaN&&useB) x_fast = {sign, bExp_r, bSig_r}; - else if(isOne) begin - x_fast={sign, xExp[EXP_W-1:0], {SIG_W{1'b0}}}; - if(xExp<`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)) begin - if( (rnd_r==`RND_UP&&sign==1'b0)|| - (rnd_r==`RND_DOWN&&sign==1'b1)|| - (rnd_r==`RND_FROM_ZERO) || - (rnd_r==`RND_NEAREST_UP&&xExp==`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)-1)) begin - tmpExp=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W); - x_fast={sign, tmpExp, {SIG_W{1'b0}}}; - status_fast[`Z_IS_ZERO] = 0; -`ifdef FORCE_DW_DIV_BEHAVIOR - status_fast[`Z_TINY] = 0; -`else - status_fast[`Z_TINY] = 1; -`endif - status_fast[`Z_INEXACT] = 1; - end - else begin - x_fast={sign, {EXP_W{1'b0}}, {SIG_W{1'b0}}}; - status_fast[`Z_IS_ZERO] = 1; - status_fast[`Z_TINY] = 1; - status_fast[`Z_INEXACT] = 1; - end - end - end - - if(toInf&&(!isINF)&&(!isNaN)) begin - x_fast={sign, {EXP_W{1'b1}}, {SIG_W{1'b0}}}; - status_fast[`Z_IS_INF] = 1; - status_fast[`Z_HUGE] = 1; - status_fast[`Z_INEXACT] = 1; - end - - awayFromInf=(rnd_r==`RND_TO_ZERO || - (rnd_r==`RND_UP && sign==1'b1) || - (rnd_r==`RND_DOWN && sign==1'b0) ); - if(awayFromInf&&(!isINF)&&(!isNaN)&&xExp==`EXP_NORMAL_MAX(EXP_W-1)+1) begin - // larger than the largest possible value by 1 - tmpExp=`EXP_NORMAL_MAX(EXP_W-1); - x_fast={sign, tmpExp, {SIG_W{1'b1}}}; - status_fast[`Z_IS_INF] = 0; - status_fast[`Z_HUGE] = 1; - status_fast[`Z_INEXACT] = 1; - end -end - -//always @(posedge clk) begin -// $display("%d X aExp_r:%b bExp_r:%b rightShiftA:%b xExp:%b(max:%b min:%b) status_fast:%b isOne:%b use_fast:%b toInf:%b isINF:%b newNaN:%b",$time,aExp_r,bExp_r,rightShiftA,xExp,`EXP_NORMAL_MAX(EXP_W),`EXP_DENORMAL_MIN(EXP_W-1,SIG_W),status_fast, isOne, use_fast, toInf, isINF, newNaN); -//end - -generate - // idiv_D's length must be even - if (SIG_W%2==1) begin - assign idiv_N = rightShiftA? {3'b001, aSig_r} : {2'b01, aSig_r, 1'b0}; - assign idiv_D = {2'b01, bSig_r, 1'b0}; - end - else begin - assign idiv_N = rightShiftA? {3'b001, aSig_r, 1'b0} : {2'b01, aSig_r, 2'b0}; - assign idiv_D = {2'b01, bSig_r, 2'b0}; - end -endgenerate - -always @(posedge clk) begin - //if(idiv_strobe) $display("%d idiv_D:%b aSig_r:%b expNoBias:%b aExp_r:%b",$time, - // idiv_D, aSig_r, expNoBias, aExp_r); - //$display("%d use_fast:%b a:%b-%b strobe_r:%b x_fast:%b status_fast:%b", - // $time, use_fast, aExp_r, aSig_r, strobe_r, x_fast, status_fast); - if(strobe_r) begin - if(use_fast) begin - x_reg<=x_fast; - status_reg<=status_fast; - end - else begin - xExp_r<=xExp[EXP_W-1:0]; - end - end -end -assign xExp_o=xExp_r; - -always @(posedge clk) begin - if(reset) use_fast_r<=1'b0; - else if(strobe_r) use_fast_r<=use_fast; - else if(use_fast_r) use_fast_r<=1'b0; -end - - -wire [ExtWidth-1:0] Quo={ExtWidth{idiv_done}}&idiv_Quo; - -wire stickyBit; -wire roundBit; -wire [SIG_W+2-1:0] xSig; -generate - if (SIG_W%2==1) begin - assign stickyBit = idiv_Rem!=0; - assign xSig = Quo[ExtWidth - 1:1]; - assign roundBit = Quo[0]; - end - else begin - assign stickyBit = idiv_Rem!=0 || Quo[0]!=0; - assign xSig = Quo[ExtWidth - 1:2]; - assign roundBit = Quo[1]; - end -endgenerate - -assign xSig_o={xSig,roundBit}; -assign xMidStatus_o={1'b0, sign, stickyBit, 3'b0}; - -assign xStatus_fast_o = status_reg; -assign x_fast_o = x_reg; -assign x_use_fast = use_fast_r; - -assign done_o = use_fast_r||idiv_done; -assign ready_o = idiv_ready&&~strobe_r; - -endmodule + +`include "R5FP_inc.vh" + +module R5FP_div_special_cases #( + parameter EXP_W=5, + parameter SIG_W=10) ( + input [EXP_W+SIG_W:0] a, b, + output reg sign, isNaN, newNaN, isINF, isInvalid, isZero, isDivByZero, isOne, useA, useB); + +localparam E_MAX=((1<bSig_r; +always @(*) begin + toInf=0; + xExp = {1'b0,aExp_r} - {1'b0,bExp_r} + { {EXP_W{1'b0}}, rightShiftA }; + if(isOne) begin + xExp = xExp + ((1 << (EXP_W-1)) - 1); + end + else begin + xExp = xExp + ((1 << (EXP_W-1)) - 1) - 1; + end + if(xExp<`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)-3) begin + xExp=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)-3; + end + else if(xExp>`EXP_NORMAL_MAX(EXP_W-1)) begin + xExp=`EXP_NORMAL_MAX(EXP_W-1)+1; + if(aSig_r>=bSig_r) toInf=1; + end +end + +always @(*) begin + logic [EXP_W-1:0] tmpExp; + logic awayFromInf; + status_fast = 0; + status_fast[`Z_INVALID] = isInvalid; + status_fast[`Z_IS_INF] = isINF; + status_fast[`Z_IS_ZERO] = isZero; + status_fast[`Z_DIV_BY_0] = isDivByZero; + x_fast = {sign, {EXP_W{1'b1}}, 1'b1, {(SIG_W-1){1'b0}}}; //INF + + if(isZero) x_fast = {sign, {EXP_W{1'b0}}, {SIG_W{1'b0}}}; + else if(isINF) x_fast = {sign, {EXP_W{1'b1}}, {SIG_W{1'b0}}}; + else if(isNaN&&useA) x_fast = {sign, aExp_r, aSig_r}; + else if(isNaN&&useB) x_fast = {sign, bExp_r, bSig_r}; + else if(isOne) begin + x_fast={sign, xExp[EXP_W-1:0], {SIG_W{1'b0}}}; + if(xExp<`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)) begin + if( (rnd_r==`RND_UP&&sign==1'b0)|| + (rnd_r==`RND_DOWN&&sign==1'b1)|| + (rnd_r==`RND_FROM_ZERO) || + (rnd_r==`RND_NEAREST_UP&&xExp==`EXP_DENORMAL_MIN(EXP_W-1,SIG_W)-1)) begin + tmpExp=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W); + x_fast={sign, tmpExp, {SIG_W{1'b0}}}; + status_fast[`Z_IS_ZERO] = 0; +`ifdef FORCE_DW_DIV_BEHAVIOR + status_fast[`Z_TINY] = 0; +`else + status_fast[`Z_TINY] = 1; +`endif + status_fast[`Z_INEXACT] = 1; + end + else begin + x_fast={sign, {EXP_W{1'b0}}, {SIG_W{1'b0}}}; + status_fast[`Z_IS_ZERO] = 1; + status_fast[`Z_TINY] = 1; + status_fast[`Z_INEXACT] = 1; + end + end + end + + if(toInf&&(!isINF)&&(!isNaN)) begin + x_fast={sign, {EXP_W{1'b1}}, {SIG_W{1'b0}}}; + status_fast[`Z_IS_INF] = 1; + status_fast[`Z_HUGE] = 1; + status_fast[`Z_INEXACT] = 1; + end + + awayFromInf=(rnd_r==`RND_TO_ZERO || + (rnd_r==`RND_UP && sign==1'b1) || + (rnd_r==`RND_DOWN && sign==1'b0) ); + if(awayFromInf&&(!isINF)&&(!isNaN)&&xExp==`EXP_NORMAL_MAX(EXP_W-1)+1) begin + // larger than the largest possible value by 1 + tmpExp=`EXP_NORMAL_MAX(EXP_W-1); + x_fast={sign, tmpExp, {SIG_W{1'b1}}}; + status_fast[`Z_IS_INF] = 0; + status_fast[`Z_HUGE] = 1; + status_fast[`Z_INEXACT] = 1; + end +end + +//always @(posedge clk) begin +// $display("%d X aExp_r:%b bExp_r:%b rightShiftA:%b xExp:%b(max:%b min:%b) status_fast:%b isOne:%b use_fast:%b toInf:%b isINF:%b newNaN:%b",$time,aExp_r,bExp_r,rightShiftA,xExp,`EXP_NORMAL_MAX(EXP_W),`EXP_DENORMAL_MIN(EXP_W-1,SIG_W),status_fast, isOne, use_fast, toInf, isINF, newNaN); +//end + +generate + // idiv_D's length must be even + if (SIG_W%2==1) begin + assign idiv_N = rightShiftA? {3'b001, aSig_r} : {2'b01, aSig_r, 1'b0}; + assign idiv_D = {2'b01, bSig_r, 1'b0}; + end + else begin + assign idiv_N = rightShiftA? {3'b001, aSig_r, 1'b0} : {2'b01, aSig_r, 2'b0}; + assign idiv_D = {2'b01, bSig_r, 2'b0}; + end +endgenerate + +always @(posedge clk) begin + //if(idiv_strobe) $display("%d idiv_D:%b aSig_r:%b expNoBias:%b aExp_r:%b",$time, + // idiv_D, aSig_r, expNoBias, aExp_r); + //$display("%d use_fast:%b a:%b-%b strobe_r:%b x_fast:%b status_fast:%b", + // $time, use_fast, aExp_r, aSig_r, strobe_r, x_fast, status_fast); + reg [EXP_W-1:0] exp; + exp=xExp[EXP_W-1:0]; + if(strobe_r) begin + if(use_fast) begin + x_reg<=x_fast; + status_reg<=status_fast; + end + else begin + xExp_r<=exp; + if(exp>=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W) && exp<=`EXP_DENORMAL_MAX(EXP_W-1)) begin + tailZeroCnt_r<=1+(`EXP_DENORMAL_MAX(EXP_W-1)-exp); + end + else begin + tailZeroCnt_r<=0; + end + end + end +end +assign xExp_o=xExp_r; +assign tailZeroCnt_o=tailZeroCnt_r; + +always @(posedge clk) begin + if(reset) use_fast_r<=1'b0; + else if(strobe_r) use_fast_r<=use_fast; + else if(use_fast_r) use_fast_r<=1'b0; +end + + +wire [ExtWidth-1:0] Quo={ExtWidth{idiv_done}}&idiv_Quo; + +wire stickyBit; +wire roundBit; +wire [SIG_W+2-1:0] xSig; +generate + if (SIG_W%2==1) begin + assign stickyBit = idiv_Rem!=0; + assign xSig = Quo[ExtWidth - 1:1]; + assign roundBit = Quo[0]; + end + else begin + assign stickyBit = idiv_Rem!=0 || Quo[0]!=0; + assign xSig = Quo[ExtWidth - 1:2]; + assign roundBit = Quo[1]; + end +endgenerate + +assign xSig_o={xSig,roundBit}; +assign xMidStatus_o={1'b0, sign, stickyBit, 3'b0}; + +assign xStatus_fast_o = status_reg; +assign x_fast_o = x_reg; +assign x_use_fast = use_fast_r; + +assign done_o = use_fast_r||idiv_done; +assign ready_o = idiv_ready&&~strobe_r; + +endmodule diff --git a/rtl/R5FP_int_div_sqrt.v b/rtl/R5FP_int_div_sqrt.v index 2289fa2..495a97b 100644 --- a/rtl/R5FP_int_div_sqrt.v +++ b/rtl/R5FP_int_div_sqrt.v @@ -1,135 +1,135 @@ - -module R5FP_int_div_sqrt #(parameter W=6) ( - input [W-1:0] N_i,D_i, - input strobe_i, is_div_i, - output [W-1:0] Quo_o, Rem_o, - output reg done_o, - output ready_o, - input clk,reset); - -localparam CW=$clog2(W)-1; - -reg is_div_r; -always @(posedge clk) if(strobe_i) is_div_r<=is_div_i; - -reg [W-1:0] D_r; -reg [W+1:0] P_r; -reg [CW-1:0] counter; -reg [W-1:0] Q_r; -reg idle_r; -assign ready_o=idle_r; -assign Quo_o={W{done_o}}&Q_r; -assign Rem_o=is_div_r? P_r[W-1:0] : - { {(W-1){1'b0}}, Quo_o!=~({W{done_o}}&P_r[W:1])}; - -reg [W+1:0] nextP_a, nextP_b, nextP_c; -reg [W+1:0] nnP1_a, nnP1_b, nnP1_c, nnP1_d; -reg [W+1:0] nnP0_a, nnP0_b, nnP0_c, nnP0_d; -wire [W+1:0] nextP=nextP_a+nextP_b+nextP_c; -wire [W+1:0] nnP1=nnP1_a+nnP1_b+nnP1_c+nnP1_d; -wire [W+1:0] nnP0=nnP0_a+nnP0_b+nnP0_c+nnP0_d; -/* verilator lint_off WIDTH */ -always @(*) begin - if(is_div_r) begin - //nextP={P_r,1'b0} - D_r; - //nnP1={P_r,2'b0} - {D_r,1'b0} - D_r; - //nnP0={P_r,2'b0} - D_r; - nextP_a={P_r,1'b0}; nextP_b=~D_r; nextP_c=1; - nnP1_a={P_r,2'b0}; nnP1_b=~{D_r,1'b0}; - nnP1_c=~D_r; nnP1_d=2; - nnP0_a={P_r,2'b0}; nnP0_b=~D_r; - nnP0_c=0; nnP0_d=1; - end - else begin - nextP_a={P_r,2'b0}; nextP_c=D_r[W-1:W-2]; nextP_b={P_r[W+1] ? Q_r : ~Q_r, 2'b11}; - nnP1_a={P_r,4'b0}; nnP1_b={P_r[W+1] ? Q_r : ~Q_r, 4'b1100}; - nnP1_c={Q_r[W-2:0],3'b011}; nnP1_d=D_r[W-1:W-4]; - nnP0_a={P_r,4'b0}; nnP0_b={P_r[W+1] ? Q_r : ~Q_r, 4'b1100}; - nnP0_c={~Q_r[W-2:0],3'b011}; nnP0_d=D_r[W-1:W-4]; - end -end -/* verilator lint_on WIDTH */ - -always @(posedge clk) begin - assert(W%2==0); - if(reset) begin - counter<=0; - end - else if(strobe_i) begin - if(is_div_i) begin - P_r[W-1:0]<=N_i; - D_r<=D_i; - Q_r<={W{1'b0}}; - counter<=0; - //$display("Input N:%b D:%b", N_i, D_i); - end - else begin - P_r<=0; - D_r<=D_i; - if(!reset) assert(D_i[W-1]==1'b0); - Q_r<={W{1'b0}}; - counter<=0; - //$display("INT Input D:%b", D_i); - end - end - else if(!idle_r) begin - if(is_div_r) begin - reg [W-1:0] nnP,nnQ; - reg [W-2:0] nextQ; - nextQ={Q_r[W-3:0], nextP[W-1]==1'b0}; - if(nextP[W-1]==1'b0) begin - nnP=(nnP1[W-1]==1'b0)? nnP1[W-1:0] : {nextP[W-2:0],1'b0}; - nnQ={nextQ[W-2:0], nnP1[W-1]==1'b0}; - end - else begin - nnP=(nnP0[W-1]==1'b0)? nnP0[W-1:0] : {P_r[W-3:0],2'b0}; - nnQ={nextQ[W-2:0], nnP0[W-1]==1'b0}; - end - Q_r<=nnQ; - P_r[W-1:0]<=nnP; - counter<=counter+1; - //$display("Now P:%b->%b Q:%b->", P_r,nnP,Q_r,nnQ); - end - else begin - reg [W+1:0] nnP; - reg [W-1:0] nnQ; - - if(nextP[W+1]) begin - nnP=nnP1; - nnQ={Q_r[W-3:0],1'b0,~nnP1[W+1]}; - end - else begin - nnP=nnP0; - nnQ={Q_r[W-3:0],1'b1,~nnP0[W+1]}; - end - P_r<=nnP; - Q_r<=nnQ; - counter<=counter+1; - D_r<=D_r<<4; - //$display("INT Now D_r:%b(%d) P:%b(%d)->%b(%d) q:%b(%d)->%b(%d) %b %b", - // D_r,D_r,P_r,P_r,nnP,nnP,Q_r,Q_r,nnQ,nnQ, nnQ,~nnP[W:1]); - end - end -end - -always @(posedge clk) begin - if(reset) begin - done_o<=1'b0; - idle_r<=1'b1; - end - else if(strobe_i) begin - done_o<=1'b0; - idle_r<=1'b0; - end - else if(done_o) begin - done_o<=1'b0; - end - /* verilator lint_off WIDTH */ - else if(counter==W/2-1) begin - done_o<=1'b1; - idle_r<=1'b1; - end - /* verilator lint_on WIDTH */ -end - -endmodule + +module R5FP_int_div_sqrt #(parameter W=6) ( + input [W-1:0] N_i,D_i, + input strobe_i, is_div_i, + output [W-1:0] Quo_o, Rem_o, + output reg done_o, + output ready_o, + input clk,reset); + +localparam CW=$clog2(W)-1; + +reg is_div_r; +always @(posedge clk) if(strobe_i) is_div_r<=is_div_i; + +reg [W-1:0] D_r; +reg [W+1:0] P_r; +reg [CW-1:0] counter; +reg [W-1:0] Q_r; +reg idle_r; +assign ready_o=idle_r; +assign Quo_o={W{done_o}}&Q_r; +assign Rem_o=is_div_r? P_r[W-1:0] : + { {(W-1){1'b0}}, Quo_o!=~({W{done_o}}&P_r[W:1])}; + +reg [W+1:0] nextP_a, nextP_b, nextP_c; +reg [W+1:0] nnP1_a, nnP1_b, nnP1_c, nnP1_d; +reg [W+1:0] nnP0_a, nnP0_b, nnP0_c, nnP0_d; +wire [W+1:0] nextP=nextP_a+nextP_b+nextP_c; +wire [W+1:0] nnP1=nnP1_a+nnP1_b+nnP1_c+nnP1_d; +wire [W+1:0] nnP0=nnP0_a+nnP0_b+nnP0_c+nnP0_d; +/* verilator lint_off WIDTH */ +always @(*) begin + if(is_div_r) begin + //nextP={P_r,1'b0} - D_r; + //nnP1={P_r,2'b0} - {D_r,1'b0} - D_r; + //nnP0={P_r,2'b0} - D_r; + nextP_a={P_r,1'b0}; nextP_b=~D_r; nextP_c=1; + nnP1_a={P_r,2'b0}; nnP1_b=~{D_r,1'b0}; + nnP1_c=~D_r; nnP1_d=2; + nnP0_a={P_r,2'b0}; nnP0_b=~D_r; + nnP0_c=0; nnP0_d=1; + end + else begin + nextP_a={P_r,2'b0}; nextP_c=D_r[W-1:W-2]; nextP_b={P_r[W+1] ? Q_r : ~Q_r, 2'b11}; + nnP1_a={P_r,4'b0}; nnP1_b={P_r[W+1] ? Q_r : ~Q_r, 4'b1100}; + nnP1_c={Q_r[W-2:0],3'b011}; nnP1_d=D_r[W-1:W-4]; + nnP0_a={P_r,4'b0}; nnP0_b={P_r[W+1] ? Q_r : ~Q_r, 4'b1100}; + nnP0_c={~Q_r[W-2:0],3'b011}; nnP0_d=D_r[W-1:W-4]; + end +end +/* verilator lint_on WIDTH */ + +always @(posedge clk) begin + assert(W%2==0); + if(reset) begin + counter<=0; + end + else if(strobe_i) begin + if(is_div_i) begin + P_r[W-1:0]<=N_i; + D_r<=D_i; + Q_r<={W{1'b0}}; + counter<=0; + //$display("Input N:%b D:%b", N_i, D_i); + end + else begin + P_r<=0; + D_r<=D_i; + if(!reset) assert(D_i[W-1]==1'b0); + Q_r<={W{1'b0}}; + counter<=0; + //$display("INT Input D:%b", D_i); + end + end + else if(!idle_r) begin + if(is_div_r) begin + reg [W-1:0] nnP,nnQ; + reg [W-2:0] nextQ; + nextQ={Q_r[W-3:0], nextP[W-1]==1'b0}; + if(nextP[W-1]==1'b0) begin + nnP=(nnP1[W-1]==1'b0)? nnP1[W-1:0] : {nextP[W-2:0],1'b0}; + nnQ={nextQ[W-2:0], nnP1[W-1]==1'b0}; + end + else begin + nnP=(nnP0[W-1]==1'b0)? nnP0[W-1:0] : {P_r[W-3:0],2'b0}; + nnQ={nextQ[W-2:0], nnP0[W-1]==1'b0}; + end + Q_r<=nnQ; + P_r[W-1:0]<=nnP; + counter<=counter+1; + //$display("Now P:%b->%b Q:%b->", P_r,nnP,Q_r,nnQ); + end + else begin + reg [W+1:0] nnP; + reg [W-1:0] nnQ; + + if(nextP[W+1]) begin + nnP=nnP1; + nnQ={Q_r[W-3:0],1'b0,~nnP1[W+1]}; + end + else begin + nnP=nnP0; + nnQ={Q_r[W-3:0],1'b1,~nnP0[W+1]}; + end + P_r<=nnP; + Q_r<=nnQ; + counter<=counter+1; + D_r<=D_r<<4; + //$display("INT Now D_r:%b(%d) P:%b(%d)->%b(%d) q:%b(%d)->%b(%d) %b %b", + // D_r,D_r,P_r,P_r,nnP,nnP,Q_r,Q_r,nnQ,nnQ, nnQ,~nnP[W:1]); + end + end +end + +always @(posedge clk) begin + if(reset) begin + done_o<=1'b0; + idle_r<=1'b1; + end + else if(strobe_i) begin + done_o<=1'b0; + idle_r<=1'b0; + end + else if(done_o) begin + done_o<=1'b0; + end + /* verilator lint_off WIDTH */ + else if(counter==W/2-1) begin + done_o<=1'b1; + idle_r<=1'b1; + end + /* verilator lint_on WIDTH */ +end + +endmodule diff --git a/rtl/R5FP_postproc.v b/rtl/R5FP_postproc.v index 6061806..48f4e82 100644 --- a/rtl/R5FP_postproc.v +++ b/rtl/R5FP_postproc.v @@ -1,28 +1,54 @@ +`include "R5FP_inc.vh" + `define DEBUG $display `define FUNC_POSTPROC func_postproc + +module R5FP_postproc_prepare #( + parameter I_SIG_W=27, + parameter SIG_W=23, + parameter EXP_W=9) ( + input [EXP_W-1:0] tailZeroCnt, + output [I_SIG_W:0] mask, maskB); + +reg [I_SIG_W+SIG_W:0] tmpMask,tmpMaskB; +always @(*) begin + tmpMask=0; + tmpMask[I_SIG_W-3:0]={(I_SIG_W-2){1'b1}}; + tmpMask<<=tailZeroCnt; + tmpMaskB=0; + tmpMaskB[I_SIG_W-2]=1'b1; + tmpMaskB<<=tailZeroCnt; +/* verilator lint_off WIDTH */ + mask=tmpMask>>SIG_W; + maskB=tmpMaskB>>SIG_W; +/* verilator lint_on WIDTH */ +end + +endmodule + module R5FP_postproc_core #( - parameter I_SIG_W=25, + parameter I_SIG_W=27, parameter SIG_W=23, parameter EXP_W=9) ( input [EXP_W-1:0] aExp, - input [6-1:0] aStatus, /* verilator lint_off UNUSED */ + input [6-1:0] aStatus, input [I_SIG_W-1:0] aSig, + input [I_SIG_W:0] mask,maskB, /* verilator lint_on UNUSED */ input [2:0] rnd, - input aSign, + input aSign, specialTiny, zToInf, input [EXP_W-1:0] tailZeroCnt, - output specialZRnd, output reg [SIG_W+EXP_W:0] z, output reg [7:0] zStatus); reg sticky, round_bit, guard_bit; reg useMinValue; -wire specialRnd=(aSig[I_SIG_W-SIG_W-2:I_SIG_W-SIG_W-3]==2'b10); -reg specialZ; -assign specialZRnd=specialZ&&specialRnd; +/* verilator lint_off UNUSED */ +wire specialRnd=specialTiny&&(aSig[I_SIG_W-SIG_W-2:I_SIG_W-SIG_W-3]==2'b10); +/* verilator lint_on UNUSED */ always @(*) begin reg [EXP_W+2:0] aExpExt; reg signed [I_SIG_W:0] aSig2; @@ -30,12 +56,11 @@ always @(*) begin reg [SIG_W-1:0] oneSig; reg [EXP_W-1:0] zeroExp,minExp; reg [EXP_W-1:0] allOnesExp; - reg [I_SIG_W+SIG_W:0] aSig3,rnd_bits,aSig3Tail,aSig4; - reg [SIG_W-1:0] mask; + reg [I_SIG_W+SIG_W:0] aSig3,rnd_bits,aSig4; + reg [SIG_W-1:0] tmp; reg [SIG_W-1:0] zSig; reg roundCarry,needShift; // variable initialization - specialZ=0; useMinValue=0; zeroSig = 0; oneSig = 0; @@ -47,7 +72,6 @@ always @(*) begin aSig3 = 0; z = 0; rnd_bits = 0; - mask = 0; zStatus = 0; zStatus[`Z_IS_ZERO] = aStatus[`IS_ZERO]; @@ -109,22 +133,24 @@ always @(*) begin aSig3 = { {SIG_W{1'b0}}, aSig2 } << SIG_W; - aSig3Tail=aSig3>>tailZeroCnt; - sticky = (|aSig3Tail[I_SIG_W-1-1-1:0]) || sticky; - round_bit = aSig3Tail[I_SIG_W-1-1]; - guard_bit = aSig3Tail[I_SIG_W-1]; + //aSig3Tail=aSig3>>tailZeroCnt; + //sticky = (|aSig3Tail[I_SIG_W-1-1-1:0]) || sticky; + //round_bit = aSig3Tail[I_SIG_W-1-1]; + //guard_bit = aSig3Tail[I_SIG_W-1]; + sticky = (|(aSig2&mask)) || sticky; + round_bit = |(aSig2&maskB); + guard_bit = |(aSig2&(maskB<<1)); //`DEBUG("Here5 aExp:%b aSig:%b aSig2:%b aSig3:%b tailZeroCnt:%d aSig3Tail:%b sticky:%b aSig3Tail[I_SIG_W-1-1-1:0]:%b guard_bit:%b round_bit:%b aStatus:%b zStatus:%b", aExp, aSig,aSig2,aSig3,tailZeroCnt,aSig3Tail, sticky, aSig3Tail[I_SIG_W-1-1-1:0],guard_bit,round_bit,aStatus,zStatus); roundCarry = getRoundCarry(rnd, aSign, guard_bit, round_bit, sticky); - rnd_bits[0] = roundCarry; + rnd_bits[0] = 1'b1; rnd_bits = rnd_bits<<(I_SIG_W-1); rnd_bits = rnd_bits<=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W) && aExp<=`EXP_DENORMAL_MAX(EXP_W-1)) begin - tailZeroCnt=1+(`EXP_DENORMAL_MAX(EXP_W-1)-aExp); - end -end +//reg [EXP_W-1:0] tailZeroCnt0; +//always @(*) begin +// tailZeroCnt0=0; +// if(aExp>=`EXP_DENORMAL_MIN(EXP_W-1,SIG_W) && aExp<=`EXP_DENORMAL_MAX(EXP_W-1)) begin +// tailZeroCnt0=1+(`EXP_DENORMAL_MAX(EXP_W-1)-aExp); +// end +//end + +wire [I_SIG_W:0] mask, maskB; +R5FP_postproc_prepare #( + .I_SIG_W(I_SIG_W), + .SIG_W(SIG_W), + .EXP_W(EXP_W)) prepare ( + .tailZeroCnt(tailZeroCnt), + .mask(mask), + .maskB(maskB)); R5FP_postproc_core #( .I_SIG_W(I_SIG_W), @@ -264,10 +305,13 @@ R5FP_postproc_core #( .aExp(aExp), .aStatus(aStatus), .aSig(aSig), + .mask(mask), + .maskB(maskB), .rnd(rnd), .aSign(aSign), + .specialTiny(specialTiny), + .zToInf(zToInf), .tailZeroCnt(tailZeroCnt), - .specialZRnd(specialZRnd), .z(z), .zStatus(zStatus)); diff --git a/rtl/R5FP_sqrt.v b/rtl/R5FP_sqrt.v index fe90fd3..40ec200 100644 --- a/rtl/R5FP_sqrt.v +++ b/rtl/R5FP_sqrt.v @@ -1,213 +1,213 @@ - -`include "R5FP_inc.vh" - -module R5FP_sqrt #( - parameter SIG_W = 23, - parameter EXP_W = 8, - localparam ExtWidth=(SIG_W%2==1)? (SIG_W + 3) : (SIG_W + 4) ) ( - input [SIG_W + EXP_W:0] a_i, - input [2:0] rnd_i, - input strobe_i, - output reg [SIG_W + EXP_W:0] z_o, - output reg [7:0] status_o, - - output [ExtWidth-1:0] isqrt_D, - output isqrt_strobe, - input [ExtWidth-1:0] isqrt_Quo, -/* verilator lint_off UNUSED */ - input [ExtWidth-1:0] isqrt_Rem, -/* verilator lint_on UNUSED */ - input isqrt_done, - input isqrt_ready, - - output done_o, ready_o, - input clk,reset); - -reg [2:0] rnd_r; -reg [EXP_W - 1:0] aExp_r; -reg [SIG_W - 1:0] aSig_r; -reg aSign_r; -reg signed [EXP_W+2:0] zExp; -reg signed [EXP_W-1:0] zExp_r; -reg signed [EXP_W-1:0] zExp_tmp; -reg [8 - 1:0] status_fast; -reg [8 - 1:0] status_normal; -reg [8 - 1:0] status_reg; -reg [(EXP_W + SIG_W):0] z_fast; -reg [(EXP_W + SIG_W):0] z_reg; -reg [(EXP_W + SIG_W):0] z_normal; -reg use_fast, use_fast_r; -reg strobe_r; -always @(posedge clk) begin - if(reset) strobe_r<=1'b0; - else if(strobe_i) strobe_r<=1'b1; - else if(strobe_r) strobe_r<=1'b0; -end -assign isqrt_strobe=strobe_r&~use_fast; - -always @(posedge clk) begin - if(reset) begin - rnd_r<=0; - end - else if(strobe_i) begin - //$display("Get new input:%b-%b",a_i[((EXP_W + SIG_W) - 1):SIG_W],a_i[(SIG_W - 1):0]); - aExp_r<=a_i[((EXP_W + SIG_W) - 1):SIG_W]; - aSig_r<=a_i[(SIG_W - 1):0]; - aSign_r<=a_i[(EXP_W + SIG_W)]; - rnd_r<=rnd_i; - end -end - - -always @(*) begin - reg [(EXP_W + SIG_W):0] NaN_Reg; - reg aExpIsAllOnes; - reg aIsZero; -/* verilator lint_off UNUSED */ - reg [(EXP_W + SIG_W):0] INF_Reg; - reg aIsNaN; -/* verilator lint_on UNUSED */ - reg negInput; - - status_fast = 0; - z_fast=0; - use_fast=1; - aExpIsAllOnes = (aExp_r == ((((1 << (EXP_W-1)) - 1) * 2) + 1)); - - aIsZero = (aExp_r == 0) && (aSig_r == 0); - aIsNaN = ((&aExp_r) == 1) && (aSig_r != 0); - NaN_Reg = {aSign_r, {(EXP_W){1'b1}}, 1'b1, {(SIG_W-1){1'b0}}}; - INF_Reg = {aSign_r, {(EXP_W){1'b1}}, 1'b0, {(SIG_W-1){1'b0}}}; - - negInput = aSign_r & ~aIsZero; - if (aExpIsAllOnes || negInput) begin - //square root of Infinity, NaN and negative number -`ifdef FORCE_DW_SQRT_BEHAVIOR - status_fast[`Z_INVALID] = aExpIsAllOnes || negInput; - z_fast = NaN_Reg; -`else - status_fast[`Z_INVALID] = negInput && !aIsNaN; - z_fast = (aIsNaN||negInput)? NaN_Reg : INF_Reg; -`endif - end - else if (aIsZero) begin - status_fast[`Z_IS_ZERO] = 1; - z_fast = {a_i[(EXP_W + SIG_W)], {(SIG_W + EXP_W){1'b0}}}; - end - else begin - use_fast=0; - end -end - -reg signed [EXP_W+1:0] expNoBias; -always @(*) begin -/* verilator lint_off WIDTH */ - expNoBias = aExp_r - ((1 << (EXP_W-1)) - 1); - zExp = $signed(expNoBias[EXP_W + 1:1]+expNoBias[0]); - zExp = zExp + ((1 << (EXP_W-1)) - 1); -/* verilator lint_on WIDTH */ - assert(zExp>0); -end - -generate - // isqrt_D's length must be even - if (SIG_W%2==1) begin - assign isqrt_D = expNoBias[0]? {3'b001, aSig_r} : {2'b01, aSig_r, 1'b0}; - end - else begin - assign isqrt_D = expNoBias[0]? {3'b001, aSig_r, 1'b0} : {2'b01, aSig_r, 2'b0}; - end -endgenerate - -always @(posedge clk) begin - //if(isqrt_strobe) $display("%d isqrt_D:%b aSig_r:%b expNoBias:%b aExp_r:%b",$time, - // isqrt_D, aSig_r, expNoBias, aExp_r); - //$display("%d use_fast:%b a:%b-%b strobe_r:%b z_fast:%b status_fast:%b", - // $time, use_fast, aExp_r, aSig_r, strobe_r, z_fast, status_fast); - if(strobe_r) begin - if(use_fast) begin - z_reg<=z_fast; - status_reg<=status_fast; - end - else begin - zExp_r<=zExp[EXP_W-1:0]; - end - end -end -always @(posedge clk) begin - if(reset) use_fast_r<=1'b0; - else if(strobe_r) use_fast_r<=use_fast; - else if(use_fast_r) use_fast_r<=1'b0; -end - - -logic [ExtWidth-1:0] Quo; -logic expNeedInc,extraBit; -always @(*) begin - Quo={ExtWidth{isqrt_done}}&isqrt_Quo ; - expNeedInc=Quo[ExtWidth-1]; - extraBit=1'b0; - if(expNeedInc) begin - extraBit=Quo[0]; - Quo=(Quo>>1); - end -end - -wire stickyBit; -wire roundBit; -wire guardBit; -wire [SIG_W+2-1:0] zSig; -generate - if (SIG_W%2==1) begin - assign stickyBit = isqrt_Rem[0]||extraBit; - assign zSig = Quo[ExtWidth - 1:1]; - assign roundBit = Quo[0]; - assign guardBit = Quo[1]; - end - else begin - assign stickyBit = isqrt_Rem[0] || extraBit || Quo[0]!=0; - assign zSig = Quo[ExtWidth - 1:2]; - assign roundBit = Quo[1]; - assign guardBit = Quo[2]; - end -endgenerate - -always @(*) begin - reg [SIG_W+2-1:0] zSigX0,zSigX; - reg sigIncr; - - sigIncr = getRoundCarry(rnd_r, 1'b0, guardBit, roundBit, stickyBit); - - // add round bit - if (sigIncr) zSigX0 = zSig + 1; - else zSigX0 = zSig; - - if (zSigX0[SIG_W+2-1:SIG_W+1-1] == 2'b0) begin - zExp_tmp = zExp_r + {{(EXP_W-1){1'b0}},expNeedInc} - 2; - zSigX = zSigX0 << 1; - end - else if (zSigX0[SIG_W+2-1] == 1'b0) begin - zExp_tmp = zExp_r + {{(EXP_W-1){1'b0}},expNeedInc} - 1; - zSigX = zSigX0; - end - else begin - zExp_tmp=zExp_r + {{(EXP_W-1){1'b0}},expNeedInc} + 0; - zSigX=zSigX0 >> 1; - end - - status_normal = 0; - status_normal[`Z_INEXACT] = roundBit|stickyBit; - - if(isqrt_done&&!reset) assert(zSigX[SIG_W+2-1:SIG_W+2-2]==2'b01); - z_normal = {1'b0, zExp_tmp[EXP_W - 1:0], zSigX[SIG_W-1:0]}; - //$display("%d isqrt_Quo:%b Quo:%b Rem:%b g r t: %b %b %b \nzSig:%b zSigX0:%b zSigX:%b zExp_r:%b zExp_tmp:%b expNeedInc:%b",$time, - // isqrt_Quo, Quo, isqrt_Rem, guardBit, roundBit, stickyBit, zSig, zSigX0, zSigX, zExp_r, zExp_tmp, expNeedInc); -end - -assign status_o = use_fast_r ? status_reg : status_normal; -assign z_o = use_fast_r ? z_reg : z_normal; - -assign done_o = use_fast_r||isqrt_done; -assign ready_o = isqrt_ready&&~strobe_r; - -endmodule + +`include "R5FP_inc.vh" + +module R5FP_sqrt #( + parameter SIG_W = 23, + parameter EXP_W = 8, + localparam ExtWidth=(SIG_W%2==1)? (SIG_W + 3) : (SIG_W + 4) ) ( + input [SIG_W + EXP_W:0] a_i, + input [2:0] rnd_i, + input strobe_i, + output reg [SIG_W + EXP_W:0] z_o, + output reg [7:0] status_o, + + output [ExtWidth-1:0] isqrt_D, + output isqrt_strobe, + input [ExtWidth-1:0] isqrt_Quo, +/* verilator lint_off UNUSED */ + input [ExtWidth-1:0] isqrt_Rem, +/* verilator lint_on UNUSED */ + input isqrt_done, + input isqrt_ready, + + output done_o, ready_o, + input clk,reset); + +reg [2:0] rnd_r; +reg [EXP_W - 1:0] aExp_r; +reg [SIG_W - 1:0] aSig_r; +reg aSign_r; +reg signed [EXP_W+2:0] zExp; +reg signed [EXP_W-1:0] zExp_r; +reg signed [EXP_W-1:0] zExp_tmp; +reg [8 - 1:0] status_fast; +reg [8 - 1:0] status_normal; +reg [8 - 1:0] status_reg; +reg [(EXP_W + SIG_W):0] z_fast; +reg [(EXP_W + SIG_W):0] z_reg; +reg [(EXP_W + SIG_W):0] z_normal; +reg use_fast, use_fast_r; +reg strobe_r; +always @(posedge clk) begin + if(reset) strobe_r<=1'b0; + else if(strobe_i) strobe_r<=1'b1; + else if(strobe_r) strobe_r<=1'b0; +end +assign isqrt_strobe=strobe_r&~use_fast; + +always @(posedge clk) begin + if(reset) begin + rnd_r<=0; + end + else if(strobe_i) begin + //$display("Get new input:%b-%b",a_i[((EXP_W + SIG_W) - 1):SIG_W],a_i[(SIG_W - 1):0]); + aExp_r<=a_i[((EXP_W + SIG_W) - 1):SIG_W]; + aSig_r<=a_i[(SIG_W - 1):0]; + aSign_r<=a_i[(EXP_W + SIG_W)]; + rnd_r<=rnd_i; + end +end + + +always @(*) begin + reg [(EXP_W + SIG_W):0] NaN_Reg; + reg aExpIsAllOnes; + reg aIsZero; +/* verilator lint_off UNUSED */ + reg [(EXP_W + SIG_W):0] INF_Reg; + reg aIsNaN; +/* verilator lint_on UNUSED */ + reg negInput; + + status_fast = 0; + z_fast=0; + use_fast=1; + aExpIsAllOnes = (aExp_r == ((((1 << (EXP_W-1)) - 1) * 2) + 1)); + + aIsZero = (aExp_r == 0) && (aSig_r == 0); + aIsNaN = ((&aExp_r) == 1) && (aSig_r != 0); + NaN_Reg = {aSign_r, {(EXP_W){1'b1}}, 1'b1, {(SIG_W-1){1'b0}}}; + INF_Reg = {aSign_r, {(EXP_W){1'b1}}, 1'b0, {(SIG_W-1){1'b0}}}; + + negInput = aSign_r & ~aIsZero; + if (aExpIsAllOnes || negInput) begin + //square root of Infinity, NaN and negative number +`ifdef FORCE_DW_SQRT_BEHAVIOR + status_fast[`Z_INVALID] = aExpIsAllOnes || negInput; + z_fast = NaN_Reg; +`else + status_fast[`Z_INVALID] = negInput && !aIsNaN; + z_fast = (aIsNaN||negInput)? NaN_Reg : INF_Reg; +`endif + end + else if (aIsZero) begin + status_fast[`Z_IS_ZERO] = 1; + z_fast = {a_i[(EXP_W + SIG_W)], {(SIG_W + EXP_W){1'b0}}}; + end + else begin + use_fast=0; + end +end + +reg signed [EXP_W+1:0] expNoBias; +always @(*) begin +/* verilator lint_off WIDTH */ + expNoBias = aExp_r - ((1 << (EXP_W-1)) - 1); + zExp = $signed(expNoBias[EXP_W + 1:1]+expNoBias[0]); + zExp = zExp + ((1 << (EXP_W-1)) - 1); +/* verilator lint_on WIDTH */ + assert(zExp>0); +end + +generate + // isqrt_D's length must be even + if (SIG_W%2==1) begin + assign isqrt_D = expNoBias[0]? {3'b001, aSig_r} : {2'b01, aSig_r, 1'b0}; + end + else begin + assign isqrt_D = expNoBias[0]? {3'b001, aSig_r, 1'b0} : {2'b01, aSig_r, 2'b0}; + end +endgenerate + +always @(posedge clk) begin + //if(isqrt_strobe) $display("%d isqrt_D:%b aSig_r:%b expNoBias:%b aExp_r:%b",$time, + // isqrt_D, aSig_r, expNoBias, aExp_r); + //$display("%d use_fast:%b a:%b-%b strobe_r:%b z_fast:%b status_fast:%b", + // $time, use_fast, aExp_r, aSig_r, strobe_r, z_fast, status_fast); + if(strobe_r) begin + if(use_fast) begin + z_reg<=z_fast; + status_reg<=status_fast; + end + else begin + zExp_r<=zExp[EXP_W-1:0]; + end + end +end +always @(posedge clk) begin + if(reset) use_fast_r<=1'b0; + else if(strobe_r) use_fast_r<=use_fast; + else if(use_fast_r) use_fast_r<=1'b0; +end + + +logic [ExtWidth-1:0] Quo; +logic expNeedInc,extraBit; +always @(*) begin + Quo={ExtWidth{isqrt_done}}&isqrt_Quo ; + expNeedInc=Quo[ExtWidth-1]; + extraBit=1'b0; + if(expNeedInc) begin + extraBit=Quo[0]; + Quo=(Quo>>1); + end +end + +wire stickyBit; +wire roundBit; +wire guardBit; +wire [SIG_W+2-1:0] zSig; +generate + if (SIG_W%2==1) begin + assign stickyBit = isqrt_Rem[0]||extraBit; + assign zSig = Quo[ExtWidth - 1:1]; + assign roundBit = Quo[0]; + assign guardBit = Quo[1]; + end + else begin + assign stickyBit = isqrt_Rem[0] || extraBit || Quo[0]!=0; + assign zSig = Quo[ExtWidth - 1:2]; + assign roundBit = Quo[1]; + assign guardBit = Quo[2]; + end +endgenerate + +always @(*) begin + reg [SIG_W+2-1:0] zSigX0,zSigX; + reg sigIncr; + + sigIncr = getRoundCarry(rnd_r, 1'b0, guardBit, roundBit, stickyBit); + + // add round bit + if (sigIncr) zSigX0 = zSig + 1; + else zSigX0 = zSig; + + if (zSigX0[SIG_W+2-1:SIG_W+1-1] == 2'b0) begin + zExp_tmp = zExp_r + {{(EXP_W-1){1'b0}},expNeedInc} - 2; + zSigX = zSigX0 << 1; + end + else if (zSigX0[SIG_W+2-1] == 1'b0) begin + zExp_tmp = zExp_r + {{(EXP_W-1){1'b0}},expNeedInc} - 1; + zSigX = zSigX0; + end + else begin + zExp_tmp=zExp_r + {{(EXP_W-1){1'b0}},expNeedInc} + 0; + zSigX=zSigX0 >> 1; + end + + status_normal = 0; + status_normal[`Z_INEXACT] = roundBit|stickyBit; + + if(isqrt_done&&!reset) assert(zSigX[SIG_W+2-1:SIG_W+2-2]==2'b01); + z_normal = {1'b0, zExp_tmp[EXP_W - 1:0], zSigX[SIG_W-1:0]}; + //$display("%d isqrt_Quo:%b Quo:%b Rem:%b g r t: %b %b %b \nzSig:%b zSigX0:%b zSigX:%b zExp_r:%b zExp_tmp:%b expNeedInc:%b",$time, + // isqrt_Quo, Quo, isqrt_Rem, guardBit, roundBit, stickyBit, zSig, zSigX0, zSigX, zExp_r, zExp_tmp, expNeedInc); +end + +assign status_o = use_fast_r ? status_reg : status_normal; +assign z_o = use_fast_r ? z_reg : z_normal; + +assign done_o = use_fast_r||isqrt_done; +assign ready_o = isqrt_ready&&~strobe_r; + +endmodule diff --git a/tb/tb_tf_fadd.v b/tb/tb_tf_fadd.v index d730ef3..4f88fc4 100644 --- a/tb/tb_tf_fadd.v +++ b/tb/tb_tf_fadd.v @@ -2,6 +2,7 @@ `include "R5FP_inc.vh" module R5FP_add_wrap #( + parameter USE_ACC=0, parameter EXP_W=5, parameter SIG_W=10) ( input [EXP_W+SIG_W:0] a, b, @@ -9,10 +10,13 @@ module R5FP_add_wrap #( output reg [7:0] zStatus, output [EXP_W+SIG_W:0] z); +localparam I_SIG_W=SIG_W*2+5; +localparam EXP_W_P1=EXP_W+1; + wire [EXP_W+SIG_W+1:0] ax, bx, zx; -wire [EXP_W:0] zExp; +wire [EXP_W:0] zExp,tailZeroCnt; wire [6-1:0] zStatusMiddle; -wire [SIG_W+4-1:0] zSig; +wire [I_SIG_W-1:0] zSig; wire zSign; R5FP_exp_incr #( @@ -22,25 +26,60 @@ R5FP_exp_incr #( .SIG_W(SIG_W), .EXP_W(EXP_W)) b_i (.a(b), .z(bx)); -R5FP_add #( - .SIG_W(SIG_W), - .EXP_W(EXP_W+1)) add ( - .a(ax), .b(bx), - .zExp(zExp), .zStatus(zStatusMiddle), - .zSig(zSig), .zSign(zSign)); +wire [EXP_W_P1-1:0] dExp; +wire [6-1:0] dStatus; +wire [SIG_W*2+2:0] dSig; +wire dSign; +generate +if (USE_ACC) begin + R5FP_mul_by_1 #( + .EXP_W(EXP_W_P1), + .SIG_W(SIG_W)) mul ( + .a(ax), + .zExp(dExp), + .zStatus(dStatus), + .zSig(dSig), + .zSign(dSign)); + R5FP_acc #( + .SIG_W(SIG_W), + .EXP_W(EXP_W_P1)) acc ( + .dExp(dExp), + .dStatus(dStatus), + .dSig(dSig), + .dSign(dSign), + .toInf(1'b0), + .c(bx), + .rnd(rnd), + +/* verilator lint_off PINCONNECTEMPTY */ + .zToInf(), + .specialTiny(), +/* verilator lint_on PINCONNECTEMPTY */ + .zExp(zExp), .tailZeroCnt(tailZeroCnt), .zStatus(zStatusMiddle), + .zSig(zSig), .zSign(zSign)); +end +else begin + R5FP_add #( + .SIG_W(SIG_W), + .EXP_W(EXP_W+1)) add ( + .a(ax), .b(bx), + .zExp(zExp), .tailZeroCnt(tailZeroCnt), .zStatus(zStatusMiddle), + .zSig(zSig), .zSign(zSign)); +end +endgenerate R5FP_postproc #( - .I_SIG_W(SIG_W+4), + .I_SIG_W(I_SIG_W), .SIG_W(SIG_W), .EXP_W(EXP_W+1)) pp ( + .tailZeroCnt(tailZeroCnt), .aExp(zExp), .aStatus(zStatusMiddle), .aSig(zSig), .rnd(rnd), .aSign(zSign), -/* verilator lint_off PINCONNECTEMPTY */ - .specialZRnd(), -/* verilator lint_on PINCONNECTEMPTY */ + .zToInf(1'b0), + .specialTiny(1'b0), .z(zx), .zStatus(zStatus)); @@ -57,8 +96,13 @@ module tb_fp_add(input clk, /* verilator lint_on UNUSED */ input [2:0] rnd); +`ifdef FP64 +parameter EXP_W=11; +parameter SIG_W=52; +`else parameter EXP_W=8; parameter SIG_W=23; +`endif integer fd, readcount; logic aSign; @@ -82,6 +126,7 @@ assign {bSign,bExp,bSig}=b; assign {z0Sign,z0Exp,z0Sig}=z0; R5FP_add_wrap #( + .USE_ACC(1), .EXP_W(EXP_W), .SIG_W(SIG_W)) I ( .a(a), .b(b), .rnd(rnd[2:0]), .z({ySign,yExp,ySig}), .zStatus(ySpre)); @@ -128,8 +173,9 @@ always @(posedge clk) begin $display("a: %b.%b.%b b: %b.%b.%b z0: %b.%b.%b", aSign,aExp,aSig, bSign,bExp,bSig, z0Sign,z0Exp,z0Sig); $display("a: %b.%b.%b b: %b.%b.%b y: %b.%b.%b", aSign,aExp,aSig, bSign,bExp,bSig, ySign,yExp,ySig); $display("ax: %b.%b.%b bx: %b.%b.%b zx: %b.%b.%b", I.ax[EXP_W+SIG_W+1],I.ax[EXP_W+SIG_W:SIG_W],I.ax[SIG_W-1:0], I.bx[EXP_W+SIG_W+1],I.bx[EXP_W+SIG_W:SIG_W],I.bx[SIG_W-1:0], I.zx[EXP_W+SIG_W+1],I.zx[EXP_W+SIG_W:SIG_W],I.zx[SIG_W-1:0]); - $display("s0: %b yS:%b invalid:%b zStatusMiddle:%b pp.zStatus:%b ySpre:%b", - s0, yS, I.add.I.isInvalid, I.zStatusMiddle, I.pp.zStatus, ySpre); + $display("dx: %b.%b.%b", I.dSign, I.dExp, I.dSig); + //$display("s0: %b yS:%b invalid:%b zStatusMiddle:%b pp.zStatus:%b ySpre:%b", + // s0, yS, I.add.I.isInvalid, I.zStatusMiddle, I.pp.zStatus, ySpre); $finish(); end end diff --git a/tb/tb_tf_fdiv.v b/tb/tb_tf_fdiv.v index 0385e68..9c4a2ca 100644 --- a/tb/tb_tf_fdiv.v +++ b/tb/tb_tf_fdiv.v @@ -29,9 +29,9 @@ logic idiv_strobe; logic [ExtWidth-1:0] idiv_Quo, idiv_Rem; logic idiv_done; logic idiv_ready; -logic [EXP_W:0] xExp; +logic [EXP_W:0] xExp, tailZeroCnt; logic [SIG_W+3-1:0] xSig; -logic [4:0] xMidStatus; +logic [5:0] xMidStatus; logic [7:0] xStatus_fast; logic [7:0] zStatus; logic [SIG_W+EXP_W+1:0] x_fast; @@ -45,6 +45,7 @@ R5FP_div #( .rnd_i(rnd_i), .strobe_i(strobe_i), .xExp_o(xExp), + .tailZeroCnt_o(tailZeroCnt), .xSig_o(xSig), .xMidStatus_o(xMidStatus), @@ -84,13 +85,13 @@ R5FP_postproc #( .SIG_W(SIG_W), .EXP_W(EXP_W+1)) pp ( .aExp(xExp), + .tailZeroCnt(tailZeroCnt), .aStatus(xMidStatus), .aSig({1'b0,xSig}), .rnd(rnd), .aSign(xMidStatus[`SIGN]), -/* verilator lint_off PINCONNECTEMPTY */ - .specialZRnd(), -/* verilator lint_on PINCONNECTEMPTY */ + .zToInf(1'b0), + .specialTiny(1'b0), .z(zx), .zStatus(zStatus)); @@ -107,8 +108,13 @@ endmodule module tb_fdiv(input clk, reset, input [2:0] rnd); +`ifdef FP64 +parameter EXP_W=11; +parameter SIG_W=52; +`else parameter SIG_W=23; parameter EXP_W=8; +`endif logic done,strobe; logic [7:0] status; diff --git a/tb/tb_tf_fmac.v b/tb/tb_tf_fmac.v index abae88a..3fcb842 100644 --- a/tb/tb_tf_fmac.v +++ b/tb/tb_tf_fmac.v @@ -13,7 +13,7 @@ localparam I_SIG_W=SIG_W*2+5; localparam EXP_W_P1=EXP_W+1; wire [EXP_W+SIG_W+1:0] ax, bx, cx, zx; -wire [EXP_W:0] zExp; +wire [EXP_W:0] zExp, tailZeroCnt; wire [6-1:0] zStatusMiddle; wire [SIG_W*2+4:0] zSig; wire zSign; @@ -38,6 +38,9 @@ R5FP_mul #( .SIG_W(SIG_W)) mul ( .a(ax), .b(bx), .zExp(dExp), +/* verilator lint_off PINCONNECTEMPTY */ + .tailZeroCnt(), +/* verilator lint_on PINCONNECTEMPTY */ .toInf(toInf), .zStatus(dStatus), .zSig(dSig), @@ -57,37 +60,37 @@ R5FP_acc #( .zToInf(zToInf), .specialTiny(specialTiny), - .zExp(zExp), .zStatus(zStatusMiddle), + .zExp(zExp), .tailZeroCnt(tailZeroCnt), .zStatus(zStatusMiddle), .zSig(zSig), .zSign(zSign)); -wire [7:0] zStatusPre; -wire specialZRnd; R5FP_postproc #( .I_SIG_W(I_SIG_W), .SIG_W(SIG_W), .EXP_W(EXP_W_P1)) pp ( .aExp(zExp), + .tailZeroCnt(tailZeroCnt), .aStatus(zStatusMiddle), .aSig(zSig), .aSign(zSign), + .zToInf(zToInf), + .specialTiny(specialTiny), .rnd(rnd), .z(zx), - .specialZRnd(specialZRnd), - .zStatus(zStatusPre)); + .zStatus(zStatus)); R5FP_exp_decr #( .SIG_W(SIG_W), .EXP_W(EXP_W)) z_d (.a(zx), .z(z)); -always @(*) begin - //reg specialZ; - zStatus=zStatusPre; - if(zToInf) begin - zStatus[`Z_INEXACT]=1; - zStatus[`Z_HUGE]=1; - end - if(specialTiny&&specialZRnd) zStatus[`Z_TINY]=1; // I don't know why... -end +//always @(*) begin +// //reg specialZ; +// zStatus=zStatusPre; +// if(zToInf) begin +// zStatus[`Z_INEXACT]=1; +// zStatus[`Z_HUGE]=1; +// end +// if(specialTiny&&specialZRnd) zStatus[`Z_TINY]=1; // I don't know why... +//end endmodule @@ -98,9 +101,13 @@ module tb_fp_mac(input clk, /* verilator lint_on UNUSED */ input [2:0] rnd); +`ifdef FP64 +parameter EXP_W=11; +parameter SIG_W=52; +`else parameter EXP_W=8; parameter SIG_W=23; -localparam I_SIG_W=SIG_W*2+5; +`endif integer fd, readcount; logic aSign; diff --git a/tb/tb_tf_fmul.v b/tb/tb_tf_fmul.v index 3ccaeda..e1d30bc 100644 --- a/tb/tb_tf_fmul.v +++ b/tb/tb_tf_fmul.v @@ -10,7 +10,7 @@ module R5FP_mul_wrap #( output [EXP_W+SIG_W:0] z); wire [EXP_W+SIG_W+1:0] ax, bx, zx; -wire [EXP_W:0] zExp; +wire [EXP_W:0] zExp, tailZeroCnt; wire [6-1:0] zStatusMiddle; wire [SIG_W*2+2:0] zSig; wire zSign; @@ -29,7 +29,7 @@ R5FP_mul #( /* verilator lint_off PINCONNECTEMPTY */ .toInf(), /* verilator lint_on PINCONNECTEMPTY */ - .zExp(zExp), .zStatus(zStatusMiddle), + .zExp(zExp), .tailZeroCnt(tailZeroCnt), .zStatus(zStatusMiddle), .zSig(zSig), .zSign(zSign)); R5FP_postproc #( @@ -37,13 +37,13 @@ R5FP_postproc #( .SIG_W(SIG_W), .EXP_W(EXP_W+1)) pp ( .aExp(zExp), + .tailZeroCnt(tailZeroCnt), .aStatus(zStatusMiddle), .aSig(zSig), .aSign(zSign), .rnd(rnd), -/* verilator lint_off PINCONNECTEMPTY */ - .specialZRnd(), -/* verilator lint_on PINCONNECTEMPTY */ + .zToInf(1'b0), + .specialTiny(1'b0), .z(zx), .zStatus(zStatus)); @@ -60,8 +60,13 @@ module tb_fp_mul(input clk, /* verilator lint_on UNUSED */ input [2:0] rnd); +`ifdef FP64 +parameter EXP_W=11; +parameter SIG_W=52; +`else parameter EXP_W=8; parameter SIG_W=23; +`endif integer fd, readcount; logic aSign; diff --git a/tb/tb_tf_fsqrt.v b/tb/tb_tf_fsqrt.v index aea2cab..1788184 100644 --- a/tb/tb_tf_fsqrt.v +++ b/tb/tb_tf_fsqrt.v @@ -71,8 +71,13 @@ endmodule module tb_fsqrt(input clk, reset, input [2:0] rnd); +`ifdef FP64 +parameter EXP_W=11; +parameter SIG_W=52; +`else parameter SIG_W=23; parameter EXP_W=8; +`endif logic done,strobe; logic [7:0] status;