Skip to content

Commit

Permalink
Merge pull request #3588 from chipsalliance/tlb_port
Browse files: browse the repository at this point in the history
Generalize tlb_port | improve NBDcache performance
  • Loading branch information
jerryz123 committed Mar 15, 2024
2 parents 6d00957 + b50ad58 commit a721154
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 170 deletions.
293 changes: 147 additions & 146 deletions src/main/resources/vsrc/RoccBlackBox.v
Original file line number Diff line number Diff line change
Expand Up @@ -15,153 +15,154 @@ module RoccBlackBox
fLen = 64,
FPConstants_FLAGS_SZ = 5)
( input clock,
input reset,
output rocc_cmd_ready,
input rocc_cmd_valid,
input [6:0] rocc_cmd_bits_inst_funct,
input [4:0] rocc_cmd_bits_inst_rs2,
input [4:0] rocc_cmd_bits_inst_rs1,
input rocc_cmd_bits_inst_xd,
input rocc_cmd_bits_inst_xs1,
input rocc_cmd_bits_inst_xs2,
input [4:0] rocc_cmd_bits_inst_rd,
input [6:0] rocc_cmd_bits_inst_opcode,
input [xLen-1:0] rocc_cmd_bits_rs1,
input [xLen-1:0] rocc_cmd_bits_rs2,
input rocc_cmd_bits_status_debug,
input rocc_cmd_bits_status_cease,
input rocc_cmd_bits_status_wfi,
input [31:0] rocc_cmd_bits_status_isa,
input [PRV_SZ-1:0] rocc_cmd_bits_status_dprv,
input rocc_cmd_bits_status_dv,
input [PRV_SZ-1:0] rocc_cmd_bits_status_prv,
input rocc_cmd_bits_status_v,
input rocc_cmd_bits_status_sd,
input [22:0] rocc_cmd_bits_status_zero2,
input rocc_cmd_bits_status_mpv,
input rocc_cmd_bits_status_gva,
input rocc_cmd_bits_status_mbe,
input rocc_cmd_bits_status_sbe,
input [1:0] rocc_cmd_bits_status_sxl,
input [1:0] rocc_cmd_bits_status_uxl,
input rocc_cmd_bits_status_sd_rv32,
input [7:0] rocc_cmd_bits_status_zero1,
input rocc_cmd_bits_status_tsr,
input rocc_cmd_bits_status_tw,
input rocc_cmd_bits_status_tvm,
input rocc_cmd_bits_status_mxr,
input rocc_cmd_bits_status_sum,
input rocc_cmd_bits_status_mprv,
input [1:0] rocc_cmd_bits_status_xs,
input [1:0] rocc_cmd_bits_status_fs,
input [1:0] rocc_cmd_bits_status_vs,
input [1:0] rocc_cmd_bits_status_mpp,
input [0:0] rocc_cmd_bits_status_spp,
input rocc_cmd_bits_status_mpie,
input rocc_cmd_bits_status_ube,
input rocc_cmd_bits_status_spie,
input rocc_cmd_bits_status_upie,
input rocc_cmd_bits_status_mie,
input rocc_cmd_bits_status_hie,
input rocc_cmd_bits_status_sie,
input rocc_cmd_bits_status_uie,
input rocc_resp_ready,
output rocc_resp_valid,
output [4:0] rocc_resp_bits_rd,
output [xLen-1:0] rocc_resp_bits_data,
input rocc_mem_req_ready,
output rocc_mem_req_valid,
output [coreMaxAddrBits-1:0] rocc_mem_req_bits_addr,
output [dcacheReqTagBits-1:0] rocc_mem_req_bits_tag,
output [M_SZ-1:0] rocc_mem_req_bits_cmd,
input reset,
output rocc_cmd_ready,
input rocc_cmd_valid,
input [6:0] rocc_cmd_bits_inst_funct,
input [4:0] rocc_cmd_bits_inst_rs2,
input [4:0] rocc_cmd_bits_inst_rs1,
input rocc_cmd_bits_inst_xd,
input rocc_cmd_bits_inst_xs1,
input rocc_cmd_bits_inst_xs2,
input [4:0] rocc_cmd_bits_inst_rd,
input [6:0] rocc_cmd_bits_inst_opcode,
input [xLen-1:0] rocc_cmd_bits_rs1,
input [xLen-1:0] rocc_cmd_bits_rs2,
input rocc_cmd_bits_status_debug,
input rocc_cmd_bits_status_cease,
input rocc_cmd_bits_status_wfi,
input [31:0] rocc_cmd_bits_status_isa,
input [PRV_SZ-1:0] rocc_cmd_bits_status_dprv,
input rocc_cmd_bits_status_dv,
input [PRV_SZ-1:0] rocc_cmd_bits_status_prv,
input rocc_cmd_bits_status_v,
input rocc_cmd_bits_status_sd,
input [22:0] rocc_cmd_bits_status_zero2,
input rocc_cmd_bits_status_mpv,
input rocc_cmd_bits_status_gva,
input rocc_cmd_bits_status_mbe,
input rocc_cmd_bits_status_sbe,
input [1:0] rocc_cmd_bits_status_sxl,
input [1:0] rocc_cmd_bits_status_uxl,
input rocc_cmd_bits_status_sd_rv32,
input [7:0] rocc_cmd_bits_status_zero1,
input rocc_cmd_bits_status_tsr,
input rocc_cmd_bits_status_tw,
input rocc_cmd_bits_status_tvm,
input rocc_cmd_bits_status_mxr,
input rocc_cmd_bits_status_sum,
input rocc_cmd_bits_status_mprv,
input [1:0] rocc_cmd_bits_status_xs,
input [1:0] rocc_cmd_bits_status_fs,
input [1:0] rocc_cmd_bits_status_vs,
input [1:0] rocc_cmd_bits_status_mpp,
input [0:0] rocc_cmd_bits_status_spp,
input rocc_cmd_bits_status_mpie,
input rocc_cmd_bits_status_ube,
input rocc_cmd_bits_status_spie,
input rocc_cmd_bits_status_upie,
input rocc_cmd_bits_status_mie,
input rocc_cmd_bits_status_hie,
input rocc_cmd_bits_status_sie,
input rocc_cmd_bits_status_uie,
input rocc_resp_ready,
output rocc_resp_valid,
output [4:0] rocc_resp_bits_rd,
output [xLen-1:0] rocc_resp_bits_data,
input rocc_mem_req_ready,
output rocc_mem_req_valid,
output [coreMaxAddrBits-1:0] rocc_mem_req_bits_addr,
output [dcacheReqTagBits-1:0] rocc_mem_req_bits_tag,
output [M_SZ-1:0] rocc_mem_req_bits_cmd,
output [mem_req_bits_size_width-1:0] rocc_mem_req_bits_size,
output rocc_mem_req_bits_signed,
output rocc_mem_req_bits_phys,
output rocc_mem_req_bits_no_alloc,
output rocc_mem_req_bits_no_xcpt,
output [1:0] rocc_mem_req_bits_dprv,
output rocc_mem_req_bits_dv,
output [coreDataBits-1:0] rocc_mem_req_bits_data,
output [coreDataBytes-1:0] rocc_mem_req_bits_mask,
output rocc_mem_s1_kill,
output [coreDataBits-1:0] rocc_mem_s1_data_data,
output [coreDataBytes-1:0] rocc_mem_s1_data_mask,
input rocc_mem_s2_nack,
input rocc_mem_s2_nack_cause_raw,
output rocc_mem_s2_kill,
input rocc_mem_s2_uncached,
input [paddrBits-1:0] rocc_mem_s2_paddr,
input [vaddrBitsExtended-1:0] rocc_mem_s2_gpa,
input rocc_mem_s2_gpa_is_pte,
input rocc_mem_resp_valid,
input [coreMaxAddrBits-1:0] rocc_mem_resp_bits_addr,
input [dcacheReqTagBits-1:0] rocc_mem_resp_bits_tag,
input [M_SZ-1:0] rocc_mem_resp_bits_cmd,
input [mem_req_bits_size_width-1:0] rocc_mem_resp_bits_size,
input rocc_mem_resp_bits_signed,
input [coreDataBits-1:0] rocc_mem_resp_bits_data,
input [coreDataBytes-1:0] rocc_mem_resp_bits_mask,
input rocc_mem_resp_bits_replay,
input rocc_mem_resp_bits_has_data,
input [coreDataBits-1:0] rocc_mem_resp_bits_data_word_bypass,
input [coreDataBits-1:0] rocc_mem_resp_bits_data_raw,
input [coreDataBits-1:0] rocc_mem_resp_bits_store_data,
input [1:0] rocc_mem_resp_bits_dprv,
input rocc_mem_resp_bits_dv,
input rocc_mem_replay_next,
input rocc_mem_s2_xcpt_ma_ld,
input rocc_mem_s2_xcpt_ma_st,
input rocc_mem_s2_xcpt_pf_ld,
input rocc_mem_s2_xcpt_pf_st,
input rocc_mem_s2_xcpt_gf_ld,
input rocc_mem_s2_xcpt_gf_st,
input rocc_mem_s2_xcpt_ae_ld,
input rocc_mem_s2_xcpt_ae_st,
input rocc_mem_ordered,
input rocc_mem_perf_acquire,
input rocc_mem_perf_release,
input rocc_mem_perf_grant,
input rocc_mem_perf_tlbMiss,
input rocc_mem_perf_blocked,
input rocc_mem_perf_canAcceptStoreThenLoad,
input rocc_mem_perf_canAcceptStoreThenRMW,
input rocc_mem_perf_canAcceptLoadThenLoad,
input rocc_mem_perf_storeBufferEmptyAfterLoad,
input rocc_mem_perf_storeBufferEmptyAfterStore,
output rocc_mem_keep_clock_enabled,
input rocc_mem_clock_enabled,
output rocc_busy,
output rocc_interrupt,
input rocc_exception,
input rocc_fpu_req_ready,
output rocc_fpu_req_valid,
output rocc_fpu_req_bits_ldst,
output rocc_fpu_req_bits_wen,
output rocc_fpu_req_bits_ren1,
output rocc_fpu_req_bits_ren2,
output rocc_fpu_req_bits_ren3,
output rocc_fpu_req_bits_swap12,
output rocc_fpu_req_bits_swap23,
output [1:0] rocc_fpu_req_bits_typeTagIn,
output [1:0] rocc_fpu_req_bits_typeTagOut,
output rocc_fpu_req_bits_fromint,
output rocc_fpu_req_bits_toint,
output rocc_fpu_req_bits_fastpipe,
output rocc_fpu_req_bits_fma,
output rocc_fpu_req_bits_div,
output rocc_fpu_req_bits_sqrt,
output rocc_fpu_req_bits_wflags,
output [FPConstants_RM_SZ-1:0] rocc_fpu_req_bits_rm,
output [1:0] rocc_fpu_req_bits_fmaCmd,
output [1:0] rocc_fpu_req_bits_typ,
output [1:0] rocc_fpu_req_bits_fmt,
output [fLen:0] rocc_fpu_req_bits_in1,
output [fLen:0] rocc_fpu_req_bits_in2,
output [fLen:0] rocc_fpu_req_bits_in3,
output rocc_fpu_resp_ready,
input rocc_fpu_resp_valid,
input [fLen:0] rocc_fpu_resp_bits_data,
input [FPConstants_FLAGS_SZ-1:0] rocc_fpu_resp_bits_exc );
output rocc_mem_req_bits_signed,
output rocc_mem_req_bits_phys,
output rocc_mem_req_bits_no_alloc,
output rocc_mem_req_bits_no_xcpt,
output rocc_mem_req_bits_no_resp,
output [1:0] rocc_mem_req_bits_dprv,
output rocc_mem_req_bits_dv,
output [coreDataBits-1:0] rocc_mem_req_bits_data,
output [coreDataBytes-1:0] rocc_mem_req_bits_mask,
output rocc_mem_s1_kill,
output [coreDataBits-1:0] rocc_mem_s1_data_data,
output [coreDataBytes-1:0] rocc_mem_s1_data_mask,
input rocc_mem_s2_nack,
input rocc_mem_s2_nack_cause_raw,
output rocc_mem_s2_kill,
input rocc_mem_s2_uncached,
input [paddrBits-1:0] rocc_mem_s2_paddr,
input [vaddrBitsExtended-1:0] rocc_mem_s2_gpa,
input rocc_mem_s2_gpa_is_pte,
input rocc_mem_resp_valid,
input [coreMaxAddrBits-1:0] rocc_mem_resp_bits_addr,
input [dcacheReqTagBits-1:0] rocc_mem_resp_bits_tag,
input [M_SZ-1:0] rocc_mem_resp_bits_cmd,
input [mem_req_bits_size_width-1:0] rocc_mem_resp_bits_size,
input rocc_mem_resp_bits_signed,
input [coreDataBits-1:0] rocc_mem_resp_bits_data,
input [coreDataBytes-1:0] rocc_mem_resp_bits_mask,
input rocc_mem_resp_bits_replay,
input rocc_mem_resp_bits_has_data,
input [coreDataBits-1:0] rocc_mem_resp_bits_data_word_bypass,
input [coreDataBits-1:0] rocc_mem_resp_bits_data_raw,
input [coreDataBits-1:0] rocc_mem_resp_bits_store_data,
input [1:0] rocc_mem_resp_bits_dprv,
input rocc_mem_resp_bits_dv,
input rocc_mem_replay_next,
input rocc_mem_s2_xcpt_ma_ld,
input rocc_mem_s2_xcpt_ma_st,
input rocc_mem_s2_xcpt_pf_ld,
input rocc_mem_s2_xcpt_pf_st,
input rocc_mem_s2_xcpt_gf_ld,
input rocc_mem_s2_xcpt_gf_st,
input rocc_mem_s2_xcpt_ae_ld,
input rocc_mem_s2_xcpt_ae_st,
input rocc_mem_ordered,
input rocc_mem_perf_acquire,
input rocc_mem_perf_release,
input rocc_mem_perf_grant,
input rocc_mem_perf_tlbMiss,
input rocc_mem_perf_blocked,
input rocc_mem_perf_canAcceptStoreThenLoad,
input rocc_mem_perf_canAcceptStoreThenRMW,
input rocc_mem_perf_canAcceptLoadThenLoad,
input rocc_mem_perf_storeBufferEmptyAfterLoad,
input rocc_mem_perf_storeBufferEmptyAfterStore,
output rocc_mem_keep_clock_enabled,
input rocc_mem_clock_enabled,
output rocc_busy,
output rocc_interrupt,
input rocc_exception,
input rocc_fpu_req_ready,
output rocc_fpu_req_valid,
output rocc_fpu_req_bits_ldst,
output rocc_fpu_req_bits_wen,
output rocc_fpu_req_bits_ren1,
output rocc_fpu_req_bits_ren2,
output rocc_fpu_req_bits_ren3,
output rocc_fpu_req_bits_swap12,
output rocc_fpu_req_bits_swap23,
output [1:0] rocc_fpu_req_bits_typeTagIn,
output [1:0] rocc_fpu_req_bits_typeTagOut,
output rocc_fpu_req_bits_fromint,
output rocc_fpu_req_bits_toint,
output rocc_fpu_req_bits_fastpipe,
output rocc_fpu_req_bits_fma,
output rocc_fpu_req_bits_div,
output rocc_fpu_req_bits_sqrt,
output rocc_fpu_req_bits_wflags,
output [FPConstants_RM_SZ-1:0] rocc_fpu_req_bits_rm,
output [1:0] rocc_fpu_req_bits_fmaCmd,
output [1:0] rocc_fpu_req_bits_typ,
output [1:0] rocc_fpu_req_bits_fmt,
output [fLen:0] rocc_fpu_req_bits_in1,
output [fLen:0] rocc_fpu_req_bits_in2,
output [fLen:0] rocc_fpu_req_bits_in3,
output rocc_fpu_resp_ready,
input rocc_fpu_resp_valid,
input [fLen:0] rocc_fpu_resp_bits_data,
input [FPConstants_FLAGS_SZ-1:0] rocc_fpu_resp_bits_exc );

assign rocc_cmd_ready = 1'b1;

Expand Down
5 changes: 1 addition & 4 deletions src/main/scala/groundtest/Tile.scala
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,7 @@ abstract class GroundTestTile(
dcacheOpt.foreach { m =>
m.hartIdSinkNodeOpt.foreach { _ := hartIdNexusNode }
InModuleBody {
m.module match {
case module: DCacheModule => module.tlb_port := DontCare
case other => other
}
m.module.io.tlb_port := DontCare
}
}

Expand Down
18 changes: 8 additions & 10 deletions src/main/scala/rocket/DCache.scala
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ class DCacheTLBPort(implicit p: Parameters) extends CoreBundle()(p) {
}

class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val tlb_port = IO(new DCacheTLBPort)

val tECC = cacheParams.tagCode
val dECC = cacheParams.dataCode
require(subWordBits % eccBits == 0, "subWordBits must be a multiple of eccBits")
Expand Down Expand Up @@ -179,7 +177,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s1_nack = WireDefault(false.B)
val s1_valid_masked = s1_valid && !io.cpu.s1_kill
val s1_valid_not_nacked = s1_valid && !s1_nack
val s1_tlb_req_valid = RegNext(tlb_port.req.fire, false.B)
val s1_tlb_req_valid = RegNext(io.tlb_port.req.fire, false.B)
val s2_tlb_req_valid = RegNext(s1_tlb_req_valid, false.B)
val s0_clk_en = metaArb.io.out.valid && !metaArb.io.out.bits.write

Expand All @@ -190,16 +188,16 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s1_req = RegEnable(s0_req, s0_clk_en)
val s1_vaddr = Cat(s1_req.idx.getOrElse(s1_req.addr) >> tagLSB, s1_req.addr(tagLSB-1, 0))

val s0_tlb_req = WireInit(tlb_port.req.bits)
when (!tlb_port.req.fire) {
val s0_tlb_req = WireInit(io.tlb_port.req.bits)
when (!io.tlb_port.req.fire) {
s0_tlb_req.passthrough := s0_req.phys
s0_tlb_req.vaddr := s0_req.addr
s0_tlb_req.size := s0_req.size
s0_tlb_req.cmd := s0_req.cmd
s0_tlb_req.prv := s0_req.dprv
s0_tlb_req.v := s0_req.dv
}
val s1_tlb_req = RegEnable(s0_tlb_req, s0_clk_en || tlb_port.req.valid)
val s1_tlb_req = RegEnable(s0_tlb_req, s0_clk_en || io.tlb_port.req.valid)

val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd)
Expand Down Expand Up @@ -263,7 +261,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// address translation
val s1_cmd_uses_tlb = s1_readwrite || s1_flush_line || s1_req.cmd === M_WOK
io.ptw <> tlb.io.ptw
tlb.io.kill := io.cpu.s2_kill || s2_tlb_req_valid && tlb_port.s2_kill
tlb.io.kill := io.cpu.s2_kill || s2_tlb_req_valid && io.tlb_port.s2_kill
tlb.io.req.valid := s1_tlb_req_valid || s1_valid && !io.cpu.s1_kill && s1_cmd_uses_tlb
tlb.io.req.bits := s1_tlb_req
when (!tlb.io.req.ready && !tlb.io.ptw.resp.valid && !io.cpu.req.bits.phys) { io.cpu.req.ready := false.B }
Expand All @@ -277,8 +275,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
tlb.io.sfence.bits.hv := s1_req.cmd === M_HFENCEV
tlb.io.sfence.bits.hg := s1_req.cmd === M_HFENCEG

tlb_port.req.ready := clock_en_reg
tlb_port.s1_resp := tlb.io.resp
io.tlb_port.req.ready := clock_en_reg
io.tlb_port.s1_resp := tlb.io.resp
when (s1_tlb_req_valid && s1_valid && !(s1_req.phys && s1_req.no_xcpt)) { s1_nack := true.B }

pma_checker.io <> DontCare
Expand Down Expand Up @@ -1056,7 +1054,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
metaArb.io.out.valid || // subsumes resetting || flushing
s1_probe || s2_probe ||
s1_valid || s2_valid ||
tlb_port.req.valid ||
io.tlb_port.req.valid ||
s1_tlb_req_valid || s2_tlb_req_valid ||
pstore1_held || pstore2_valid ||
release_state =/= s_ready ||
Expand Down

0 comments on commit a721154

Please sign in to comment.