diff --git a/src/main/resources/vsrc/RoccBlackBox.v b/src/main/resources/vsrc/RoccBlackBox.v index 5c0a0766c95..b8fa74df910 100644 --- a/src/main/resources/vsrc/RoccBlackBox.v +++ b/src/main/resources/vsrc/RoccBlackBox.v @@ -15,153 +15,154 @@ module RoccBlackBox fLen = 64, FPConstants_FLAGS_SZ = 5) ( input clock, - input reset, - output rocc_cmd_ready, - input rocc_cmd_valid, - input [6:0] rocc_cmd_bits_inst_funct, - input [4:0] rocc_cmd_bits_inst_rs2, - input [4:0] rocc_cmd_bits_inst_rs1, - input rocc_cmd_bits_inst_xd, - input rocc_cmd_bits_inst_xs1, - input rocc_cmd_bits_inst_xs2, - input [4:0] rocc_cmd_bits_inst_rd, - input [6:0] rocc_cmd_bits_inst_opcode, - input [xLen-1:0] rocc_cmd_bits_rs1, - input [xLen-1:0] rocc_cmd_bits_rs2, - input rocc_cmd_bits_status_debug, - input rocc_cmd_bits_status_cease, - input rocc_cmd_bits_status_wfi, - input [31:0] rocc_cmd_bits_status_isa, - input [PRV_SZ-1:0] rocc_cmd_bits_status_dprv, - input rocc_cmd_bits_status_dv, - input [PRV_SZ-1:0] rocc_cmd_bits_status_prv, - input rocc_cmd_bits_status_v, - input rocc_cmd_bits_status_sd, - input [22:0] rocc_cmd_bits_status_zero2, - input rocc_cmd_bits_status_mpv, - input rocc_cmd_bits_status_gva, - input rocc_cmd_bits_status_mbe, - input rocc_cmd_bits_status_sbe, - input [1:0] rocc_cmd_bits_status_sxl, - input [1:0] rocc_cmd_bits_status_uxl, - input rocc_cmd_bits_status_sd_rv32, - input [7:0] rocc_cmd_bits_status_zero1, - input rocc_cmd_bits_status_tsr, - input rocc_cmd_bits_status_tw, - input rocc_cmd_bits_status_tvm, - input rocc_cmd_bits_status_mxr, - input rocc_cmd_bits_status_sum, - input rocc_cmd_bits_status_mprv, - input [1:0] rocc_cmd_bits_status_xs, - input [1:0] rocc_cmd_bits_status_fs, - input [1:0] rocc_cmd_bits_status_vs, - input [1:0] rocc_cmd_bits_status_mpp, - input [0:0] rocc_cmd_bits_status_spp, - input rocc_cmd_bits_status_mpie, - input rocc_cmd_bits_status_ube, - input rocc_cmd_bits_status_spie, - input rocc_cmd_bits_status_upie, - input rocc_cmd_bits_status_mie, - input rocc_cmd_bits_status_hie, - input rocc_cmd_bits_status_sie, - input rocc_cmd_bits_status_uie, - input rocc_resp_ready, - output rocc_resp_valid, - output [4:0] rocc_resp_bits_rd, - output [xLen-1:0] rocc_resp_bits_data, - input rocc_mem_req_ready, - output rocc_mem_req_valid, - output [coreMaxAddrBits-1:0] rocc_mem_req_bits_addr, - output [dcacheReqTagBits-1:0] rocc_mem_req_bits_tag, - output [M_SZ-1:0] rocc_mem_req_bits_cmd, + input reset, + output rocc_cmd_ready, + input rocc_cmd_valid, + input [6:0] rocc_cmd_bits_inst_funct, + input [4:0] rocc_cmd_bits_inst_rs2, + input [4:0] rocc_cmd_bits_inst_rs1, + input rocc_cmd_bits_inst_xd, + input rocc_cmd_bits_inst_xs1, + input rocc_cmd_bits_inst_xs2, + input [4:0] rocc_cmd_bits_inst_rd, + input [6:0] rocc_cmd_bits_inst_opcode, + input [xLen-1:0] rocc_cmd_bits_rs1, + input [xLen-1:0] rocc_cmd_bits_rs2, + input rocc_cmd_bits_status_debug, + input rocc_cmd_bits_status_cease, + input rocc_cmd_bits_status_wfi, + input [31:0] rocc_cmd_bits_status_isa, + input [PRV_SZ-1:0] rocc_cmd_bits_status_dprv, + input rocc_cmd_bits_status_dv, + input [PRV_SZ-1:0] rocc_cmd_bits_status_prv, + input rocc_cmd_bits_status_v, + input rocc_cmd_bits_status_sd, + input [22:0] rocc_cmd_bits_status_zero2, + input rocc_cmd_bits_status_mpv, + input rocc_cmd_bits_status_gva, + input rocc_cmd_bits_status_mbe, + input rocc_cmd_bits_status_sbe, + input [1:0] rocc_cmd_bits_status_sxl, + input [1:0] rocc_cmd_bits_status_uxl, + input rocc_cmd_bits_status_sd_rv32, + input [7:0] rocc_cmd_bits_status_zero1, + input rocc_cmd_bits_status_tsr, + input rocc_cmd_bits_status_tw, + input rocc_cmd_bits_status_tvm, + input rocc_cmd_bits_status_mxr, + input rocc_cmd_bits_status_sum, + input rocc_cmd_bits_status_mprv, + input [1:0] rocc_cmd_bits_status_xs, + input [1:0] rocc_cmd_bits_status_fs, + input [1:0] rocc_cmd_bits_status_vs, + input [1:0] rocc_cmd_bits_status_mpp, + input [0:0] rocc_cmd_bits_status_spp, + input rocc_cmd_bits_status_mpie, + input rocc_cmd_bits_status_ube, + input rocc_cmd_bits_status_spie, + input rocc_cmd_bits_status_upie, + input rocc_cmd_bits_status_mie, + input rocc_cmd_bits_status_hie, + input rocc_cmd_bits_status_sie, + input rocc_cmd_bits_status_uie, + input rocc_resp_ready, + output rocc_resp_valid, + output [4:0] rocc_resp_bits_rd, + output [xLen-1:0] rocc_resp_bits_data, + input rocc_mem_req_ready, + output rocc_mem_req_valid, + output [coreMaxAddrBits-1:0] rocc_mem_req_bits_addr, + output [dcacheReqTagBits-1:0] rocc_mem_req_bits_tag, + output [M_SZ-1:0] rocc_mem_req_bits_cmd, output [mem_req_bits_size_width-1:0] rocc_mem_req_bits_size, - output rocc_mem_req_bits_signed, - output rocc_mem_req_bits_phys, - output rocc_mem_req_bits_no_alloc, - output rocc_mem_req_bits_no_xcpt, - output [1:0] rocc_mem_req_bits_dprv, - output rocc_mem_req_bits_dv, - output [coreDataBits-1:0] rocc_mem_req_bits_data, - output [coreDataBytes-1:0] rocc_mem_req_bits_mask, - output rocc_mem_s1_kill, - output [coreDataBits-1:0] rocc_mem_s1_data_data, - output [coreDataBytes-1:0] rocc_mem_s1_data_mask, - input rocc_mem_s2_nack, - input rocc_mem_s2_nack_cause_raw, - output rocc_mem_s2_kill, - input rocc_mem_s2_uncached, - input [paddrBits-1:0] rocc_mem_s2_paddr, - input [vaddrBitsExtended-1:0] rocc_mem_s2_gpa, - input rocc_mem_s2_gpa_is_pte, - input rocc_mem_resp_valid, - input [coreMaxAddrBits-1:0] rocc_mem_resp_bits_addr, - input [dcacheReqTagBits-1:0] rocc_mem_resp_bits_tag, - input [M_SZ-1:0] rocc_mem_resp_bits_cmd, - input [mem_req_bits_size_width-1:0] rocc_mem_resp_bits_size, - input rocc_mem_resp_bits_signed, - input [coreDataBits-1:0] rocc_mem_resp_bits_data, - input [coreDataBytes-1:0] rocc_mem_resp_bits_mask, - input rocc_mem_resp_bits_replay, - input rocc_mem_resp_bits_has_data, - input [coreDataBits-1:0] rocc_mem_resp_bits_data_word_bypass, - input [coreDataBits-1:0] rocc_mem_resp_bits_data_raw, - input [coreDataBits-1:0] rocc_mem_resp_bits_store_data, - input [1:0] rocc_mem_resp_bits_dprv, - input rocc_mem_resp_bits_dv, - input rocc_mem_replay_next, - input rocc_mem_s2_xcpt_ma_ld, - input rocc_mem_s2_xcpt_ma_st, - input rocc_mem_s2_xcpt_pf_ld, - input rocc_mem_s2_xcpt_pf_st, - input rocc_mem_s2_xcpt_gf_ld, - input rocc_mem_s2_xcpt_gf_st, - input rocc_mem_s2_xcpt_ae_ld, - input rocc_mem_s2_xcpt_ae_st, - input rocc_mem_ordered, - input rocc_mem_perf_acquire, - input rocc_mem_perf_release, - input rocc_mem_perf_grant, - input rocc_mem_perf_tlbMiss, - input rocc_mem_perf_blocked, - input rocc_mem_perf_canAcceptStoreThenLoad, - input rocc_mem_perf_canAcceptStoreThenRMW, - input rocc_mem_perf_canAcceptLoadThenLoad, - input rocc_mem_perf_storeBufferEmptyAfterLoad, - input rocc_mem_perf_storeBufferEmptyAfterStore, - output rocc_mem_keep_clock_enabled, - input rocc_mem_clock_enabled, - output rocc_busy, - output rocc_interrupt, - input rocc_exception, - input rocc_fpu_req_ready, - output rocc_fpu_req_valid, - output rocc_fpu_req_bits_ldst, - output rocc_fpu_req_bits_wen, - output rocc_fpu_req_bits_ren1, - output rocc_fpu_req_bits_ren2, - output rocc_fpu_req_bits_ren3, - output rocc_fpu_req_bits_swap12, - output rocc_fpu_req_bits_swap23, - output [1:0] rocc_fpu_req_bits_typeTagIn, - output [1:0] rocc_fpu_req_bits_typeTagOut, - output rocc_fpu_req_bits_fromint, - output rocc_fpu_req_bits_toint, - output rocc_fpu_req_bits_fastpipe, - output rocc_fpu_req_bits_fma, - output rocc_fpu_req_bits_div, - output rocc_fpu_req_bits_sqrt, - output rocc_fpu_req_bits_wflags, - output [FPConstants_RM_SZ-1:0] rocc_fpu_req_bits_rm, - output [1:0] rocc_fpu_req_bits_fmaCmd, - output [1:0] rocc_fpu_req_bits_typ, - output [1:0] rocc_fpu_req_bits_fmt, - output [fLen:0] rocc_fpu_req_bits_in1, - output [fLen:0] rocc_fpu_req_bits_in2, - output [fLen:0] rocc_fpu_req_bits_in3, - output rocc_fpu_resp_ready, - input rocc_fpu_resp_valid, - input [fLen:0] rocc_fpu_resp_bits_data, - input [FPConstants_FLAGS_SZ-1:0] rocc_fpu_resp_bits_exc ); + output rocc_mem_req_bits_signed, + output rocc_mem_req_bits_phys, + output rocc_mem_req_bits_no_alloc, + output rocc_mem_req_bits_no_xcpt, + output rocc_mem_req_bits_no_resp, + output [1:0] rocc_mem_req_bits_dprv, + output rocc_mem_req_bits_dv, + output [coreDataBits-1:0] rocc_mem_req_bits_data, + output [coreDataBytes-1:0] rocc_mem_req_bits_mask, + output rocc_mem_s1_kill, + output [coreDataBits-1:0] rocc_mem_s1_data_data, + output [coreDataBytes-1:0] rocc_mem_s1_data_mask, + input rocc_mem_s2_nack, + input rocc_mem_s2_nack_cause_raw, + output rocc_mem_s2_kill, + input rocc_mem_s2_uncached, + input [paddrBits-1:0] rocc_mem_s2_paddr, + input [vaddrBitsExtended-1:0] rocc_mem_s2_gpa, + input rocc_mem_s2_gpa_is_pte, + input rocc_mem_resp_valid, + input [coreMaxAddrBits-1:0] rocc_mem_resp_bits_addr, + input [dcacheReqTagBits-1:0] rocc_mem_resp_bits_tag, + input [M_SZ-1:0] rocc_mem_resp_bits_cmd, + input [mem_req_bits_size_width-1:0] rocc_mem_resp_bits_size, + input rocc_mem_resp_bits_signed, + input [coreDataBits-1:0] rocc_mem_resp_bits_data, + input [coreDataBytes-1:0] rocc_mem_resp_bits_mask, + input rocc_mem_resp_bits_replay, + input rocc_mem_resp_bits_has_data, + input [coreDataBits-1:0] rocc_mem_resp_bits_data_word_bypass, + input [coreDataBits-1:0] rocc_mem_resp_bits_data_raw, + input [coreDataBits-1:0] rocc_mem_resp_bits_store_data, + input [1:0] rocc_mem_resp_bits_dprv, + input rocc_mem_resp_bits_dv, + input rocc_mem_replay_next, + input rocc_mem_s2_xcpt_ma_ld, + input rocc_mem_s2_xcpt_ma_st, + input rocc_mem_s2_xcpt_pf_ld, + input rocc_mem_s2_xcpt_pf_st, + input rocc_mem_s2_xcpt_gf_ld, + input rocc_mem_s2_xcpt_gf_st, + input rocc_mem_s2_xcpt_ae_ld, + input rocc_mem_s2_xcpt_ae_st, + input rocc_mem_ordered, + input rocc_mem_perf_acquire, + input rocc_mem_perf_release, + input rocc_mem_perf_grant, + input rocc_mem_perf_tlbMiss, + input rocc_mem_perf_blocked, + input rocc_mem_perf_canAcceptStoreThenLoad, + input rocc_mem_perf_canAcceptStoreThenRMW, + input rocc_mem_perf_canAcceptLoadThenLoad, + input rocc_mem_perf_storeBufferEmptyAfterLoad, + input rocc_mem_perf_storeBufferEmptyAfterStore, + output rocc_mem_keep_clock_enabled, + input rocc_mem_clock_enabled, + output rocc_busy, + output rocc_interrupt, + input rocc_exception, + input rocc_fpu_req_ready, + output rocc_fpu_req_valid, + output rocc_fpu_req_bits_ldst, + output rocc_fpu_req_bits_wen, + output rocc_fpu_req_bits_ren1, + output rocc_fpu_req_bits_ren2, + output rocc_fpu_req_bits_ren3, + output rocc_fpu_req_bits_swap12, + output rocc_fpu_req_bits_swap23, + output [1:0] rocc_fpu_req_bits_typeTagIn, + output [1:0] rocc_fpu_req_bits_typeTagOut, + output rocc_fpu_req_bits_fromint, + output rocc_fpu_req_bits_toint, + output rocc_fpu_req_bits_fastpipe, + output rocc_fpu_req_bits_fma, + output rocc_fpu_req_bits_div, + output rocc_fpu_req_bits_sqrt, + output rocc_fpu_req_bits_wflags, + output [FPConstants_RM_SZ-1:0] rocc_fpu_req_bits_rm, + output [1:0] rocc_fpu_req_bits_fmaCmd, + output [1:0] rocc_fpu_req_bits_typ, + output [1:0] rocc_fpu_req_bits_fmt, + output [fLen:0] rocc_fpu_req_bits_in1, + output [fLen:0] rocc_fpu_req_bits_in2, + output [fLen:0] rocc_fpu_req_bits_in3, + output rocc_fpu_resp_ready, + input rocc_fpu_resp_valid, + input [fLen:0] rocc_fpu_resp_bits_data, + input [FPConstants_FLAGS_SZ-1:0] rocc_fpu_resp_bits_exc ); assign rocc_cmd_ready = 1'b1; diff --git a/src/main/scala/groundtest/Tile.scala b/src/main/scala/groundtest/Tile.scala index f7bef2d439f..de84bed5645 100644 --- a/src/main/scala/groundtest/Tile.scala +++ b/src/main/scala/groundtest/Tile.scala @@ -43,10 +43,7 @@ abstract class GroundTestTile( dcacheOpt.foreach { m => m.hartIdSinkNodeOpt.foreach { _ := hartIdNexusNode } InModuleBody { - m.module match { - case module: DCacheModule => module.tlb_port := DontCare - case other => other - } + m.module.io.tlb_port := DontCare } } diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index 4d9195cf9ca..232c7b68cfd 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -91,8 +91,6 @@ class DCacheTLBPort(implicit p: Parameters) extends CoreBundle()(p) { } class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { - val tlb_port = IO(new DCacheTLBPort) - val tECC = cacheParams.tagCode val dECC = cacheParams.dataCode require(subWordBits % eccBits == 0, "subWordBits must be a multiple of eccBits") @@ -179,7 +177,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s1_nack = WireDefault(false.B) val s1_valid_masked = s1_valid && !io.cpu.s1_kill val s1_valid_not_nacked = s1_valid && !s1_nack - val s1_tlb_req_valid = RegNext(tlb_port.req.fire, false.B) + val s1_tlb_req_valid = RegNext(io.tlb_port.req.fire, false.B) val s2_tlb_req_valid = RegNext(s1_tlb_req_valid, false.B) val s0_clk_en = metaArb.io.out.valid && !metaArb.io.out.bits.write @@ -190,8 +188,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s1_req = RegEnable(s0_req, s0_clk_en) val s1_vaddr = Cat(s1_req.idx.getOrElse(s1_req.addr) >> tagLSB, s1_req.addr(tagLSB-1, 0)) - val s0_tlb_req = WireInit(tlb_port.req.bits) - when (!tlb_port.req.fire) { + val s0_tlb_req = WireInit(io.tlb_port.req.bits) + when (!io.tlb_port.req.fire) { s0_tlb_req.passthrough := s0_req.phys s0_tlb_req.vaddr := s0_req.addr s0_tlb_req.size := s0_req.size @@ -199,7 +197,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { s0_tlb_req.prv := s0_req.dprv s0_tlb_req.v := s0_req.dv } - val s1_tlb_req = RegEnable(s0_tlb_req, s0_clk_en || tlb_port.req.valid) + val s1_tlb_req = RegEnable(s0_tlb_req, s0_clk_en || io.tlb_port.req.valid) val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) @@ -263,7 +261,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // address translation val s1_cmd_uses_tlb = s1_readwrite || s1_flush_line || s1_req.cmd === M_WOK io.ptw <> tlb.io.ptw - tlb.io.kill := io.cpu.s2_kill || s2_tlb_req_valid && tlb_port.s2_kill + tlb.io.kill := io.cpu.s2_kill || s2_tlb_req_valid && io.tlb_port.s2_kill tlb.io.req.valid := s1_tlb_req_valid || s1_valid && !io.cpu.s1_kill && s1_cmd_uses_tlb tlb.io.req.bits := s1_tlb_req when (!tlb.io.req.ready && !tlb.io.ptw.resp.valid && !io.cpu.req.bits.phys) { io.cpu.req.ready := false.B } @@ -277,8 +275,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { tlb.io.sfence.bits.hv := s1_req.cmd === M_HFENCEV tlb.io.sfence.bits.hg := s1_req.cmd === M_HFENCEG - tlb_port.req.ready := clock_en_reg - tlb_port.s1_resp := tlb.io.resp + io.tlb_port.req.ready := clock_en_reg + io.tlb_port.s1_resp := tlb.io.resp when (s1_tlb_req_valid && s1_valid && !(s1_req.phys && s1_req.no_xcpt)) { s1_nack := true.B } pma_checker.io <> DontCare @@ -1056,7 +1054,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { metaArb.io.out.valid || // subsumes resetting || flushing s1_probe || s2_probe || s1_valid || s2_valid || - tlb_port.req.valid || + io.tlb_port.req.valid || s1_tlb_req_valid || s2_tlb_req_valid || pstore1_held || pstore2_valid || release_state =/= s_ready || diff --git a/src/main/scala/rocket/HellaCache.scala b/src/main/scala/rocket/HellaCache.scala index 15cb4b6ac7e..cdad4b8e13b 100644 --- a/src/main/scala/rocket/HellaCache.scala +++ b/src/main/scala/rocket/HellaCache.scala @@ -119,6 +119,7 @@ trait HasCoreData extends HasCoreParameters { class HellaCacheReqInternal(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp { val phys = Bool() + val no_resp = Bool() // The dcache may omit generating a response for this request val no_alloc = Bool() val no_xcpt = Bool() } @@ -225,6 +226,7 @@ class HellaCacheBundle(implicit p: Parameters) extends CoreBundle()(p) { val cpu = Flipped(new HellaCacheIO) val ptw = new TLBPTWIO() val errors = new DCacheErrors + val tlb_port = new DCacheTLBPort } class HellaCacheModule(outer: HellaCache) extends LazyModuleImp(outer) @@ -272,10 +274,7 @@ trait HasHellaCache { this: BaseTile => dcache.hartIdSinkNodeOpt.map { _ := hartIdNexusNode } dcache.mmioAddressPrefixSinkNodeOpt.map { _ := mmioAddressPrefixNexusNode } InModuleBody { - dcache.module match { - case module: DCacheModule => module.tlb_port := DontCare - case other => other - } + dcache.module.io.tlb_port := DontCare } } diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index 510fe1f5e61..dfe6e9ac593 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -68,7 +68,7 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa val req = Reg(new HellaCacheReq) val grant_word = Reg(UInt(wordBits.W)) - val s_idle :: s_mem_access :: s_mem_ack :: s_resp :: Nil = Enum(4) + val s_idle :: s_mem_access :: s_mem_ack :: s_resp_1 :: s_resp_2 :: Nil = Enum(5) val state = RegInit(s_idle) io.req.ready := (state === s_idle) @@ -102,8 +102,8 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa io.mem_access.valid := (state === s_mem_access) io.mem_access.bits := Mux(isAMO(req.cmd), atomics, Mux(isRead(req.cmd), get, put)) - io.replay_next := (state === s_mem_ack) || io.resp.valid && !io.resp.ready - io.resp.valid := (state === s_resp) + io.replay_next := state === s_resp_1 || (state === s_resp_2 && !io.resp.ready) + io.resp.valid := state === s_resp_2 io.resp.bits.addr := req.addr io.resp.bits.idx.foreach(_ := req.idx.get) io.resp.bits.tag := req.tag @@ -130,12 +130,16 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa } when (state === s_mem_ack && io.mem_ack.valid) { - state := s_resp + state := Mux(req.no_resp || !isRead(req.cmd), s_idle, s_resp_1) when (isRead(req.cmd)) { grant_word := wordFromBeat(req.addr, io.mem_ack.bits.data) } } + when (state === s_resp_1) { + state := s_resp_2 + } + when (io.resp.fire) { state := s_idle } @@ -716,8 +720,11 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule val prober = Module(new ProbeUnit) val mshrs = Module(new MSHRFile) + io.tlb_port.req.ready := true.B io.cpu.req.ready := true.B val s1_valid = RegNext(io.cpu.req.fire, false.B) + val s1_tlb_req_valid = RegNext(io.tlb_port.req.fire, false.B) + val s1_tlb_req = RegEnable(io.tlb_port.req.bits, io.tlb_port.req.fire) val s1_req = Reg(new HellaCacheReq) val s1_valid_masked = s1_valid && !io.cpu.s1_kill val s1_replay = RegInit(false.B) @@ -725,6 +732,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule val s1_sfence = s1_req.cmd === M_SFENCE val s2_valid = RegNext(s1_valid_masked && !s1_sfence, false.B) && !io.cpu.s2_xcpt.asUInt.orR + val s2_tlb_req_valid = RegNext(s1_tlb_req_valid, false.B) val s2_req = Reg(new HellaCacheReq) val s2_replay = RegNext(s1_replay, false.B) && s2_req.cmd =/= M_FLUSH_ALL val s2_recycle = Wire(Bool()) @@ -751,6 +759,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule dtlb.io.req.bits.cmd := s1_req.cmd dtlb.io.req.bits.prv := s1_req.dprv dtlb.io.req.bits.v := s1_req.dv + when (s1_tlb_req_valid) { dtlb.io.req.bits := s1_tlb_req } when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false.B } dtlb.io.sfence.valid := s1_valid && !io.cpu.s1_kill && s1_sfence @@ -778,13 +787,16 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule when (s2_recycle) { s1_req := s2_req } - val s1_addr = dtlb.io.resp.paddr + val s1_addr = Mux(s1_req.phys, s1_req.addr, dtlb.io.resp.paddr) + + io.tlb_port.s1_resp := dtlb.io.resp when (s1_clk_en) { s2_req.size := s1_req.size s2_req.signed := s1_req.signed s2_req.phys := s1_req.phys s2_req.addr := s1_addr + s2_req.no_resp := s1_req.no_resp when (s1_write) { s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data.data) } @@ -990,7 +1002,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule amoalu.io.rhs := s2_req.data // nack it like it's hot - val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || io.cpu.s2_nack || + val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || io.cpu.s2_nack || s1_tlb_req_valid || s1_req.addr(idxMSB,idxLSB) === prober.io.meta_write.bits.idx && !prober.io.req.ready val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshrs.io.req.valid := false.B } diff --git a/src/main/scala/rocket/PTW.scala b/src/main/scala/rocket/PTW.scala index 5eb7a08b8c3..5395cc10bf2 100644 --- a/src/main/scala/rocket/PTW.scala +++ b/src/main/scala/rocket/PTW.scala @@ -534,6 +534,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( io.mem.req.bits.dprv := PRV.S.U // PTW accesses are S-mode by definition io.mem.req.bits.dv := do_both_stages && !stage2 io.mem.req.bits.tag := DontCare + io.mem.req.bits.no_resp := false.B io.mem.req.bits.no_alloc := DontCare io.mem.req.bits.no_xcpt := DontCare io.mem.req.bits.data := DontCare diff --git a/src/main/scala/rocket/RocketCore.scala b/src/main/scala/rocket/RocketCore.scala index ca3900fc24c..63c223a7562 100644 --- a/src/main/scala/rocket/RocketCore.scala +++ b/src/main/scala/rocket/RocketCore.scala @@ -946,6 +946,7 @@ class Rocket(tile: RocketTile)(implicit p: Parameters) extends CoreModule()(p) io.dmem.req.bits.idx.foreach(_ := io.dmem.req.bits.addr) io.dmem.req.bits.dprv := Mux(ex_reg_hls, csr.io.hstatus.spvp, csr.io.status.dprv) io.dmem.req.bits.dv := ex_reg_hls || csr.io.status.dv + io.dmem.req.bits.no_resp := !isRead(ex_ctrl.mem_cmd) || (!ex_ctrl.fp && ex_waddr === 0.U) io.dmem.req.bits.no_alloc := DontCare io.dmem.req.bits.no_xcpt := DontCare io.dmem.req.bits.data := DontCare diff --git a/src/main/scala/rocket/ScratchpadSlavePort.scala b/src/main/scala/rocket/ScratchpadSlavePort.scala index 998fe7e26c9..a2a0b9d37f5 100644 --- a/src/main/scala/rocket/ScratchpadSlavePort.scala +++ b/src/main/scala/rocket/ScratchpadSlavePort.scala @@ -89,6 +89,7 @@ class ScratchpadSlavePort(address: Seq[AddressSet], coreDataBytes: Int, usingAto req.tag := 0.U req.phys := true.B req.no_xcpt := true.B + req.no_resp := false.B req.data := 0.U req.no_alloc := false.B req.mask := 0.U diff --git a/src/main/scala/tile/LazyRoCC.scala b/src/main/scala/tile/LazyRoCC.scala index f6fe824e3be..eb2ea1a92d9 100644 --- a/src/main/scala/tile/LazyRoCC.scala +++ b/src/main/scala/tile/LazyRoCC.scala @@ -195,6 +195,7 @@ class AccumulatorExampleModuleImp(outer: AccumulatorExample)(implicit p: Paramet io.mem.req.bits.phys := false.B io.mem.req.bits.dprv := cmd.bits.status.dprv io.mem.req.bits.dv := cmd.bits.status.dv + io.mem.req.bits.no_resp := false.B } class TranslatorExample(opcodes: OpcodeSet)(implicit p: Parameters) extends LazyRoCC(opcodes, nPTWPorts = 1) {