rocket-chip/src/main/scala/rocket/PTW.scala

// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.

package freechips.rocketchip.rocket

import Chisel._
import Chisel.ImplicitConversions._
import chisel3.withClock
import chisel3.internal.sourceinfo.SourceInfo
import chisel3.experimental.chiselName
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.subsystem.CacheBlockBytes
import freechips.rocketchip.tile._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util._
import freechips.rocketchip.util.property._
import freechips.rocketchip.diplomaticobjectmodel.model.OMSRAM
import scala.collection.mutable.ListBuffer
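
// Request from a TLB to the PTW: the virtual page number to translate.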
class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
  val addr = UInt(width = vpnBits)
}
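
// Response from the PTW to a TLB: the access-exception flag, the resolved PTE,
// the page-table level at which the walk ended, and superpage homogeneity /
// fragmentation information.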
class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
  val ae = Bool()
  val pte = new PTE
  val level = UInt(width = log2Ceil(pgLevels))
  val fragmented_superpage = Bool()
  val homogeneous = Bool()
}
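
// Interface between a TLB (requestor) and the PTW, including the CSR state
// (ptbr, mstatus, PMPs, custom CSRs) the TLB needs to observe.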
class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p)
    with HasCoreParameters {
  val req = Decoupled(Valid(new PTWReq))
  val resp = Valid(new PTWResp).flip
  val ptbr = new PTBR().asInput
  val status = new MStatus().asInput
  val pmp = Vec(nPMPs, new PMP).asInput
  val customCSRs = coreParams.customCSRs.asInput
}
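
// Performance events exported to the core: L2 TLB hit/miss and PTE-cache hit/miss.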
class PTWPerfEvents extends Bundle {
  val l2miss = Bool()
  val l2hit = Bool()
  val pte_miss = Bool()
  val pte_hit = Bool()
}
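
// Interface between the PTW and the core datapath / CSR file: page-table base
// pointer, SFENCE.VMA requests, status, PMPs, performance events, and a
// clock-enable for the PTW's gated-clock domain.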
class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p)
    with HasCoreParameters {
  val ptbr = new PTBR().asInput
  val sfence = Valid(new SFenceReq).flip
  val status = new MStatus().asInput
  val pmp = Vec(nPMPs, new PMP).asInput
  val perf = new PTWPerfEvents().asOutput
  val customCSRs = coreParams.customCSRs.asInput
  val clock_enabled = Bool(OUTPUT)
}
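
// A RISC-V page-table entry. The helper methods classify an entry: table()
// identifies a valid pointer to the next level, leaf() a valid leaf, and the
// s*/u* methods the supervisor/user read, write, and execute permissions
// (writes additionally require the dirty bit to be set).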
class PTE(implicit p: Parameters) extends CoreBundle()(p) {
  val ppn = UInt(width = 54)
  val reserved_for_software = Bits(width = 2)
  val d = Bool()
  val a = Bool()
  val g = Bool()
  val u = Bool()
  val x = Bool()
  val w = Bool()
  val r = Bool()
  val v = Bool()

  def table(dummy: Int = 0) = v && !r && !w && !x
  def leaf(dummy: Int = 0) = v && (r || (x && !w)) && a
  def ur(dummy: Int = 0) = sr() && u
  def uw(dummy: Int = 0) = sw() && u
  def ux(dummy: Int = 0) = sx() && u
  def sr(dummy: Int = 0) = leaf() && r
  def sw(dummy: Int = 0) = leaf() && w && d
  def sx(dummy: Int = 0) = leaf() && x
}
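
// An entry in the L2 TLB: a VPN tag plus the PTE fields needed to reconstruct
// a leaf PTE on a hit.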
class L2TLBEntry(implicit p: Parameters) extends CoreBundle()(p)
    with HasCoreParameters {
  val idxBits = log2Ceil(coreParams.nL2TLBEntries)
  val tagBits = vpnBits - idxBits
  val tag = UInt(width = tagBits)
  val ppn = UInt(width = ppnBits)
  val d = Bool()
  val a = Bool()
  val u = Bool()
  val x = Bool()
  val w = Bool()
  val r = Bool()

  override def cloneType = new L2TLBEntry().asInstanceOf[this.type]
}
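
// The hardware page-table walker. It arbitrates among n requestors (TLBs),
// walks the page table through a HellaCache (D$) port, and caches intermediate
// results in a small PTE cache and an optional L2 TLB.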
@chiselName
class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
  val io = new Bundle {
    val requestor = Vec(n, new TLBPTWIO).flip
    val mem = new HellaCacheIO
    val dpath = new DatapathPTWIO
  }

  val omSRAMs = collection.mutable.ListBuffer[OMSRAM]()
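
  // Walk state machine: s_ready accepts a request; s_req checks the PTE cache
  // and, on a miss, issues a load to the D$; s_wait1..s_wait3 follow the D$
  // pipeline; s_fragment_superpage re-checks homogeneity for superpages.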
  val s_ready :: s_req :: s_wait1 :: s_dummy1 :: s_wait2 :: s_wait3 :: s_dummy2 :: s_fragment_superpage :: Nil = Enum(UInt(), 8)
  val state = Reg(init=s_ready)

  val l2_refill_wire = Wire(Bool())

  val arb = Module(new Arbiter(Valid(new PTWReq), n))
  arb.io.in <> io.requestor.map(_.req)
  arb.io.out.ready := (state === s_ready) && !l2_refill_wire

  val resp_valid = Reg(next = Vec.fill(io.requestor.size)(Bool(false)))

  val clock_en = state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid || io.dpath.customCSRs.disableDCacheClockGate
  io.dpath.clock_enabled := usingVM && clock_en
  val gated_clock =
    if (!usingVM || !tileParams.dcache.get.clockGate) clock
    else ClockGate(clock, clock_en, "ptw_clock_gate")
  withClock (gated_clock) { // entering gated-clock domain
  val invalidated = Reg(Bool())
  val count = Reg(UInt(width = log2Up(pgLevels)))
  val resp_ae = RegNext(false.B)
  val resp_fragmented_superpage = RegNext(false.B)

  val r_req = Reg(new PTWReq)
  val r_req_dest = Reg(Bits())
  val r_pte = Reg(new PTE)

  val mem_resp_valid = RegNext(io.mem.resp.valid)
  val mem_resp_data = RegNext(io.mem.resp.bits.data)
  io.mem.uncached_resp.map { resp =>
    assert(!(resp.valid && io.mem.resp.valid))
    resp.ready := true
    when (resp.valid) {
      mem_resp_valid := true
      mem_resp_data := resp.bits.data
    }
  }
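
  // Interpret the returned memory word as a PTE: truncate the PPN, treat a
  // superpage leaf with nonzero low PPN bits as invalid (misaligned), and flag
  // PPN bits above ppnBits as an invalid physical address.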
  val (pte, invalid_paddr) = {
    val tmp = new PTE().fromBits(mem_resp_data)
    val res = Wire(init = tmp)
    res.ppn := tmp.ppn(ppnBits-1, 0)
    when (tmp.r || tmp.w || tmp.x) {
      // for superpage mappings, make sure PPN LSBs are zero
      for (i <- 0 until pgLevels-1)
        when (count <= i && tmp.ppn((pgLevels-1-i)*pgLevelBits-1, (pgLevels-2-i)*pgLevelBits) =/= 0) { res.v := false }
    }
    (res, (tmp.ppn >> ppnBits) =/= 0)
  }
  val traverse = pte.table() && !invalid_paddr && count < pgLevels-1
  val pte_addr = if (!usingVM) 0.U else {
    val vpn_idxs = (0 until pgLevels).map(i => (r_req.addr >> (pgLevels-i-1)*pgLevelBits)(pgLevelBits-1,0))
    val vpn_idx = vpn_idxs(count)
    Cat(r_pte.ppn, vpn_idx) << log2Ceil(xLen/8)
  }
  val fragmented_superpage_ppn = {
    val choices = (pgLevels-1 until 0 by -1).map(i => Cat(r_pte.ppn >> (pgLevelBits*i), r_req.addr(((pgLevelBits*i) min vpnBits)-1, 0).padTo(pgLevelBits*i)))
    choices(count)
  }

  when (arb.io.out.fire()) {
    r_req := arb.io.out.bits.bits
    r_req_dest := arb.io.chosen
  }
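
  // A small fully-associative cache of non-leaf (table) PTEs, used to skip
  // upper levels of subsequent walks. Pseudo-LRU replacement; flushed by any
  // SFENCE.VMA that does not specify an address (rs1 clear).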
  val (pte_cache_hit, pte_cache_data) = {
    val size = 1 << log2Up(pgLevels * 2)
    val plru = new PseudoLRU(size)
    val valid = RegInit(0.U(size.W))
    val tags = Reg(Vec(size, UInt(width = paddrBits)))
    val data = Reg(Vec(size, UInt(width = ppnBits)))

    val hits = tags.map(_ === pte_addr).asUInt & valid
    val hit = hits.orR
    when (mem_resp_valid && traverse && !hit && !invalidated) {
      val r = Mux(valid.andR, plru.way, PriorityEncoder(~valid))
      valid := valid | UIntToOH(r)
      tags(r) := pte_addr
      data(r) := pte.ppn
    }
    when (hit && state === s_req) { plru.access(OHToUInt(hits)) }
    when (io.dpath.sfence.valid && !io.dpath.sfence.bits.rs1) { valid := 0.U }

    for (i <- 0 until pgLevels-1)
      ccover(hit && state === s_req && count === i, s"PTE_CACHE_HIT_L$i", s"PTE cache hit, level $i")

    (hit && count < pgLevels-1, Mux1H(hits, data))
  }
  val pte_hit = RegNext(false.B)
  io.dpath.perf.pte_miss := false
  io.dpath.perf.pte_hit := pte_hit && (state === s_req) && !io.dpath.perf.l2hit
  assert(!(io.dpath.perf.l2hit && (io.dpath.perf.pte_miss || io.dpath.perf.pte_hit)),
    "PTE Cache Hit/Miss Performance Monitor Events are lower priority than L2TLB Hit event")

  val l2_refill = RegNext(false.B)
  l2_refill_wire := l2_refill
  io.dpath.perf.l2miss := false
  io.dpath.perf.l2hit := false
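
  // Optional direct-mapped, parity-protected L2 TLB for leaf PTEs, looked up
  // when a request is accepted. A detected parity error flushes the whole
  // array; SFENCE.VMA invalidates one index (rs1 set), keeps only global
  // entries (rs2 set), or flushes everything.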
  val (l2_hit, l2_error, l2_pte, l2_tlb_ram) = if (coreParams.nL2TLBEntries == 0) (false.B, false.B, Wire(new PTE), None) else {
    val code = new ParityCode
    require(isPow2(coreParams.nL2TLBEntries))
    val idxBits = log2Ceil(coreParams.nL2TLBEntries)

    val (ram, omSRAM) = DescribedSRAM(
      name = "l2_tlb_ram",
      desc = "L2 TLB",
      size = coreParams.nL2TLBEntries,
      data = UInt(width = code.width(new L2TLBEntry().getWidth))
    )

    val g = Reg(UInt(width = coreParams.nL2TLBEntries))
    val valid = RegInit(UInt(0, coreParams.nL2TLBEntries))
    val (r_tag, r_idx) = Split(r_req.addr, idxBits)
    when (l2_refill && !invalidated) {
      val entry = Wire(new L2TLBEntry)
      entry := r_pte
      entry.tag := r_tag
      ram.write(r_idx, code.encode(entry.asUInt))

      val mask = UIntToOH(r_idx)
      valid := valid | mask
      g := Mux(r_pte.g, g | mask, g & ~mask)
    }
    when (io.dpath.sfence.valid) {
      valid :=
        Mux(io.dpath.sfence.bits.rs1, valid & ~UIntToOH(io.dpath.sfence.bits.addr(idxBits+pgIdxBits-1, pgIdxBits)),
        Mux(io.dpath.sfence.bits.rs2, valid & g, 0.U))
    }

    val s0_valid = !l2_refill && arb.io.out.fire()
    val s1_valid = RegNext(s0_valid && arb.io.out.bits.valid)
    val s2_valid = RegNext(s1_valid)
    val s1_rdata = ram.read(arb.io.out.bits.bits.addr(idxBits-1, 0), s0_valid)
    val s2_rdata = code.decode(RegEnable(s1_rdata, s1_valid))
    val s2_valid_bit = RegEnable(valid(r_idx), s1_valid)
    val s2_g = RegEnable(g(r_idx), s1_valid)
    when (s2_valid && s2_valid_bit && s2_rdata.error) { valid := 0.U }

    val s2_entry = s2_rdata.uncorrected.asTypeOf(new L2TLBEntry)
    val s2_hit = s2_valid && s2_valid_bit && r_tag === s2_entry.tag
    io.dpath.perf.l2miss := s2_valid && !(s2_valid_bit && r_tag === s2_entry.tag)
    io.dpath.perf.l2hit := s2_hit
    val s2_pte = Wire(new PTE)
    s2_pte := s2_entry
    s2_pte.g := s2_g
    s2_pte.v := true

    ccover(s2_hit, "L2_TLB_HIT", "L2 TLB hit")

    omSRAMs += omSRAM
    (s2_hit, s2_rdata.error, s2_pte, Some(ram))
  }

  // if SFENCE occurs during walk, don't refill PTE cache or L2 TLB until next walk
  invalidated := io.dpath.sfence.valid || (invalidated && state =/= s_ready)
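
  // The PTE load issued to the D$: a physical, xLen/8-byte, S-mode read of
  // pte_addr. The request is killed in stage 1 if the L2 TLB already hit.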
  io.mem.req.valid := state === s_req || state === s_dummy1
  io.mem.req.bits.phys := Bool(true)
  io.mem.req.bits.cmd := M_XRD
  io.mem.req.bits.size := log2Ceil(xLen/8)
  io.mem.req.bits.signed := false
  io.mem.req.bits.addr := pte_addr
  io.mem.req.bits.idx.foreach(_ := pte_addr)
  io.mem.req.bits.dprv := PRV.S.U // PTW accesses are S-mode by definition
  io.mem.s1_kill := l2_hit || state =/= s_wait1
  io.mem.s2_kill := Bool(false)
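
  // Check whether the physical region addressed through pte_addr is homogeneous
  // with respect to PMAs and PMPs at the current level; a non-homogeneous
  // superpage mapping must be fragmented into base-page-sized responses.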
  val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits)
  val pmaPgLevelHomogeneous = (0 until pgLevels) map { i =>
    val pgSize = BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits))
    if (pageGranularityPMPs && i == pgLevels - 1) {
      require(TLBPageLookup.homogeneous(edge.manager.managers, pgSize), s"All memory regions must be $pgSize-byte aligned")
      true.B
    } else {
      TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(pte_addr).homogeneous
    }
  }
  val pmaHomogeneous = pmaPgLevelHomogeneous(count)
  val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp).apply(pte_addr >> pgIdxBits << pgIdxBits, count)
  val homogeneous = pmaHomogeneous && pmpHomogeneous
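
  // Broadcast the walk result to every requestor; resp_valid selects the one
  // that issued the request.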
  for (i <- 0 until io.requestor.size) {
    io.requestor(i).resp.valid := resp_valid(i)
    io.requestor(i).resp.bits.ae := resp_ae
    io.requestor(i).resp.bits.pte := r_pte
    io.requestor(i).resp.bits.level := count
    io.requestor(i).resp.bits.homogeneous := homogeneous || pageGranularityPMPs
    io.requestor(i).resp.bits.fragmented_superpage := resp_fragmented_superpage && pageGranularityPMPs
    io.requestor(i).ptbr := io.dpath.ptbr
    io.requestor(i).customCSRs := io.dpath.customCSRs
    io.requestor(i).status := io.dpath.status
    io.requestor(i).pmp := io.dpath.pmp
  }

  // control state machine
  val next_state = Wire(init = state)
  state := OptimizationBarrier(next_state)

  switch (state) {
    is (s_ready) {
      when (arb.io.out.fire()) {
        next_state := Mux(arb.io.out.bits.valid, s_req, s_ready)
      }
      count := pgLevels - minPgLevels - io.dpath.ptbr.additionalPgLevels
    }
    is (s_req) {
      when (pte_cache_hit) {
        count := count + 1
        pte_hit := true
      }.otherwise {
        next_state := Mux(io.mem.req.ready, s_wait1, s_req)
      }
    }
    is (s_wait1) {
      // This Mux is for the l2_error case; the l2_hit && !l2_error case is overridden below
      next_state := Mux(l2_hit, s_req, s_wait2)
    }
    is (s_wait2) {
      next_state := s_wait3
      io.dpath.perf.pte_miss := count < pgLevels-1
      when (io.mem.s2_xcpt.ae.ld) {
        resp_ae := true
        next_state := s_ready
        resp_valid(r_req_dest) := true
      }
    }
    is (s_fragment_superpage) {
      next_state := s_ready
      resp_valid(r_req_dest) := true
      resp_ae := false
      when (!homogeneous) {
        count := pgLevels-1
        resp_fragmented_superpage := true
      }
    }
  }

  def makePTE(ppn: UInt, default: PTE) = {
    val pte = Wire(init = default)
    pte.ppn := ppn
    pte
  }
  r_pte := OptimizationBarrier(
    Mux(mem_resp_valid, pte,
    Mux(l2_hit && !l2_error, l2_pte,
    Mux(state === s_fragment_superpage && !homogeneous, makePTE(fragmented_superpage_ppn, r_pte),
    Mux(state === s_req && pte_cache_hit, makePTE(pte_cache_data, l2_pte),
    Mux(arb.io.out.fire(), makePTE(io.dpath.ptbr.ppn, r_pte),
    r_pte))))))

  when (l2_hit && !l2_error) {
    assert(state === s_req || state === s_wait1)
    next_state := s_ready
    resp_valid(r_req_dest) := true
    resp_ae := false
    count := pgLevels-1
  }
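
  // Handle the PTE returned from memory: descend another level for a table
  // PTE; otherwise finish the walk, refilling the L2 TLB for a valid
  // last-level leaf. When PMPs are page-granular and the walk ends above the
  // last level, detour through s_fragment_superpage to check homogeneity.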
  when (mem_resp_valid) {
    assert(state === s_wait3)
    when (traverse) {
      next_state := s_req
      count := count + 1
    }.otherwise {
      l2_refill := pte.v && !invalid_paddr && count === pgLevels-1
      val ae = pte.v && invalid_paddr
      resp_ae := ae
      when (pageGranularityPMPs && count =/= pgLevels-1 && !ae) {
        next_state := s_fragment_superpage
      }.otherwise {
        next_state := s_ready
        resp_valid(r_req_dest) := true
      }
    }
  }
  when (io.mem.s2_nack) {
    assert(state === s_wait2)
    next_state := s_req
  }

  for (i <- 0 until pgLevels) {
    val leaf = mem_resp_valid && !traverse && count === i
    ccover(leaf && pte.v && !invalid_paddr, s"L$i", s"successful page-table access, level $i")
    ccover(leaf && pte.v && invalid_paddr, s"L${i}_BAD_PPN_MSB", s"PPN too large, level $i")
    ccover(leaf && !mem_resp_data(0), s"L${i}_INVALID_PTE", s"page not present, level $i")
    if (i != pgLevels-1)
      ccover(leaf && !pte.v && mem_resp_data(0), s"L${i}_BAD_PPN_LSB", s"PPN LSBs not zero, level $i")
  }
  ccover(mem_resp_valid && count === pgLevels-1 && pte.table(), s"TOO_DEEP", s"page table too deep")
  ccover(io.mem.s2_nack, "NACK", "D$ nacked page-table access")
  ccover(state === s_wait2 && io.mem.s2_xcpt.ae.ld, "AE", "access exception while walking page table")
  } // leaving gated-clock domain

  private def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) =
    if (usingVM) cover(cond, s"PTW_$label", "MemorySystem;;" + desc)
}

/** Mix-ins for constructing tiles that might have a PTW */
trait CanHavePTW extends HasTileParameters with HasHellaCache { this: BaseTile =>
  val module: CanHavePTWModule
  val utlbOMSRAMs = collection.mutable.ListBuffer[OMSRAM]()
  var nPTWPorts = 1
  nDCachePorts += usingPTW.toInt
}
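
// Module-side mix-in: instantiates the PTW and, when a PTW is in use, adds its
// memory port to the D$ port list. Tile implementations are expected to
// connect the collected ptwPorts to ptw.io.requestor.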
trait CanHavePTWModule extends HasHellaCacheModule {
  val outer: CanHavePTW
  val ptwPorts = ListBuffer(outer.dcache.module.io.ptw)
  val ptw = Module(new PTW(outer.nPTWPorts)(outer.dcache.node.edges.out(0), outer.p))
  if (outer.usingPTW) {
    dcachePorts += ptw.io.mem
    outer.utlbOMSRAMs ++= ptw.omSRAMs
  }
}