From ac109dfaa8bf0d123b21035ffe79b581c438d5c9 Mon Sep 17 00:00:00 2001 From: Yuan Yao Date: Sun, 3 Sep 2017 16:43:19 -0400 Subject: [PATCH] ruby: Integrate Ruby into gem5-aladdin. To use ruby, compile using the 'MESI_Two_Level_aladdin' protocol (build/X86/variables). This protocol is identical to the existing MESI_Two_Level protocol. Then add the --ruby flag on the command line. Change-Id: I7498bd71ff387a5ff4ad0e1d50acb8a95873303c --- build_opts/X86_MESI_Two_Level_aladdin | 3 + configs/aladdin/aladdin_se.py | 35 +++-- configs/ruby/MESI_Two_Level_aladdin.py | 148 ++++++++++++++++++ src/aladdin | 2 +- src/cpu/o3/O3CPU.py | 2 +- src/dev/dma_device.cc | 2 +- src/mem/packet_queue.cc | 8 +- src/mem/protocol/MESI_Two_Level_aladdin.slicc | 7 + src/mem/protocol/SConsopts | 1 + src/mem/ruby/system/DMASequencer.cc | 2 +- 10 files changed, 191 insertions(+), 19 deletions(-) create mode 100644 build_opts/X86_MESI_Two_Level_aladdin create mode 100644 configs/ruby/MESI_Two_Level_aladdin.py create mode 100644 src/mem/protocol/MESI_Two_Level_aladdin.slicc diff --git a/build_opts/X86_MESI_Two_Level_aladdin b/build_opts/X86_MESI_Two_Level_aladdin new file mode 100644 index 0000000000..8798f8a339 --- /dev/null +++ b/build_opts/X86_MESI_Two_Level_aladdin @@ -0,0 +1,3 @@ +TARGET_ISA = 'x86' +CPU_MODELS = 'TimingSimpleCPU,O3CPU,AtomicSimpleCPU' +PROTOCOL = 'MESI_Two_Level_aladdin' diff --git a/configs/aladdin/aladdin_se.py b/configs/aladdin/aladdin_se.py index e5d25252dc..4075017a3f 100644 --- a/configs/aladdin/aladdin_se.py +++ b/configs/aladdin/aladdin_se.py @@ -254,6 +254,11 @@ def get_processes(options): datapath.numOutStandingWalks = config.getint( accel, "tlb_max_outstanding_walks") datapath.tlbBandwidth = config.getint(accel, "tlb_bandwidth") + elif memory_type == "spad" and options.ruby: + # If the memory_type is spad, Aladdin will initiate a 1-way cache for every + # datapath, though this cache will not be used in simulation. 
+ # Since Ruby doesn't support 1-way caches, set the assoc to 2. + datapath.cacheAssoc = 2 if (memory_type != "cache" and memory_type != "spad"): fatal("Aladdin configuration file specified invalid memory type %s for " "accelerator %s." % (memory_type, accel)) @@ -296,20 +301,18 @@ def get_processes(options): system.cpu[i].createThreads() if options.ruby: - if not (options.cpu_type == "detailed" or options.cpu_type == "timing"): - print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!" + if not (options.cpu_type == "TimingSimpleCPU" or options.cpu_type == "DerivO3CPU"): + print >> sys.stderr, "Ruby requires TimingSimpleCPU or DerivO3CPU!!" sys.exit(1) - # Set the option for physmem so that it is not allocated any space - system.physmem = MemClass(range=AddrRange(options.mem_size), - null = True) - - options.use_map = True - Ruby.create_system(options, system) - assert(options.num_cpus == len(system.ruby._cpu_ruby_ports)) + Ruby.create_system(options, False, system) + assert(options.num_cpus + 2*len(system.find_all(HybridDatapath)[0]) == + len(system.ruby._cpu_ports)) + system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock, + voltage_domain = system.voltage_domain) for i in xrange(np): - ruby_port = system.ruby._cpu_ruby_ports[i] + ruby_port = system.ruby._cpu_ports[i] # Create the interrupt controller and connect its ports to Ruby # Note that the interrupt controller is always present but only @@ -320,11 +323,17 @@ def get_processes(options): system.cpu[i].icache_port = ruby_port.slave system.cpu[i].dcache_port = ruby_port.slave if buildEnv['TARGET_ISA'] == 'x86': - system.cpu[i].interrupts.pio = ruby_port.master - system.cpu[i].interrupts.int_master = ruby_port.slave - system.cpu[i].interrupts.int_slave = ruby_port.master + system.cpu[i].interrupts[0].pio = ruby_port.master + system.cpu[i].interrupts[0].int_master = ruby_port.slave + system.cpu[i].interrupts[0].int_slave = ruby_port.master system.cpu[i].itb.walker.port = ruby_port.slave 
system.cpu[i].dtb.walker.port = ruby_port.slave + + if options.accel_cfg_file: + for i,datapath in enumerate(datapaths): + datapath.cache_port = system.ruby._cpu_ports[options.num_cpus+2*i].slave + datapath.spad_port = system.ruby._cpu_ports[options.num_cpus+2*i+1].slave + else: system.membus = SystemXBar(width=options.xbar_width) diff --git a/configs/ruby/MESI_Two_Level_aladdin.py b/configs/ruby/MESI_Two_Level_aladdin.py new file mode 100644 index 0000000000..2341f58403 --- /dev/null +++ b/configs/ruby/MESI_Two_Level_aladdin.py @@ -0,0 +1,148 @@ +# Copyright (c) 2006-2007 The Regents of The University of Michigan +# Copyright (c) 2009 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Brad Beckmann + +import math +import m5 +from m5.objects import * +from m5.defines import buildEnv +from Ruby import create_topology +from Ruby import send_evicts + +# +# Declare caches used by the protocol +# +class L1Cache(RubyCache): pass +class L2Cache(RubyCache): pass + +def define_options(parser): + return + +def create_system(options, full_system, system, dma_ports, ruby_system): + + if buildEnv['PROTOCOL'] != 'MESI_Two_Level_aladdin': + fatal("This script requires the MESI_Two_Level_aladdin protocol to be built.") + + # Run the original protocol script + buildEnv['PROTOCOL'] = buildEnv['PROTOCOL'][:-8] + protocol = buildEnv['PROTOCOL'] + exec "import %s" % protocol + try: + (cpu_sequencers, dir_cntrls, topology) = \ + eval("%s.create_system(options, full_system, system, dma_ports, ruby_system)" % protocol) + except: + print "Error: could not create system for ruby protocol inside fusion system %s" % protocol + raise + + # + # Must create the individual controllers before the network to ensure the + # controller constructors are called before the network constructor + # + l2_bits = int(math.log(options.num_l2caches, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + + # + # Build accelerator + # + # Accelerator cache + datapaths = system.find_all(HybridDatapath)[0] + for i,datapath in enumerate(datapaths): + l1i_cache = L1Cache(size = '256B', + assoc = 2, + start_index_bit = block_size_bits, + is_icache 
= True) + # accelerator's private cache + l1d_cache = L1Cache(size = datapath.cacheSize, + assoc = datapath.cacheAssoc, + start_index_bit = block_size_bits, + is_icache = False) + + prefetcher = RubyPrefetcher.Prefetcher() + + l1_cntrl = L1Cache_Controller(version = options.num_cpus+i, + L1Icache = l1i_cache, + L1Dcache = l1d_cache, + l2_select_num_bits = l2_bits, + send_evictions = send_evicts(options), + prefetcher = prefetcher, + ruby_system = ruby_system, + #clk_domain = clk_domain, + #transitions_per_cycle = options.ports, + enable_prefetch = False) + + acc_seq = RubySequencer(version = options.num_cpus+i, + icache = l1i_cache, + dcache = l1d_cache, + #clk_domain = clk_domain, + ruby_system = ruby_system) + + l1_cntrl.sequencer = acc_seq + setattr(ruby_system, "l1_cntrl_acc%d" % i, l1_cntrl) + + # Add controllers and sequencers to the appropriate lists + cpu_sequencers.append(acc_seq) + topology.addController(l1_cntrl) + + # Connect the L1 controllers and the network + l1_cntrl.mandatoryQueue = MessageBuffer() + l1_cntrl.requestFromL1Cache = MessageBuffer() + l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave + l1_cntrl.responseFromL1Cache = MessageBuffer() + l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave + l1_cntrl.unblockFromL1Cache = MessageBuffer() + l1_cntrl.unblockFromL1Cache.master = ruby_system.network.slave + + l1_cntrl.optionalQueue = MessageBuffer() + + l1_cntrl.requestToL1Cache = MessageBuffer() + l1_cntrl.requestToL1Cache.slave = ruby_system.network.master + l1_cntrl.responseToL1Cache = MessageBuffer() + l1_cntrl.responseToL1Cache.slave = ruby_system.network.master + + # Scratchpad port + # The scratchpad port is connected to the DMA controller + spad_seq = DMASequencer(version = i, + ruby_system = ruby_system) + + spad_cntrl = DMA_Controller(version = i, dma_sequencer = spad_seq, + transitions_per_cycle = options.ports, + ruby_system = ruby_system) + + setattr(ruby_system, "spad_cntrl_acc%d" % i, spad_cntrl) + + # 
Connect the dma controllers and the network + spad_cntrl.mandatoryQueue = MessageBuffer() + spad_cntrl.requestToDir = MessageBuffer() + spad_cntrl.requestToDir.master = ruby_system.network.slave + spad_cntrl.responseFromDir = MessageBuffer(ordered = True) + spad_cntrl.responseFromDir.slave = ruby_system.network.master + + cpu_sequencers.append(spad_seq) + topology.addController(spad_cntrl) + + return (cpu_sequencers, dir_cntrls, topology) diff --git a/src/aladdin b/src/aladdin index 96aabfe6eb..378b4de1d9 160000 --- a/src/aladdin +++ b/src/aladdin @@ -1 +1 @@ -Subproject commit 96aabfe6eb3e3193734b050b93f6f2814c0fcb49 +Subproject commit 378b4de1d93948e368d6742a02c4093eb449b310 diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 8507ab6ff3..4b67862af8 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -73,7 +73,7 @@ def support_take_over(cls): "delay") commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay") fetchWidth = Param.Unsigned(8, "Fetch width") - fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes") + fetchBufferSize = Param.Unsigned(32, "Fetch buffer size in bytes") fetchQueueSize = Param.Unsigned(32, "Fetch queue size in micro-ops " "per-thread") diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc index 2eb22b1727..e8bcd98366 100644 --- a/src/dev/dma_device.cc +++ b/src/dev/dma_device.cc @@ -213,7 +213,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, * last channel that is just added. If we switch to the fixed-number of * channels model, we can let users to pick which channel they want to use, * or automatically pick the empty channel. 
*/ - for (ChunkGenerator gen(addr, size, chunkSize); + for (ChunkGenerator gen(addr, size, sys->cacheLineSize()); !gen.done(); gen.next()) { req = new Request(gen.addr(), gen.size(), flag, masterId); req->taskId(ContextSwitchTaskId::DMA); diff --git a/src/mem/packet_queue.cc b/src/mem/packet_queue.cc index 7649fe5a61..d53dc86915 100644 --- a/src/mem/packet_queue.cc +++ b/src/mem/packet_queue.cc @@ -118,8 +118,12 @@ PacketQueue::schedSendTiming(PacketPtr pkt, Tick when, bool force_order) // add a very basic sanity check on the port to ensure the // invisible buffer is not growing beyond reasonable limits - if (!_disableSanityCheck && transmitList.size() > 100) { - panic("Packet queue %s has grown beyond 100 packets\n", + // + // NOTE: When using Ruby with DMA, the amount of traffic generated can be + // far greater than what can be produced by a CPU, so increase this + // threshold to avoid tripping the sanity check. + if (!_disableSanityCheck && transmitList.size() > 1000) { + panic("Packet queue %s has grown beyond 1000 packets\n", name()); } diff --git a/src/mem/protocol/MESI_Two_Level_aladdin.slicc b/src/mem/protocol/MESI_Two_Level_aladdin.slicc new file mode 100644 index 0000000000..ccf20c2c34 --- /dev/null +++ b/src/mem/protocol/MESI_Two_Level_aladdin.slicc @@ -0,0 +1,7 @@ +protocol "MESI_Two_Level_aladdin"; +include "RubySlicc_interfaces.slicc"; +include "MESI_Two_Level-msg.sm"; +include "MESI_Two_Level-L1cache.sm"; +include "MESI_Two_Level-L2cache.sm"; +include "MESI_Two_Level-dir.sm"; +include "MESI_Two_Level-dma.sm"; diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts index 54cd4dbc07..2a46f9b7bb 100644 --- a/src/mem/protocol/SConsopts +++ b/src/mem/protocol/SConsopts @@ -39,6 +39,7 @@ all_protocols.extend([ 'GPU_RfO', 'MOESI_AMD_Base', 'MESI_Two_Level', + 'MESI_Two_Level_aladdin', 'MESI_Three_Level', 'MI_example', 'MOESI_CMP_directory', diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index 
0ad8a205de..c98c8b4a78 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -69,7 +69,7 @@ DMASequencer::makeRequest(PacketPtr pkt) } Addr paddr = pkt->getAddr(); - uint8_t* data = pkt->getPtr(); + uint8_t* data = pkt->hasData() ? pkt->getPtr() : NULL; int len = pkt->getSize(); bool write = pkt->isWrite();