From ac109dfaa8bf0d123b21035ffe79b581c438d5c9 Mon Sep 17 00:00:00 2001
From: Yuan Yao <yaoyuannnn@gmail.com>
Date: Sun, 3 Sep 2017 16:43:19 -0400
Subject: [PATCH] ruby: Integrate Ruby into gem5-aladdin.

To use Ruby, compile with the 'MESI_Two_Level_aladdin' protocol (PROTOCOL
in build/X86/variables). This protocol is identical to the existing
MESI_Two_Level protocol. Then pass the --ruby flag on the command line.

Change-Id: I7498bd71ff387a5ff4ad0e1d50acb8a95873303c
---
 build_opts/X86_MESI_Two_Level_aladdin         |   3 +
 configs/aladdin/aladdin_se.py                 |  35 +++--
 configs/ruby/MESI_Two_Level_aladdin.py        | 148 ++++++++++++++++++
 src/aladdin                                   |   2 +-
 src/cpu/o3/O3CPU.py                           |   2 +-
 src/dev/dma_device.cc                         |   2 +-
 src/mem/packet_queue.cc                       |   8 +-
 src/mem/protocol/MESI_Two_Level_aladdin.slicc |   7 +
 src/mem/protocol/SConsopts                    |   1 +
 src/mem/ruby/system/DMASequencer.cc           |   2 +-
 10 files changed, 191 insertions(+), 19 deletions(-)
 create mode 100644 build_opts/X86_MESI_Two_Level_aladdin
 create mode 100644 configs/ruby/MESI_Two_Level_aladdin.py
 create mode 100644 src/mem/protocol/MESI_Two_Level_aladdin.slicc

diff --git a/build_opts/X86_MESI_Two_Level_aladdin b/build_opts/X86_MESI_Two_Level_aladdin
new file mode 100644
index 0000000000..8798f8a339
--- /dev/null
+++ b/build_opts/X86_MESI_Two_Level_aladdin
@@ -0,0 +1,3 @@
+TARGET_ISA = 'x86'
+CPU_MODELS = 'TimingSimpleCPU,O3CPU,AtomicSimpleCPU'
+PROTOCOL = 'MESI_Two_Level_aladdin'
diff --git a/configs/aladdin/aladdin_se.py b/configs/aladdin/aladdin_se.py
index e5d25252dc..4075017a3f 100644
--- a/configs/aladdin/aladdin_se.py
+++ b/configs/aladdin/aladdin_se.py
@@ -254,6 +254,11 @@ def get_processes(options):
       datapath.numOutStandingWalks = config.getint(
           accel, "tlb_max_outstanding_walks")
       datapath.tlbBandwidth = config.getint(accel, "tlb_bandwidth")
+    elif memory_type == "spad" and options.ruby:
+      # If the memory_type is spad, Aladdin will create a 1-way cache for every
+      # datapath, even though this cache will not be used in simulation.
+      # Ruby doesn't support 1-way caches, so set the associativity to 2.
+      datapath.cacheAssoc = 2
     if (memory_type != "cache" and memory_type != "spad"):
       fatal("Aladdin configuration file specified invalid memory type %s for "
             "accelerator %s." % (memory_type, accel))
@@ -296,20 +301,18 @@ def get_processes(options):
     system.cpu[i].createThreads()
 
 if options.ruby:
-    if not (options.cpu_type == "detailed" or options.cpu_type == "timing"):
-        print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!"
+    if not (options.cpu_type == "TimingSimpleCPU" or options.cpu_type == "DerivO3CPU"):
+        print >> sys.stderr, "Ruby requires TimingSimpleCPU or DerivO3CPU!!"
         sys.exit(1)
 
-    # Set the option for physmem so that it is not allocated any space
-    system.physmem = MemClass(range=AddrRange(options.mem_size),
-                              null = True)
-
-    options.use_map = True
-    Ruby.create_system(options, system)
-    assert(options.num_cpus == len(system.ruby._cpu_ruby_ports))
+    Ruby.create_system(options, False, system)
+    assert(options.num_cpus + 2*len(system.find_all(HybridDatapath)[0]) ==
+           len(system.ruby._cpu_ports))
 
+    system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+                                        voltage_domain = system.voltage_domain)
     for i in xrange(np):
-        ruby_port = system.ruby._cpu_ruby_ports[i]
+        ruby_port = system.ruby._cpu_ports[i]
 
         # Create the interrupt controller and connect its ports to Ruby
         # Note that the interrupt controller is always present but only
@@ -320,11 +323,17 @@ def get_processes(options):
         system.cpu[i].icache_port = ruby_port.slave
         system.cpu[i].dcache_port = ruby_port.slave
         if buildEnv['TARGET_ISA'] == 'x86':
-            system.cpu[i].interrupts.pio = ruby_port.master
-            system.cpu[i].interrupts.int_master = ruby_port.slave
-            system.cpu[i].interrupts.int_slave = ruby_port.master
+            system.cpu[i].interrupts[0].pio = ruby_port.master
+            system.cpu[i].interrupts[0].int_master = ruby_port.slave
+            system.cpu[i].interrupts[0].int_slave = ruby_port.master
             system.cpu[i].itb.walker.port = ruby_port.slave
             system.cpu[i].dtb.walker.port = ruby_port.slave
+
+    if options.accel_cfg_file:
+        for i,datapath in enumerate(datapaths):
+            datapath.cache_port = system.ruby._cpu_ports[options.num_cpus+2*i].slave
+            datapath.spad_port = system.ruby._cpu_ports[options.num_cpus+2*i+1].slave
+
 else:
     system.membus = SystemXBar(width=options.xbar_width)
 
diff --git a/configs/ruby/MESI_Two_Level_aladdin.py b/configs/ruby/MESI_Two_Level_aladdin.py
new file mode 100644
index 0000000000..2341f58403
--- /dev/null
+++ b/configs/ruby/MESI_Two_Level_aladdin.py
@@ -0,0 +1,148 @@
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
+# Copyright (c) 2009 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Brad Beckmann
+
+import math
+import m5
+from m5.objects import *
+from m5.defines import buildEnv
+from Ruby import create_topology
+from Ruby import send_evicts
+
+#
+# Declare caches used by the protocol
+#
+class L1Cache(RubyCache): pass
+class L2Cache(RubyCache): pass
+
+def define_options(parser):
+    return
+
+def create_system(options, full_system, system, dma_ports, ruby_system):
+
+    if buildEnv['PROTOCOL'] != 'MESI_Two_Level_aladdin':
+        fatal("This script requires the MESI_Two_Level_aladdin protocol to be built.")
+
+    # Strip the '_aladdin' suffix and run the original MESI_Two_Level script
+    buildEnv['PROTOCOL'] = buildEnv['PROTOCOL'][:-8]
+    protocol = buildEnv['PROTOCOL']
+    exec "import %s" % protocol
+    try:
+        (cpu_sequencers, dir_cntrls, topology) = \
+            eval("%s.create_system(options, full_system, system, dma_ports, ruby_system)" % protocol)
+    except:
+        print "Error: could not create system for ruby protocol inside fusion system %s" % protocol
+        raise
+
+    #
+    # Must create the individual controllers before the network to ensure the
+    # controller constructors are called before the network constructor
+    #
+    l2_bits = int(math.log(options.num_l2caches, 2))
+    block_size_bits = int(math.log(options.cacheline_size, 2))
+
+    #
+    # Build accelerator
+    #
+    # Accelerator cache
+    datapaths = system.find_all(HybridDatapath)[0]
+    for i,datapath in enumerate(datapaths):
+        l1i_cache = L1Cache(size = '256B',
+                            assoc = 2,
+                            start_index_bit = block_size_bits,
+                            is_icache = True)
+        # accelerator's private cache
+        l1d_cache = L1Cache(size = datapath.cacheSize,
+                            assoc = datapath.cacheAssoc,
+                            start_index_bit = block_size_bits,
+                            is_icache = False)
+
+        prefetcher = RubyPrefetcher.Prefetcher()
+
+        l1_cntrl = L1Cache_Controller(version = options.num_cpus+i,
+                                      L1Icache = l1i_cache,
+                                      L1Dcache = l1d_cache,
+                                      l2_select_num_bits = l2_bits,
+                                      send_evictions = send_evicts(options),
+                                      prefetcher = prefetcher,
+                                      ruby_system = ruby_system,
+                                      #clk_domain = clk_domain,
+                                      #transitions_per_cycle = options.ports,
+                                      enable_prefetch = False)
+
+        acc_seq = RubySequencer(version = options.num_cpus+i,
+                                icache = l1i_cache,
+                                dcache = l1d_cache,
+                                #clk_domain = clk_domain,
+                                ruby_system = ruby_system)
+
+        l1_cntrl.sequencer = acc_seq
+        setattr(ruby_system, "l1_cntrl_acc%d" % i, l1_cntrl)
+
+        # Add controllers and sequencers to the appropriate lists
+        cpu_sequencers.append(acc_seq)
+        topology.addController(l1_cntrl)
+
+        # Connect the L1 controllers and the network
+        l1_cntrl.mandatoryQueue = MessageBuffer()
+        l1_cntrl.requestFromL1Cache = MessageBuffer()
+        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
+        l1_cntrl.responseFromL1Cache = MessageBuffer()
+        l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave
+        l1_cntrl.unblockFromL1Cache = MessageBuffer()
+        l1_cntrl.unblockFromL1Cache.master = ruby_system.network.slave
+
+        l1_cntrl.optionalQueue = MessageBuffer()
+
+        l1_cntrl.requestToL1Cache = MessageBuffer()
+        l1_cntrl.requestToL1Cache.slave = ruby_system.network.master
+        l1_cntrl.responseToL1Cache = MessageBuffer()
+        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master
+
+        # Scratchpad port
+        # The scratchpad port is connected to the DMA controller
+        spad_seq = DMASequencer(version = i,
+                                ruby_system = ruby_system)
+
+        spad_cntrl = DMA_Controller(version = i, dma_sequencer = spad_seq,
+                                    transitions_per_cycle = options.ports,
+                                    ruby_system = ruby_system)
+
+        setattr(ruby_system, "spad_cntrl_acc%d" % i, spad_cntrl)
+
+        # Connect the dma controllers and the network
+        spad_cntrl.mandatoryQueue = MessageBuffer()
+        spad_cntrl.requestToDir = MessageBuffer()
+        spad_cntrl.requestToDir.master = ruby_system.network.slave
+        spad_cntrl.responseFromDir = MessageBuffer(ordered = True)
+        spad_cntrl.responseFromDir.slave = ruby_system.network.master
+
+        cpu_sequencers.append(spad_seq)
+        topology.addController(spad_cntrl)
+
+    return (cpu_sequencers, dir_cntrls, topology)
diff --git a/src/aladdin b/src/aladdin
index 96aabfe6eb..378b4de1d9 160000
--- a/src/aladdin
+++ b/src/aladdin
@@ -1 +1 @@
-Subproject commit 96aabfe6eb3e3193734b050b93f6f2814c0fcb49
+Subproject commit 378b4de1d93948e368d6742a02c4093eb449b310
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index 8507ab6ff3..4b67862af8 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -73,7 +73,7 @@ def support_take_over(cls):
                                    "delay")
     commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay")
     fetchWidth = Param.Unsigned(8, "Fetch width")
-    fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes")
+    fetchBufferSize = Param.Unsigned(32, "Fetch buffer size in bytes")
     fetchQueueSize = Param.Unsigned(32, "Fetch queue size in micro-ops "
                                     "per-thread")
 
diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc
index 2eb22b1727..e8bcd98366 100644
--- a/src/dev/dma_device.cc
+++ b/src/dev/dma_device.cc
@@ -213,7 +213,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
      * last channel that is just added. If we switch to the fixed-number of
      * channels model, we can let users to pick which channel they want to use,
      * or automatically pick the empty channel. */
-    for (ChunkGenerator gen(addr, size, chunkSize);
+    for (ChunkGenerator gen(addr, size, sys->cacheLineSize());
          !gen.done(); gen.next()) {
         req = new Request(gen.addr(), gen.size(), flag, masterId);
         req->taskId(ContextSwitchTaskId::DMA);
diff --git a/src/mem/packet_queue.cc b/src/mem/packet_queue.cc
index 7649fe5a61..d53dc86915 100644
--- a/src/mem/packet_queue.cc
+++ b/src/mem/packet_queue.cc
@@ -118,8 +118,12 @@ PacketQueue::schedSendTiming(PacketPtr pkt, Tick when, bool force_order)
 
     // add a very basic sanity check on the port to ensure the
     // invisible buffer is not growing beyond reasonable limits
-    if (!_disableSanityCheck && transmitList.size() > 100) {
-        panic("Packet queue %s has grown beyond 100 packets\n",
+    //
+    // NOTE: When using Ruby with DMA, the amount of traffic generated can be
+    // far greater than what can be produced by a CPU, so increase this
+    // threshold to avoid tripping the sanity check.
+    if (!_disableSanityCheck && transmitList.size() > 1000) {
+        panic("Packet queue %s has grown beyond 1000 packets\n",
               name());
     }
 
diff --git a/src/mem/protocol/MESI_Two_Level_aladdin.slicc b/src/mem/protocol/MESI_Two_Level_aladdin.slicc
new file mode 100644
index 0000000000..ccf20c2c34
--- /dev/null
+++ b/src/mem/protocol/MESI_Two_Level_aladdin.slicc
@@ -0,0 +1,7 @@
+protocol "MESI_Two_Level_aladdin";
+include "RubySlicc_interfaces.slicc";
+include "MESI_Two_Level-msg.sm";
+include "MESI_Two_Level-L1cache.sm";
+include "MESI_Two_Level-L2cache.sm";
+include "MESI_Two_Level-dir.sm";
+include "MESI_Two_Level-dma.sm";
diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts
index 54cd4dbc07..2a46f9b7bb 100644
--- a/src/mem/protocol/SConsopts
+++ b/src/mem/protocol/SConsopts
@@ -39,6 +39,7 @@ all_protocols.extend([
     'GPU_RfO',
     'MOESI_AMD_Base',
     'MESI_Two_Level',
+    'MESI_Two_Level_aladdin',
     'MESI_Three_Level',
     'MI_example',
     'MOESI_CMP_directory',
diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc
index 0ad8a205de..c98c8b4a78 100644
--- a/src/mem/ruby/system/DMASequencer.cc
+++ b/src/mem/ruby/system/DMASequencer.cc
@@ -69,7 +69,7 @@ DMASequencer::makeRequest(PacketPtr pkt)
     }
 
     Addr paddr = pkt->getAddr();
-    uint8_t* data =  pkt->getPtr<uint8_t>();
+    uint8_t* data = pkt->hasData() ? pkt->getPtr<uint8_t>() : NULL;
     int len = pkt->getSize();
     bool write = pkt->isWrite();