Commit

Merge branch 'next'

caleb531 committed Mar 20, 2018
2 parents 42ca9e0 + 34e5439 commit 0fba069
Showing 20 changed files with 566 additions and 481 deletions.
3 changes: 1 addition & 2 deletions .coveragerc
@@ -14,5 +14,4 @@ exclude_lines =

# Only check coverage for source files
include =
-    cachesimulator/simulator.py
-    cachesimulator/table.py
+    cachesimulator/*.py
2 changes: 1 addition & 1 deletion LICENSE.txt
@@ -1,6 +1,6 @@
The MIT License (MIT)

-Copyright (c) 2015-2016 Caleb Evans
+Copyright (c) 2015-2018 Caleb Evans

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
27 changes: 20 additions & 7 deletions README.md
@@ -1,14 +1,19 @@
# Cache Simulator

-*Copyright 2015-2016 Caleb Evans*
+*Copyright 2015-2018 Caleb Evans*
*Released under the MIT license*

[![Build Status](https://travis-ci.org/caleb531/cache-simulator.svg?branch=master)](https://travis-ci.org/caleb531/cache-simulator)
[![Coverage Status](https://coveralls.io/repos/caleb531/cache-simulator/badge.svg?branch=master)](https://coveralls.io/r/caleb531/cache-simulator?branch=master)

-This program simulates a processor cache for the MIPS instruction set architecture. It can simulate all three fundamental caching schemes: direct-mapped, *n*-way set associative, and fully associative.
+This program simulates a processor cache for the MIPS instruction set
+architecture. It can simulate all three fundamental caching schemes:
+direct-mapped, *n*-way set associative, and fully associative.

-The program must be run from the command line and requires Python 3 to run. Executing the program will run the simulation and print an ASCII table containing the details for each supplied word address, as well as the final contents of the cache.
+The program must be run from the command line and requires Python 3.4+ to run.
+Executing the program will run the simulation and print an ASCII table
+containing the details for each supplied word address, as well as the final
+contents of the cache.

To see example input and output, see `examples.txt`.

@@ -30,22 +35,30 @@ The size of the cache in words (recall that one word is four bytes in MIPS).

#### --word-addrs

-One or more word addresses (separated by spaces), where each word address is a base-10 positive integer.
+One or more word addresses (separated by spaces), where each word address is a
+base-10 positive integer.

### Optional parameters

#### --num-blocks-per-set

-The program internally represents all cache schemes using a set associative cache. A value of `1` for this parameter (the default) implies a direct-mapped cache. A value other than `1` implies either a set associative *or* fully associative cache.
+The program internally represents all cache schemes using a set associative
+cache. A value of `1` for this parameter (the default) implies a direct-mapped
+cache. A value other than `1` implies either a set associative *or* fully
+associative cache.

#### --num-words-per-block

The number of words to store for each block in the cache; the default value is `1`.

#### --num-addr-bits

-The number of bits used to represent each given word address; this value is reflected in the *BinAddr* column in the reference table. If omitted, the default value is the number of bits needed to represent the largest of the given word addresses.
+The number of bits used to represent each given word address; this value is
+reflected in the *BinAddr* column in the reference table. If omitted, the
+default value is the number of bits needed to represent the largest of the given
+word addresses.

#### --replacement-policy

-The replacement policy to use for the cache. Accepted values are `lru` (Least Recently Used; the default) and `mru` (Most Recently Used).
+The replacement policy to use for the cache. Accepted values are `lru` (Least
+Recently Used; the default) and `mru` (Most Recently Used).
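
For reference, an invocation along these lines exercises the parameters described in the README above, assuming the package is run through the `cachesimulator` module entry point added in this commit; the addresses and sizes are arbitrary illustrative values, not prescribed by the project:

```sh
python -m cachesimulator \
    --cache-size 8 \
    --num-blocks-per-set 2 \
    --num-words-per-block 2 \
    --replacement-policy lru \
    --word-addrs 3 180 43 2 191 88 190 14 181 44 186 253
```

With 8 words of cache, 2 words per block, and 2 blocks per set, this models a 2-way set associative cache with 2 sets.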
63 changes: 63 additions & 0 deletions cachesimulator/__main__.py
@@ -0,0 +1,63 @@
#!/usr/bin/env python3

import argparse

from cachesimulator.simulator import Simulator


# Parse command-line arguments passed to the program
def parse_cli_args():

    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--cache-size',
        type=int,
        required=True,
        help='the size of the cache in words')

    parser.add_argument(
        '--num-blocks-per-set',
        type=int,
        default=1,
        help='the number of blocks per set')

    parser.add_argument(
        '--num-words-per-block',
        type=int,
        default=1,
        help='the number of words per block')

    parser.add_argument(
        '--word-addrs',
        nargs='+',
        type=int,
        required=True,
        help='one or more base-10 word addresses')

    parser.add_argument(
        '--num-addr-bits',
        type=int,
        default=1,
        help='the number of bits in each given word address')

    parser.add_argument(
        '--replacement-policy',
        choices=('lru', 'mru'),
        default='lru',
        # Ignore argument case (e.g. "mru" and "MRU" are equivalent)
        type=str.lower,
        help='the cache replacement policy (LRU or MRU)')

    return parser.parse_args()


def main():

    cli_args = parse_cli_args()
    sim = Simulator()
    sim.run_simulation(**vars(cli_args))


if __name__ == '__main__':
    main()
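
As a rough sketch of what `main()` ends up doing: `vars()` converts the parsed `argparse.Namespace` into a plain dict, so `run_simulation(**vars(cli_args))` is equivalent to passing each flag as a keyword argument (dashes become underscores). The exact signature of `Simulator.run_simulation` lives in `simulator.py`, which is not part of this diff, so the call below is an assumption based on the argument names defined above; the values are arbitrary:

```python
from cachesimulator.simulator import Simulator

# Equivalent to: python -m cachesimulator --cache-size 8 --word-addrs 3 180 43 2
# (assumes run_simulation accepts these keyword parameters, matching the
# argparse destinations above; simulator.py is not shown in this excerpt)
sim = Simulator()
sim.run_simulation(
    cache_size=8,
    num_blocks_per_set=1,
    num_words_per_block=1,
    word_addrs=[3, 180, 43, 2],
    num_addr_bits=1,
    replacement_policy='lru')
```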
62 changes: 62 additions & 0 deletions cachesimulator/bin_addr.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python3


class BinaryAddress(str):

    # Retrieves the binary address of a certain length for a base-10 word
    # address; we must define __new__ instead of __init__ because the class we
    # are inheriting from (str) is an immutable data type
    def __new__(cls, bin_addr=None, word_addr=None, num_addr_bits=0):

        if word_addr is not None:
            return super().__new__(
                cls, bin(word_addr)[2:].zfill(num_addr_bits))
        else:
            return super().__new__(cls, bin_addr)

    @classmethod
    def prettify(cls, bin_addr, min_bits_per_group):

        mid = len(bin_addr) // 2

        if mid < min_bits_per_group:
            # Return binary string immediately if bisecting the binary string
            # produces a substring which is too short
            return bin_addr
        else:
            # Otherwise, bisect binary string and separate halves with a space
            left = cls.prettify(bin_addr[:mid], min_bits_per_group)
            right = cls.prettify(bin_addr[mid:], min_bits_per_group)
            return ' '.join((left, right))

    # Retrieves the tag used to distinguish cache entries with the same index
    def get_tag(self, num_tag_bits):

        end = num_tag_bits
        tag = self[:end]
        if len(tag) != 0:
            return tag
        else:
            return None

    # Retrieves the index used to group blocks in the cache
    def get_index(self, num_offset_bits, num_index_bits):

        start = len(self) - num_offset_bits - num_index_bits
        end = len(self) - num_offset_bits
        index = self[start:end]
        if len(index) != 0:
            return index
        else:
            return None

    # Retrieves the word offset used to select a word in the data pointed to by
    # the given binary address
    def get_offset(self, num_offset_bits):

        start = len(self) - num_offset_bits
        offset = self[start:]
        if len(offset) != 0:
            return offset
        else:
            return None
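
A quick illustrative session with the class above, assuming the `cachesimulator` package is importable; the bit widths (1 offset bit, 3 index bits, 4 tag bits) are arbitrary example values:

```python
from cachesimulator.bin_addr import BinaryAddress

# Word address 180 padded to 8 bits is 10110100
addr = BinaryAddress(word_addr=180, num_addr_bits=8)
print(addr)                             # 10110100
print(BinaryAddress.prettify(addr, 3))  # 1011 0100

# Slice the address into its components
print(addr.get_tag(4))                  # 1011
print(addr.get_index(1, 3))             # 010
print(addr.get_offset(1))               # 0
```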
97 changes: 97 additions & 0 deletions cachesimulator/cache.py
@@ -0,0 +1,97 @@
#!/usr/bin/env python3

from cachesimulator.bin_addr import BinaryAddress
from cachesimulator.reference import ReferenceCacheStatus
from cachesimulator.word_addr import WordAddress


class Cache(dict):

    # Initializes the reference cache with a fixed number of sets
    def __init__(self, cache=None, num_sets=None, num_index_bits=0):

        # A list of recently used addresses, ordered from least-recently used
        # to most-recently used
        self.recently_used_addrs = []

        if cache is not None:
            self.update(cache)
        else:
            for i in range(num_sets):
                index = BinaryAddress(
                    word_addr=WordAddress(i), num_addr_bits=num_index_bits)
                self[index] = []

    # Every time we see an address, move it to the end of the list (the
    # most-recently-used position)
    def mark_ref_as_last_seen(self, ref):

        # The index and tag (not the offset) uniquely identify each address
        addr_id = (ref.index, ref.tag)
        if addr_id in self.recently_used_addrs:
            self.recently_used_addrs.remove(addr_id)
        self.recently_used_addrs.append(addr_id)

    # Returns True if a block at the given index and tag exists in the cache,
    # indicating a hit; returns False otherwise, indicating a miss
    def is_hit(self, addr_index, addr_tag):

        # Ensure that indexless fully associative caches are accessed correctly
        if addr_index is None:
            blocks = self['0']
        elif addr_index in self:
            blocks = self[addr_index]
        else:
            return False

        for block in blocks:
            if block['tag'] == addr_tag:
                return True

        return False

    # Adds the given entry to the cache at the given index
    def set_block(self, replacement_policy,
                  num_blocks_per_set, addr_index, new_entry):

        # Place all cache entries in a single set if cache is fully associative
        if addr_index is None:
            blocks = self['0']
        else:
            blocks = self[addr_index]
        # Replace MRU or LRU entry if number of blocks in set exceeds the limit
        if len(blocks) == num_blocks_per_set:
            # Iterate through the recently-used entries in reverse order for
            # MRU
            if replacement_policy == 'mru':
                recently_used_addrs = reversed(self.recently_used_addrs)
            else:
                recently_used_addrs = self.recently_used_addrs
            # Replace the first matching entry with the entry to add
            for recent_index, recent_tag in recently_used_addrs:
                for i, block in enumerate(blocks):
                    if (recent_index == addr_index and
                            block['tag'] == recent_tag):
                        blocks[i] = new_entry
                        return
        else:
            blocks.append(new_entry)

    # Simulate the cache by reading the given address references into it
    def read_refs(self, num_blocks_per_set,
                  num_words_per_block, replacement_policy, refs):

        for ref in refs:
            self.mark_ref_as_last_seen(ref)

            # Record if the reference is already in the cache or not
            if self.is_hit(ref.index, ref.tag):
                # Give emphasis to hits in contrast to misses
                ref.cache_status = ReferenceCacheStatus.hit
            else:
                ref.cache_status = ReferenceCacheStatus.miss
                self.set_block(
                    replacement_policy=replacement_policy,
                    num_blocks_per_set=num_blocks_per_set,
                    addr_index=ref.index,
                    new_entry=ref.get_cache_entry(num_words_per_block))
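
A small hand-driven example of the `Cache` container above; it assumes `WordAddress` (defined in `word_addr.py`, not shown in this excerpt) wraps a plain integer. In normal use `read_refs()` populates the sets rather than direct assignment:

```python
from cachesimulator.cache import Cache

# A 4-set cache; each set is keyed by its binary index and starts empty
cache = Cache(num_sets=4, num_index_bits=2)
print(sorted(cache))               # ['00', '01', '10', '11']

# Hand-place a block in set '01' and probe the cache
cache['01'].append({'tag': '1011', 'data': [180, 181]})
print(cache.is_hit('01', '1011'))  # True  (matching tag found in the set)
print(cache.is_hit('01', '0000'))  # False (no block with that tag)
print(cache.is_hit('11', '1011'))  # False (set exists but is empty)
```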
50 changes: 50 additions & 0 deletions cachesimulator/reference.py
@@ -0,0 +1,50 @@
#!/usr/bin/env python3

from collections import OrderedDict
from enum import Enum

from cachesimulator.bin_addr import BinaryAddress
from cachesimulator.word_addr import WordAddress


# An address reference consisting of the address and all of its components
class Reference(object):

    def __init__(self, word_addr, num_addr_bits,
                 num_offset_bits, num_index_bits, num_tag_bits):
        self.word_addr = WordAddress(word_addr)
        self.bin_addr = BinaryAddress(
            word_addr=self.word_addr, num_addr_bits=num_addr_bits)
        self.offset = self.bin_addr.get_offset(num_offset_bits)
        self.index = self.bin_addr.get_index(num_offset_bits, num_index_bits)
        self.tag = self.bin_addr.get_tag(num_tag_bits)
        self.cache_status = None

    def __str__(self):
        return str(OrderedDict(sorted(self.__dict__.items())))

    __repr__ = __str__

    # Return a lightweight entry to store in the cache
    def get_cache_entry(self, num_words_per_block):
        return {
            'tag': self.tag,
            'data': self.word_addr.get_consecutive_words(
                num_words_per_block)
        }


# An enum representing the cache status of a reference (i.e. hit or miss)
class ReferenceCacheStatus(Enum):

    miss = 0
    hit = 1

    # Define how reference statuses are displayed in simulation results
    def __str__(self):
        if self.value == ReferenceCacheStatus.hit.value:
            return 'HIT'
        else:
            return 'miss'

    __repr__ = __str__
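
A short illustrative use of `Reference` and `ReferenceCacheStatus`; it assumes `WordAddress` (from `word_addr.py`, not shown in this excerpt) accepts a plain integer, and the bit widths are arbitrary example values:

```python
from cachesimulator.reference import Reference, ReferenceCacheStatus

# Decompose word address 43 into an 8-bit binary address with
# 1 offset bit, 3 index bits, and 4 tag bits
ref = Reference(word_addr=43, num_addr_bits=8,
                num_offset_bits=1, num_index_bits=3, num_tag_bits=4)
print(ref.bin_addr, ref.tag, ref.index, ref.offset)  # 00101011 0010 101 1

# The status enum controls how hits and misses appear in the results table
ref.cache_status = ReferenceCacheStatus.hit
print(ref.cache_status)                              # HIT
print(ReferenceCacheStatus.miss)                     # miss
```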
