### Standard Caching Algorithms

Here I have implemented a few standard cache replacement policies in Python. I have tried to make them as efficient as I can by using Python's efficient container types, such as `deque` and `defaultdict` from the standard `collections` module.

##### Importing required packages

In [1]:
import timeit
from collections import OrderedDict, defaultdict, deque

import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm

###### Blocktrace data on which we check the performance of our code

In [2]:
# Load the tab-separated block trace and give its columns descriptive names.
df = pd.read_csv('smalltrace.csv', sep='\t')
# NOTE(review): the names below replace whatever header read_csv picked up
# from the file's first line -- confirm the file really starts with a header
# row, otherwise the first record is silently dropped.
df.columns = ['no','timestamp','pid','pname','blockNo', 'blockSize', 'readOrWrite', 'bdMajor', 'bdMinor', 'hash']
df.head()

Unnamed: 0,no,timestamp,pid,pname,blockNo,blockSize,readOrWrite,bdMajor,bdMinor,hash
0,0,89966811921625,4257,nfsd,488846200,8,W,6,0,2c4787c7671c2f638777c4ab72859ce1
1,1,89967003980393,4253,nfsd,508455704,8,W,6,0,88b93b628d84082186026d9da044f173
2,2,89967004049935,4253,nfsd,508455712,8,W,6,0,b5e9f4e5ab62a4fff5313a606b0ad4e3
3,3,89967004093615,4253,nfsd,508455720,8,W,6,0,7af5556ad4d45263bcd8358867333dd4
4,4,89967324150054,4254,nfsd,508500288,8,W,6,0,88b93b628d84082186026d9da044f173


In [3]:
# The replacement policies below only look at the sequence of block numbers,
# so keep that column as a plain Python list.
blocktrace = df['blockNo'].tolist()
len(blocktrace)

10000

### FIFO (First In First Out)

In [4]:
def FIFO(blocktrace, frame):
    """Simulate a First-In-First-Out cache and return its hit rate.

    Parameters
    ----------
    blocktrace : iterable
        Sequence of accessed block identifiers. Blocks must be hashable.
    frame : int
        Cache capacity in blocks. ``0`` means nothing can be cached, so
        every access is a miss.

    Returns
    -------
    float
        ``hits / accesses``; ``0.0`` when the trace is empty.

    Raises
    ------
    ValueError
        If ``frame`` is negative (raised by ``deque(maxlen=frame)``).
    """
    queue = deque(maxlen=frame)  # insertion order, oldest block on the left
    resident = set()             # same blocks as `queue`, for O(1) membership
    hit, miss = 0, 0

    for block in tqdm(blocktrace, leave=False):
        if block in resident:
            # FIFO ignores re-references: a hit does not change eviction order.
            hit += 1
            continue

        miss += 1
        if frame == 0:
            # Zero-capacity cache: nothing is ever stored.
            continue

        if len(queue) == frame:
            # Evict explicitly (instead of relying on maxlen) so the set
            # stays in sync with the queue.
            resident.discard(queue.popleft())
        queue.append(block)
        resident.add(block)

    total = hit + miss
    # Guard against an empty trace instead of raising ZeroDivisionError.
    return hit / total if total else 0.0

In [5]:
# FIFO hit rate with a 50-block cache. Note that the LRU, LFU and OPT cells
# further down use 500 frames, so their numbers are not directly comparable.
FIFO(blocktrace, 50)

HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))



0.0734

### LIFO (Last In First Out)

In [6]:
def LIFO(blocktrace, frame):
    """Simulate a Last-In-First-Out cache and return its hit rate.

    Once the cache is full, each miss replaces the most recently inserted
    block; the blocks inserted earlier stay resident.

    Parameters
    ----------
    blocktrace : iterable
        Sequence of accessed block identifiers. Blocks must be hashable.
    frame : int
        Cache capacity in blocks. ``0`` means nothing can be cached, so
        every access is a miss.

    Returns
    -------
    float
        ``hits / accesses``; ``0.0`` when the trace is empty.

    Raises
    ------
    ValueError
        If ``frame`` is negative (raised by ``deque(maxlen=frame)``).
    """
    stack = deque(maxlen=frame)  # newest block on the right
    resident = set()             # same blocks as `stack`, for O(1) membership
    hit, miss = 0, 0

    for block in tqdm(blocktrace, leave=False):
        if block in resident:
            hit += 1
            continue

        miss += 1
        if frame == 0:
            # Zero-capacity cache: there is nothing to pop and nothing to store.
            continue

        if len(stack) == frame:
            # Cache is full: drop the newest entry to make room.
            resident.discard(stack.pop())
        stack.append(block)
        resident.add(block)

    total = hit + miss
    # Guard against an empty trace instead of raising ZeroDivisionError.
    return hit / total if total else 0.0

In [7]:
# LIFO hit rate with a 50-block cache (same size as the FIFO run above; the
# LRU, LFU and OPT cells below use 500 frames).
LIFO(blocktrace, 50)

HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))



0.0667

### LRU (Least Recently Used)

In [8]:
def LRU(blocktrace, frame):
    """Simulate a Least-Recently-Used cache and return its hit rate.

    Parameters
    ----------
    blocktrace : iterable
        Sequence of accessed block identifiers. Blocks must be hashable.
    frame : int
        Cache capacity in blocks. ``0`` means nothing can be cached, so
        every access is a miss.

    Returns
    -------
    float
        ``hits / accesses``; ``0.0`` when the trace is empty.

    Raises
    ------
    ValueError
        If ``frame`` is negative.
    """
    if frame < 0:
        raise ValueError("frame must be non-negative")

    # Keys are the resident blocks ordered from least to most recently used.
    # An OrderedDict gives O(1) membership, O(1) "move to most recent" and
    # O(1) eviction of the oldest entry (deque.remove is linear in the
    # cache size).
    recency = OrderedDict()
    hit, miss = 0, 0

    for block in tqdm(blocktrace, leave=False):
        if block in recency:
            recency.move_to_end(block)  # mark as most recently used
            hit += 1
            continue

        miss += 1
        if frame == 0:
            # Zero-capacity cache: nothing is ever stored.
            continue

        if len(recency) >= frame:
            recency.popitem(last=False)  # evict the least recently used block
        recency[block] = None

    total = hit + miss
    # Guard against an empty trace instead of raising ZeroDivisionError.
    return hit / total if total else 0.0

In [9]:
# LRU hit rate with a 500-block cache.
LRU(blocktrace, 500)

HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))



0.0749

### LFU (Least Frequently Used)

In [10]:
def LFU(blocktrace, frame):
    """Simulate a Least-Frequently-Used cache and return its hit rate.

    Frequencies are counted over the whole trace seen so far, not only while
    a block is resident: a block that is evicted and later re-admitted comes
    back with its full historical access count. On eviction the resident
    block with the smallest count is removed; ties go to the block that was
    admitted earliest.

    Parameters
    ----------
    blocktrace : iterable
        Sequence of accessed block identifiers. Blocks must be hashable.
    frame : int
        Cache capacity in blocks. ``0`` means nothing can be cached, so
        every access is a miss.

    Returns
    -------
    float
        ``hits / accesses``; ``0.0`` when the trace is empty.

    Raises
    ------
    ValueError
        If ``frame`` is negative.
    """
    if frame < 0:
        raise ValueError("frame must be non-negative")

    # Resident blocks -> access count. The dict's keys double as the cache
    # contents, and its insertion order provides the tie-break on eviction.
    cache_frequency = {}
    # Access count of every block seen so far, resident or not.
    frequency = defaultdict(int)

    hit, miss = 0, 0

    for block in tqdm(blocktrace):
        frequency[block] += 1

        if block in cache_frequency:
            hit += 1
            cache_frequency[block] += 1
            continue

        miss += 1
        if frame == 0:
            # Zero-capacity cache: nothing to evict and nothing to store.
            continue

        if len(cache_frequency) >= frame:
            # min() returns the first minimal key in insertion order.
            victim = min(cache_frequency, key=cache_frequency.get)
            del cache_frequency[victim]
        cache_frequency[block] = frequency[block]

    total = hit + miss
    # Guard against an empty trace instead of raising ZeroDivisionError.
    return hit / total if total else 0.0

In [11]:
# LFU hit rate with a 500-block cache.
LFU(blocktrace, 500)

HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))




0.0751

### Belady's Optimal Caching Algorithm

In [12]:
def getFurthestAccessBlock(C, OPT):
    """Pick the cached block whose next access lies furthest in the future.

    Parameters
    ----------
    C : iterable
        The blocks currently in the cache.
    OPT : mapping
        Maps each block to a sequence of its remaining access positions in
        ascending order, so element ``0`` is the next access.

    Returns
    -------
    The first block found in ``C`` that is never accessed again (empty
    sequence in ``OPT``); otherwise the block with the largest next-access
    position; ``-1`` if ``C`` is empty.
    """
    maxAccessPosition = -1
    maxAccessBlock = -1
    for cached_block in C:
        upcoming = OPT[cached_block]
        if not upcoming:
            # Never referenced again: the ideal eviction candidate.
            # (Truthiness test replaces the identity comparison `is 0`.)
            return cached_block
        if upcoming[0] > maxAccessPosition:
            maxAccessPosition = upcoming[0]
            maxAccessBlock = cached_block
    return maxAccessBlock

def belady_opt(blocktrace, frame):
    """Simulate Belady's optimal (clairvoyant) cache and return its hit rate.

    On a miss with a full cache, the block evicted is the one returned by
    ``getFurthestAccessBlock``: a block that is never used again if there is
    one, otherwise the block whose next use is furthest away.

    Parameters
    ----------
    blocktrace : iterable
        Sequence of accessed block identifiers. It is iterated twice (once to
        build the access index, once to simulate), so it must be re-iterable,
        e.g. a list. Blocks must be hashable.
    frame : int
        Cache capacity in blocks. ``0`` means nothing can be cached, so
        every access is a miss.

    Returns
    -------
    float
        ``hits / accesses``; ``0.0`` when the trace is empty.

    Raises
    ------
    ValueError
        If ``frame`` is negative.
    """
    if frame < 0:
        raise ValueError("frame must be non-negative")

    # block -> queue of positions at which it is accessed, in ascending order.
    OPT = defaultdict(deque)
    for i, block in enumerate(tqdm(blocktrace, desc="OPT: building index")):
        OPT[block].append(i)

    hit, miss = 0, 0
    C = set()

    for block in tqdm(blocktrace, desc="OPT"):
        # Consume the current access so the head of each queue is always the
        # block's *next* use. `block` is not in C on a miss, so doing this
        # before choosing a victim does not affect the choice.
        OPT[block].popleft()

        if block in C:
            hit += 1
            continue

        miss += 1
        if frame == 0:
            # Zero-capacity cache: nothing to evict and nothing to store.
            continue

        if len(C) >= frame:
            # C is non-empty here, so a real cached block is always returned.
            C.remove(getFurthestAccessBlock(C, OPT))
        C.add(block)

    total = hit + miss
    # Guard against an empty trace instead of raising ZeroDivisionError.
    return hit / total if total else 0.0

In [13]:
# Belady's optimal hit rate with a 500-block cache (same size as the LRU and
# LFU runs above).
belady_opt(blocktrace, 500)

HBox(children=(IntProgress(value=0, description='OPT: building index', max=10000), HTML(value='')))




HBox(children=(IntProgress(value=0, description='OPT', max=10000), HTML(value='')))




0.0793