# Meta Learning Cache Replacement Policy

## Import Dependencies

In [1]:
import sys
import random
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm 
from collections import Counter, deque, defaultdict
from sklearn import preprocessing
from sklearn.preprocessing import normalize
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
import tensorflow as tf
import pickle


## Block Cache Model

### Data Preprocessing and Data Construction

In [2]:
# Taken from Shehbaz
def get_recency(lru, cache):
    """Return the recency rank of every block in ``cache``.

    A block's rank is its 1-based position in ``lru``; when a block occurs
    more than once in ``lru`` its last position wins. Blocks that do not
    appear in ``lru`` at all are given rank 0.

    Args:
        lru: Sequence of blocks (presumably ordered from least to most
            recently used -- confirm against the caller).
        cache: Iterable of blocks to look up.

    Returns:
        A list of integer ranks, in the iteration order of ``cache``.
    """
    rank_of = {block: position for position, block in enumerate(lru, start=1)}
    return [rank_of.get(block, 0) for block in cache]

def get_frequency(lfu, cache):
    """Return the access count of every block in ``cache``.

    Args:
        lfu: Mapping from block to its access count. Each block is looked up
            with ``lfu[block]``, so a plain dict raises ``KeyError`` for an
            unknown block while a ``defaultdict`` inserts its default.
        cache: Iterable of blocks to look up.

    Returns:
        A list of counts, in the iteration order of ``cache``.
    """
    return [lfu[block] for block in cache]

def normalize_columns(input):
    """Scale every column of ``input`` to unit norm.

    Thin wrapper around scikit-learn's ``normalize`` called with ``axis=0``
    (column-wise) and the library's default norm.

    Args:
        input: 2-D array-like accepted by ``sklearn.preprocessing.normalize``.

    Returns:
        The column-normalized data as returned by ``normalize``.
    """
    column_normalized = normalize(input, axis=0)
    return column_normalized

def identity(input):
    """Return ``input`` unchanged (a no-op stand-in for a preprocessing step).

    Args:
        input: Any object.

    Returns:
        The very same object that was passed in.
    """
    return input

### Belady Optimal Algorithm (From Shehbaz)

In [3]:

def belady_opt(blocktrace, frame):
    """Replay ``blocktrace`` through Belady's optimal (OPT) replacement policy.

    On a miss with a full cache, the resident block whose next request lies
    farthest in the future is evicted.

    Side effects on module globals:
        * ``opt_hitrates`` receives the running hit rate after every 10th
          request.
        * ``maxpos`` is decremented once each time a requested block is never
          requested again. It supplies unique "never used again" next-use
          keys, so it must stay larger than every request index in the trace.

    Args:
        blocktrace: Sequence of hashable block identifiers. It is iterated
            twice (once to build the index, once to simulate).
        frame: Number of blocks the cache can hold.

    Returns:
        The overall hit rate ``hit / (hit + miss)``, or ``0.0`` when the
        trace is empty.
    """
    global maxpos, opt_hitrates

    # block -> queue of the request times at which that block is accessed
    optimal = defaultdict(deque)
    # next-use time -> resident block that will be requested at that time
    deleted = {}

    # Build the whole index for finding optimal eviction ordering
    for request_time, block in enumerate(tqdm(blocktrace, desc="OPT: building index")):
        optimal[block].append(request_time)

    hit, miss = 0, 0
    cache = set()

    for request_time, block in enumerate(tqdm(blocktrace, desc="OPT")):
        future = optimal[block]

        # Drop the current request from the index so that future[0], if any,
        # is the block's *next* use. This only fires on a block's first
        # request; later ones were already popped when they were scheduled.
        if future and future[0] == request_time:
            future.popleft()

        if block in cache:
            # Cache hit: the entry keyed by "now" is this block's previous
            # next-use prediction, which has just been consumed.
            hit += 1
            assert request_time in deleted
            del deleted[request_time]
        else:
            # Cache miss
            miss += 1
            if len(cache) == frame:
                # Cache is full: evict the block used farthest in the future.
                assert len(deleted) == frame
                evictpos = max(deleted)
                cache.remove(deleted[evictpos])
                del deleted[evictpos]
            cache.add(block)

        # Record when the (now resident) block will be requested next.
        if future:
            deleted[future.popleft()] = block
        else:
            # Never requested again: park it at a unique far-future slot.
            deleted[maxpos] = block
            maxpos -= 1

        if (request_time + 1) % 10 == 0:
            opt_hitrates.append(hit / (hit + miss))

    total = hit + miss
    return hit / total if total else 0.0

## LFU

In [4]:

def LFU(blocktrace, frame):
    """Replay ``blocktrace`` through a least-frequently-used cache.

    Access counts are kept for every block ever seen, so a block that
    re-enters the cache resumes with its full historical count. On a miss
    with a full cache, the resident block with the smallest count is evicted;
    ties are broken in favour of the block that has been resident longest.

    Side effect: the running hit rate is appended to the module-level list
    ``lfu_hitrates`` after every 10th request.

    Args:
        blocktrace: Iterable of hashable block identifiers.
        frame: Number of blocks the cache can hold.

    Returns:
        The overall hit rate ``hit / (hit + miss)``, or ``0.0`` when the
        trace is empty.
    """
    global lfu_hitrates
    # Access counts of the resident blocks only; its keys are the cache.
    resident_counts = {}
    # Access counts of every block seen so far, resident or not.
    frequency = defaultdict(int)

    hit, miss = 0, 0

    for request_time, block in enumerate(tqdm(blocktrace, desc="LFU")):
        frequency[block] += 1

        if block in resident_counts:
            hit += 1
            resident_counts[block] += 1
        else:
            miss += 1
            if len(resident_counts) >= frame:
                # min() returns the first minimal key in insertion order.
                victim = min(resident_counts, key=resident_counts.get)
                del resident_counts[victim]
            resident_counts[block] = frequency[block]

        if (request_time + 1) % 10 == 0:
            lfu_hitrates.append(hit / (hit + miss))

    total = hit + miss
    return hit / total if total else 0.0

## LRU

In [5]:

def LRU(blocktrace, frame):
    """Replay ``blocktrace`` through a least-recently-used cache.

    Side effect: the running hit rate is appended to the module-level list
    ``lru_hitrates`` after every 10th request.

    Args:
        blocktrace: Iterable of hashable block identifiers.
        frame: Number of blocks the cache can hold.

    Returns:
        The overall hit rate ``hit / (hit + miss)``, or ``0.0`` when the
        trace is empty.
    """
    global lru_hitrates
    # Resident blocks, least recently used on the left, most recent on the right.
    recency = deque()
    hit, miss = 0, 0

    for request_time, block in enumerate(tqdm(blocktrace, desc="LRU")):
        if block in recency:
            # Hit: take the block out so it can be re-added at the MRU end.
            hit += 1
            recency.remove(block)
        else:
            miss += 1
            if len(recency) >= frame:
                # Cache is full: drop the least recently used block.
                recency.popleft()
        recency.append(block)

        if (request_time + 1) % 10 == 0:
            lru_hitrates.append(hit / (hit + miss))

    total = hit + miss
    return hit / total if total else 0.0

In [6]:
# Load the block I/O trace and extract the sequence of requested block numbers.
# Alternative trace, kept for reference:
#train_file = "FIU_output/casa-110108-112108.4.blkparse"
train_file = "rl_envs/cheetah.cs.fiu.edu-110108-113008.3.blkparse"
# The file is read as space-separated with no header row, so the column
# names are assigned by hand on the next statement.
df = pd.read_csv(train_file, sep=' ',header = None)
df.columns = ['timestamp','pid','pname','blockNo', \
              'blockSize', 'readOrWrite', 'bdMajor', 'bdMinor', 'hash']

# The block-number column, as a plain list, is the request trace that the
# cache simulations replay.
trainRaw = df['blockNo'].tolist()
print(len(trainRaw))

23374516


In [8]:
# Run the LRU, LFU and OPT baselines on a slice of the trace.

# Far-future sentinel read and decremented by belady_opt: it is used as the
# next-use time of blocks that are never requested again, so it must be
# larger than any request index in the trace.
maxpos = 100000000
# Number of blocks each simulated cache can hold.
cache_size = 30
#baselines_file = "baseline_" + train_file.split("/")[1] + "-1-{}.pkl".format(len(trainRaw))
# Output path for the baseline results; only referenced by the commented-out
# pickle dump at the end of this cell.
baselines_file = "baseline_C{}".format(cache_size) + train_file.split("/")[1] + "-50001-{}.pkl".format(100000)

#trainBlockTrace = trainBlockTrace[:int(len(trainBlockTrace) * read_portion)]
# Replay only requests 50000..99999 (0-based) of the raw trace.
trainBlockTrace = trainRaw[50000:100000]
print(len(trainBlockTrace))

# Module-level lists that LRU, LFU and belady_opt append to: each receives
# the running hit rate after every 10th request.
lru_hitrates = []
lfu_hitrates = []
opt_hitrates = []

# Each call returns the overall hit rate for the whole slice.
lru_hitrate = LRU(trainBlockTrace, cache_size)
lfu_hitrate = LFU(trainBlockTrace, cache_size)
opt_hitrate = belady_opt(trainBlockTrace, cache_size)

print(train_file)
print("Cache size: {}".format(cache_size))
print("OPT: {}".format(opt_hitrate))
print("LFU: {}".format(lfu_hitrate))
print("LRU: {}".format(lru_hitrate))

# Persisting the baselines is currently disabled.
#with open(baselines_file, 'wb+') as f:
#    pickle.dump([lru_hitrates, lru_hitrate, lfu_hitrates, lfu_hitrate, opt_hitrates, opt_hitrate], f)

50000


HBox(children=(IntProgress(value=0, description='LRU', max=50000, style=ProgressStyle(description_width='initi…




HBox(children=(IntProgress(value=0, description='LFU', max=50000, style=ProgressStyle(description_width='initi…




HBox(children=(IntProgress(value=0, description='OPT: building index', max=50000, style=ProgressStyle(descript…




HBox(children=(IntProgress(value=0, description='OPT', max=50000, style=ProgressStyle(description_width='initi…


rl_envs/cheetah.cs.fiu.edu-110108-113008.3.blkparse
Cache size: 30
OPT: 0.23534
LFU: 0.2339
LRU: 0.2323
