In [1]:
import pickle

import pandas as pd

from util import parse_har

In [2]:
class CDN:
    """Byte-budgeted LRU cache used to simulate a CDN node.

    Entries are kept in a plain ``dict``. Because dicts preserve insertion
    order, the first key is always the least recently used entry and the
    last key is the most recently used one.
    """

    def __init__(self, size_mb: int) -> None:
        """Create an empty cache.

        Args:
            size_mb: Cache capacity; multiplied by 1024 * 1024 to get bytes.
        """
        self.size = size_mb * 1024 * 1024  # capacity in bytes
        self.hashmap = {}                  # path -> (path, size), ordered LRU -> MRU
        self.current_size = 0              # bytes currently cached

        # Counters
        self.total_requests = 0
        self.hit = 0

        # TODO(review): the original note here read "Related to LSTM time
        # quanta", but no such state is defined yet.

    def fetch(self, path: str, size: int):
        """Record one request for ``path`` and update the cache.

        Args:
            path: Cache key of the requested object.
            size: Size of the object, in the same unit as ``self.size`` (bytes).

        Returns:
            The cached ``(path, size)`` tuple on a hit, ``None`` on a miss.
        """
        # print("Accessing {}".format(path))
        self.total_requests += 1

        # Cached: count the hit and refresh recency.
        # NOTE: we ignore CDN's fetching update (a changed size on a hit is
        # not recorded), not important.
        if path in self.hashmap:
            self.hit += 1
            # Pop and re-insert so the entry moves to the most-recent end.
            value = self.hashmap.pop(path)
            self.hashmap[path] = value
            return value

        # An object larger than the whole cache can never fit. Serve it
        # without caching instead of flushing every entry and then failing
        # on an empty dict.
        if size > self.size:
            return None

        # Not cached: make room if needed, then insert at the most-recent end.
        if self.current_size + size > self.size:
            self.evict(size)

        self.hashmap[path] = (path, size)
        self.current_size += size
        # print("Newly cached. Size now is {}/{} bytes, {:.2f}% full".format(self.current_size, self.size, self.current_size/self.size*100))
        return None

    def evict(self, incoming_size):
        """Drop least recently used entries until ``incoming_size`` bytes fit.

        Stops early when the cache becomes empty, so a request that can never
        fit does not raise ``StopIteration``.
        """
        while self.hashmap and self.current_size + incoming_size > self.size:
            # The first key in insertion order is the least recently used.
            key = next(iter(self.hashmap))
            _, evicted_size = self.hashmap.pop(key)

            self.current_size -= evicted_size
            # print("Evicting {}".format(key))

In [3]:
# Load the HAR dict pickle file, then hand it to the project's parse_har helper.
# The context manager closes the file handle as soon as unpickling finishes.
# NOTE(security): pickle.load can execute arbitrary code while loading;
# only run this on capture files from a trusted source.
with open("../generator/user_600_hour_6_cs.uchicago.edu.har", "rb") as har_file:
    uchicago_har = pickle.load(har_file)
uchicago_har = parse_har(uchicago_har)

In [4]:
# We replay the trace twice so that the second pass sees fewer cold misses.
# DataFrame.append is deprecated and was removed in pandas 2.0; pd.concat is
# the supported equivalent and keeps the original row labels, as append did.
# (Assumes parse_har returns a pandas DataFrame -- TODO confirm.)
# NOTE: re-running this cell doubles the trace again; re-run the load cell first.
uchicago_har = pd.concat([uchicago_har, uchicago_har])

  uchicago_har = uchicago_har.append(uchicago_har)


In [8]:
cdn = CDN(100)  # the argument is the cache capacity in MB

# Replay every request in trace order. Iterating the two columns directly
# avoids building one Series per row through `.iloc()`.
# `body_size` is added to a capacity that CDN stores in bytes, so it is
# presumably a byte count -- verify against parse_har.
for path, body_size in zip(uchicago_har['url'], uchicago_har['body_size']):
    cdn.fetch(path, body_size)

In [9]:
# Share of all replayed requests that were served from the cache, in percent.
hit_ratio_pct = cdn.hit / cdn.total_requests * 100
print("Hit Ratio: {:.2f}%".format(hit_ratio_pct))

Hit Ratio: 76.04%
