In [3]:
import random
import numpy as np
import pandas as pd

In [4]:
from buffer_pool_baseline.environment import Query, Cache, Time
from IPython.display import clear_output
from tqdm import tqdm

In [5]:
%matplotlib inline

In [6]:
# Capacity handed to every Cache built by setup().
cache_size = 4
# Q-table: rows are indexed by encoded state, the two columns by action
# (column 0 is "mru", column 1 is "lru").
q_table = np.zeros((1000, 2))

In [7]:
def setup():
    """Build a randomised sequential query with a pre-filled cache.

    A single ``Time(0)`` clock is shared by the cache and the query. The
    query's ``start`` is drawn from [0, 20], ``end`` is ``start + 10`` and
    ``loop_size`` is drawn from [0, 2]. After the cache is attached,
    ``cache_size`` random ids from [0, 30] are added to it (the draws are
    independent, so the same id may be drawn more than once).

    Returns:
        The configured ``Query`` with its cache attached.
    """
    t = Time(0)

    c = Cache(cache_size, t, equate_id_to_value=True)

    start = random.randint(0, 20)
    end = start + 10
    loop_size = random.randint(0, 2)

    env = Query(query_type="sequential", time=t,
                parameters={"start": start, "end": end, "loop_size": loop_size})
    env.set_query_cache(c)

    # One insertion per cache slot, so the pre-fill follows `cache_size`
    # instead of being hard-coded to four calls.
    for _ in range(cache_size):
        c.add_element(random.randint(0, 30))
    return env

In [16]:
def get_query(query_type, table_size, loop_size):
    """Create a ``Query`` of the requested type with its own fresh clock.

    Args:
        query_type: One of "sequential", "select" or "join".
        table_size: Used as the ``end`` parameter of "sequential" and
            "select" queries. Not used for "join", whose two tables are
            fixed to the range 0..10.
        loop_size: ``loop_size`` parameter; only used by "sequential".

    Returns:
        The newly constructed ``Query``.

    Raises:
        ValueError: If ``query_type`` is not one of the supported names.
    """
    # Fail loudly instead of falling off the end and returning None, which
    # would only surface later as an AttributeError in the caller.
    if query_type not in ("sequential", "select", "join"):
        raise ValueError("Unknown query_type: {!r}".format(query_type))

    t = Time()
    if query_type == "sequential":
        return Query(query_type="sequential", time=t,
                     parameters={"start": 0, "end": table_size, "loop_size": loop_size})

    if query_type == "select":
        return Query(query_type="select", time=t,
                     parameters={"start": 0, "end": table_size})

    return Query(query_type="join", time=t,
                 parameters={"start_table_1": 0, "end_table_1": 10,
                             "start_table_2": 0, "end_table_2": 10})
    

In [17]:
def get_time_steps_per_query(table_size, cache_size, loop_size):
    """Run each query type to completion with the "mru" action and time it.

    For "sequential", "select" and "join" a query is built via ``get_query``,
    given a new cache of ``cache_size``, and stepped until ``is_done()``.

    Returns:
        dict mapping each query type to the value of the query's
        ``time.now()`` once it has finished.
    """
    elapsed = {}

    for kind in ("sequential", "select", "join"):
        cache_clock = Time(0)
        query = get_query(kind, table_size, loop_size)
        # NOTE(review): the cache is given its own clock here, separate from
        # the one the query was built with -- confirm that is intended.
        query.set_query_cache(Cache(cache_size, cache_clock, equate_id_to_value=True))

        while not query.is_done():
            query.step("mru")

        elapsed[kind] = query.time.now()

    return elapsed

In [18]:
def encode_queries(query_type, time_step, max_time_steps):
    """Return the state index for ``query_type`` at ``time_step``.

    The index is ``max_time_steps[query_type]`` (used as a per-type offset)
    plus ``time_step``.
    """
    offset = max_time_steps[query_type]
    return offset + time_step

In [19]:
# Per-query-type step counts for a table of 10 rows and loop size 5.
# The cache capacity comes from `cache_size` so it cannot drift from the
# value used by setup().
max_time_steps = get_time_steps_per_query(10, cache_size, 5)

In [21]:
# Q-learning hyperparameters:
#   alpha   - weight of the new estimate in each Q-value update
#   gamma   - discount applied to the best next-state value
#   epsilon - probability of picking a random action instead of the greedy one
alpha, gamma, epsilon = 0.05, 0.6, 0.1

# For plotting metrics
all_epochs, all_penalties = [], []

In [22]:
# Snapshot of the Q-table from the previous episode, and the per-episode
# total absolute difference between that snapshot and the current table.
old_q = np.zeros((1000, 2))
q_values_cumulative = []

In [23]:
# Number of training episodes; also embedded in the results file name.
number_of_runs = 10_000

In [24]:
# Tabular Q-learning over randomly generated sequential queries.
# Each episode builds a new environment with setup() and runs it to completion.
cum_reward_plot = []

# Episodes are numbered 1..number_of_runs inclusive so that exactly
# `number_of_runs` episodes are trained (range(1, n) would train n - 1).
for i in tqdm(range(1, number_of_runs + 1)):

    env = setup()
    state = encode_queries(env.query_type, env.time.now(), max_time_steps)

    penalties, reward = 0, 0
    done = False
    previous_hit, previous_miss = 0, 0
    cum_reward = 0

    while not done:
        # Epsilon-greedy action selection.
        if random.uniform(0, 1) < epsilon:
            action = random.choice(env.actions)  # Explore action space
        else:
            _action_ = np.argmax(q_table[state])  # Exploit learned values
            action = "mru" if _action_ == 0 else "lru"

        hits, miss = env.step(action)
        next_state = encode_queries(env.query_type, env.time.now(), max_time_steps)

        # The per-step reward and penalty are the changes in the hit and miss
        # counts returned by step() since the previous step.
        reward = hits - previous_hit
        penalties = miss - previous_miss

        cum_reward += reward

        previous_hit, previous_miss = hits, miss

        done = env.done

        # Column 0 of the Q-table is "mru"; every other action maps to column 1.
        _action_ = 0 if action == "mru" else 1

        old_value = q_table[state, _action_]
        # No bootstrapping on the final transition: the episode has ended, so
        # there is no future return to add. Episodes differ in length, so the
        # row at `next_state` may hold values learned mid-episode elsewhere.
        next_max = 0.0 if done else np.max(q_table[next_state])

        # Each miss costs ten times what a hit earns.
        r = reward - (10 * penalties)
        new_value = (1 - alpha) * old_value + alpha * (r + gamma * next_max)
        q_table[state, _action_] = new_value
        state = next_state

    # Total absolute change of the Q-table during this episode.
    q_values_cumulative.append(np.abs(old_q - q_table).sum())
    old_q = q_table.copy()
    cum_reward_plot.append({"reward": cum_reward, "epoch": i})

print("Training finished.\n")

100%|█████████████████████████████████████████████████████████████████████████████| 9999/9999 [00:18<00:00, 543.71it/s]

Training finished.






In [None]:
# One row per training episode: the total absolute Q-table change recorded
# in q_values_cumulative.
df = pd.DataFrame(q_values_cumulative)

In [26]:
# Display the per-query-type step counts that encode_queries uses as offsets.
max_time_steps

{'sequential': 66, 'select': 11, 'join': 111}

In [59]:
def evaluate_policy(query, q_table):
    """Drive ``query`` with the greedy actions stored in ``q_table``.

    Rows of ``q_table`` are consumed in order, starting at index
    ``max_time_steps[query.query_type]``. For each row the arg-max column
    selects "mru" (column 0) or "lru" (column 1); the cache keys and query
    parameters are recorded *before* the step is taken. Iteration stops as
    soon as ``query.is_done()`` is true or the rows run out.

    Returns:
        tuple: ``(reward, trace)`` where ``reward`` is ``hits - 10 * misses``
        as reported by one final ``query.step()`` call, and ``trace`` is a
        list of dicts with keys "action", "cache" and "which_element".
    """
    action_names = ["mru", "lru"]
    trace = []
    first_row = max_time_steps[query.query_type]

    for q_row in q_table[first_row:]:
        chosen = action_names[np.argmax(q_row)]
        snapshot = {
            "action": chosen,
            "cache": query.cache.cache_map.copy().keys(),
            "which_element": query.parameters.copy(),
        }
        trace.append(snapshot)
        query.step(chosen)

        if query.is_done():
            break

    # NOTE(review): relies on step() with no action returning the hit and
    # miss totals of the finished query -- confirm against Query.step.
    total_hits, total_misses = query.step()
    return total_hits - (10 * total_misses), trace
    

In [60]:
def get_baseline(query):
    """Run ``query`` to completion using the "mru" action on every step.

    After each step the cache keys and the query parameters are recorded.

    Returns:
        tuple: ``(reward, trace)`` where ``reward`` is ``hits - 10 * misses``
        as reported by one final ``query.step()`` call, and ``trace`` is a
        list of dicts with keys "cache" and "which_element".
    """
    trace = []

    while True:
        if query.is_done():
            break
        query.step("mru")
        snapshot = {
            "cache": query.cache.cache_map.copy().keys(),
            "which_element": query.parameters.copy(),
        }
        trace.append(snapshot)

    total_hits, total_misses = query.step()
    baseline_reward = total_hits - (10 * total_misses)
    return baseline_reward, trace

In [65]:
def compare_reward():
    """Score the learned policy and the all-"mru" baseline on one random query.

    A new environment from ``setup()`` is evaluated with ``evaluate_policy``.
    A second sequential query with a copy of the same parameters, its own
    ``Time(0)`` clock and a copy of the cache is evaluated with
    ``get_baseline``.

    Returns:
        dict with keys "policy", "reward_baseline", "policy_cache_map" and
        "baseline_cache_map".
    """
    policy_query = setup()

    baseline_cache = policy_query.cache.copy()
    baseline_query = Query(query_type="sequential",
                           parameters=policy_query.parameters.copy(),
                           time=Time(0))

    # NOTE(review): the baseline starts from an emptied cache_map while the
    # policy run keeps the entries added in setup() -- confirm the asymmetric
    # starting point is intended.
    baseline_cache.cache_map = {}
    baseline_query.set_query_cache(baseline_cache)

    policy_reward, policy_trace = evaluate_policy(policy_query, q_table)
    baseline_reward, baseline_trace = get_baseline(baseline_query)

    return {
        "policy": policy_reward,
        "reward_baseline": baseline_reward,
        "policy_cache_map": policy_trace,
        "baseline_cache_map": baseline_trace,
    }

In [66]:
# Single policy-vs-baseline comparison; the following cells inspect `res`.
# (The dangling, incomplete `for i` statement was a SyntaxError and is removed.)
res = compare_reward()

In [67]:
# Reward (hits - 10 * misses) of the all-"mru" baseline for this run.
res["reward_baseline"]

-100

In [68]:
# Reward (hits - 10 * misses) of the learned policy for the same run.
res["policy"]

-89

In [64]:
# Per-step trace of the learned policy, skipping the first three steps.
# NOTE(review): this cell's execution count (64) is lower than that of the
# cell that creates `res` (66), and the query parameters in the output below
# differ from those in the baseline trace, so the output looks like it comes
# from an earlier run -- re-run the notebook top to bottom before relying on it.
res["policy_cache_map"][3:]

[{'action': 'lru',
  'cache': dict_keys([7, 21, 9, 3]),
  'which_element': {'start': 1,
   'end': 11,
   'loop_size': 2,
   'current_position': 4,
   'current_counter': 0}},
 {'action': 'lru',
  'cache': dict_keys([21, 9, 3, 4]),
  'which_element': {'start': 1,
   'end': 11,
   'loop_size': 2,
   'current_position': 5,
   'current_counter': 0}},
 {'action': 'mru',
  'cache': dict_keys([9, 3, 4, 5]),
  'which_element': {'start': 1,
   'end': 11,
   'loop_size': 2,
   'current_position': 6,
   'current_counter': 0}},
 {'action': 'lru',
  'cache': dict_keys([9, 3, 4, 6]),
  'which_element': {'start': 1,
   'end': 11,
   'loop_size': 2,
   'current_position': 7,
   'current_counter': 0}},
 {'action': 'mru',
  'cache': dict_keys([3, 4, 6, 7]),
  'which_element': {'start': 1,
   'end': 11,
   'loop_size': 2,
   'current_position': 8,
   'current_counter': 0}},
 {'action': 'mru',
  'cache': dict_keys([3, 4, 6, 8]),
  'which_element': {'start': 1,
   'end': 11,
   'loop_size': 2,
   'current_p

In [69]:
# Per-step trace of the all-"mru" baseline (cache keys and query parameters).
res["baseline_cache_map"]

[{'cache': dict_keys([3]),
  'which_element': {'start': 3,
   'end': 13,
   'loop_size': 0,
   'current_position': 4,
   'current_counter': 0}},
 {'cache': dict_keys([3, 4]),
  'which_element': {'start': 3,
   'end': 13,
   'loop_size': 0,
   'current_position': 5,
   'current_counter': 0}},
 {'cache': dict_keys([3, 4, 5]),
  'which_element': {'start': 3,
   'end': 13,
   'loop_size': 0,
   'current_position': 6,
   'current_counter': 0}},
 {'cache': dict_keys([3, 4, 5, 6]),
  'which_element': {'start': 3,
   'end': 13,
   'loop_size': 0,
   'current_position': 7,
   'current_counter': 0}},
 {'cache': dict_keys([4, 5, 6, 7]),
  'which_element': {'start': 3,
   'end': 13,
   'loop_size': 0,
   'current_position': 8,
   'current_counter': 0}},
 {'cache': dict_keys([5, 6, 7, 8]),
  'which_element': {'start': 3,
   'end': 13,
   'loop_size': 0,
   'current_position': 9,
   'current_counter': 0}},
 {'cache': dict_keys([6, 7, 8, 9]),
  'which_element': {'start': 3,
   'end': 13,
   'loop_siz

In [None]:
# Repeat the policy-vs-baseline comparison on 1000 independently drawn queries.
results = [compare_reward() for _ in range(1000)]

In [None]:
# Print the greedy action for every Q-table row from the "sequential"
# offset onwards (column 0 is "mru", column 1 is "lru").
sequential_rows = q_table[max_time_steps["sequential"]:]
for q_row in sequential_rows:
    print(("mru", "lru")[np.argmax(q_row)])

In [None]:
# `df_reward` was never defined in this notebook; build it from the
# per-episode records collected by the training loop so that this cell
# works after Restart & Run All.
df_reward = pd.DataFrame(cum_reward_plot)
df_reward.sort_values("epoch").plot(x="epoch", y="reward")

In [None]:
# Output path; the file name embeds the run count, alpha, gamma and cache size.
result = "Results/runs_{}_alpha_{}_gamma_{}_cache_{}.csv".format(number_of_runs, alpha, gamma, cache_size)

In [None]:
# Write the per-episode Q-table change trace to disk.
# NOTE(review): assumes the "Results" directory already exists -- confirm,
# or create it before this cell runs.
df.to_csv(result)