# Hybrid RL-Enhanced Cache Management - Colab Training

Train a hybrid cache system in which a reinforcement-learning (RL) agent augments a traditional eviction heuristic (LRU/LFU) by supplying eviction priority scores.

In [None]:
# Fetch the project sources and make the repository root the working
# directory; later cells use paths relative to it ('src', requirements.txt).
!git clone https://github.com/haseebmalik18/name-undecided.git
%cd name-undecided

In [None]:
# Install the dependencies listed in requirements.txt (-q: quiet output).
# The relative path is resolved against the current working directory.
!pip install -q -r requirements.txt

In [None]:
# Make the packages under ./src importable (agents, workloads, metrics,
# visualization, cache). The path is relative, so this relies on the
# working directory being the repository root.
import sys
sys.path.append('src')

import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import torch

from agents.hybrid_environment import HybridCacheEnv
from agents.priority_agent import PriorityAgent
from workloads.generators import ZipfWorkload
from metrics.tracker import MetricsTracker
from visualization.plotter import plot_training_metrics, plot_comparison
from cache.policies import LRUCache

# Informational only: report whether CUDA is available. The device object is
# not stored or passed to anything in this cell.
print(f"Using device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}")

## Configuration

In [None]:
# Cache capacity: used for the hybrid environment, the LRU baseline, and as
# the agent's output size.
CACHE_CAPACITY = 100
# num_items passed to both the Zipf workload generators and the environment.
NUM_ITEMS = 1000
# Number of training episodes.
EPISODES = 1000
# Upper bound on steps per training episode; also the number of requests
# generated per LRU baseline episode.
EPISODE_LENGTH = 1000
# alpha parameter passed to ZipfWorkload.
ZIPF_ALPHA = 1.5
# base_policy passed to HybridCacheEnv.
BASE_POLICY = 'lru'
# rl_weight passed to HybridCacheEnv.
# NOTE(review): presumably the blend between RL scores and the base policy --
# confirm against HybridCacheEnv.
RL_WEIGHT = 0.5
# learning_rate passed to PriorityAgent.
LEARNING_RATE = 0.001

## Initialize Hybrid Environment and Agent

In [None]:
# Build the training workload, the hybrid cache environment, the agent and
# the metrics tracker from the configuration constants.
workload = ZipfWorkload(num_items=NUM_ITEMS, alpha=ZIPF_ALPHA, seed=42)

env_kwargs = dict(
    cache_capacity=CACHE_CAPACITY,
    num_items=NUM_ITEMS,
    workload_generator=workload,
    episode_length=EPISODE_LENGTH,
    base_policy=BASE_POLICY,
    rl_weight=RL_WEIGHT,
)
env = HybridCacheEnv(**env_kwargs)

# The agent's input width is the first dimension of the observation space;
# its output width is the cache capacity.
# NOTE(review): presumably one priority score per cache slot -- confirm
# against PriorityAgent.
state_size, output_size = env.observation_space.shape[0], CACHE_CAPACITY

agent = PriorityAgent(
    state_size=state_size,
    output_size=output_size,
    learning_rate=LEARNING_RATE,
)
metrics = MetricsTracker()

print(f"State Size: {state_size}, Output Size: {output_size}")
print(f"Base Policy: {BASE_POLICY.upper()}, RL Weight: {RL_WEIGHT}")

## Training Loop

In [None]:
# Online training: every environment step is stored and immediately followed
# by one agent.train() call. Per-episode cache metrics are recorded, and a
# summary over the last 100 episodes is printed every 100 episodes.
for episode in tqdm(range(EPISODES), desc="Training Hybrid"):
    state, _ = env.reset()
    total_reward = 0

    for step in range(EPISODE_LENGTH):
        action = agent.select_action(state, training=True)
        next_state, reward, done, truncated, info = env.step(action)
        total_reward += reward

        # Only `done` is stored with the transition; `truncated` merely ends
        # the episode loop below.
        agent.store_transition(state, action, reward, next_state, done)
        state = next_state

        # train() may return None; only real loss values are tracked.
        loss = agent.train()
        if loss is not None:
            metrics.add_loss(loss)

        episode_over = done or truncated
        if episode_over:
            break

    # Snapshot the cache statistics at the end of the episode.
    cache_metrics = env.cache.get_metrics()
    metrics.add_episode(
        total_reward,
        *(cache_metrics[key] for key in ('hit_rate', 'avg_latency', 'bandwidth_used')),
    )

    # Periodic progress report; skip all other episodes.
    if (episode + 1) % 100 != 0:
        continue

    stats = metrics.get_stats(window=100)
    # Taken from the most recent episode's snapshot, not from the window.
    rl_influence = cache_metrics.get('rl_influence_rate', 0.0)
    print(f"\nEpisode {episode + 1}/{EPISODES}")
    print(f"  Hit Rate: {stats['mean_hit_rate']:.3f} ± {stats['std_hit_rate']:.3f}")
    print(f"  Reward: {stats['mean_reward']:.2f}")
    print(f"  RL Influence: {rl_influence:.2%}")
    print(f"  Epsilon: {agent.epsilon:.3f}")

## Evaluate Pure LRU Baseline

In [None]:
# Baseline: run a plain LRU cache of the same capacity over 100 episodes of
# requests drawn from a fresh Zipf workload (same parameters and seed as the
# training workload), then compare with the hybrid system.
lru = LRUCache(capacity=CACHE_CAPACITY)
lru_hit_rates = []

workload_lru = ZipfWorkload(num_items=NUM_ITEMS, alpha=ZIPF_ALPHA, seed=42)

for episode in tqdm(range(100), desc="LRU Eval"):
    lru.reset()
    for req in workload_lru.generate(EPISODE_LENGTH):
        lru.access(req)
    lru_hit_rates.append(lru.get_hit_rate())

lru_mean, lru_std = np.mean(lru_hit_rates), np.std(lru_hit_rates)
print(f"Pure LRU Hit Rate: {lru_mean:.3f} ± {lru_std:.3f}")

# The hybrid figure is the mean over the last 100 *training* episodes, which
# were run with training=True.
hybrid_mean = np.mean(metrics.episode_hit_rates[-100:])
print(f"Hybrid Hit Rate: {hybrid_mean:.3f}")

# Relative improvement over the LRU baseline, in percent.
improvement_pct = (hybrid_mean - lru_mean) / lru_mean * 100
print(f"Improvement: {improvement_pct:.2f}%")

## Visualize Results

In [None]:
# Plot the losses and per-episode metrics accumulated in `metrics` by the
# training loop.
plot_training_metrics(metrics)

In [None]:
# Assemble one series per system for the comparison plot. The LRU baseline
# has fewer evaluated episodes than the hybrid run, so its hit-rate series is
# padded with the LRU mean up to the hybrid series' length; LRU has no reward
# signal, so its rewards are all zeros.
num_hybrid_episodes = len(metrics.episode_hit_rates)
lru_padding = [lru_mean] * (num_hybrid_episodes - len(lru_hit_rates))

hybrid_series = dict(
    hit_rates=metrics.episode_hit_rates,
    rewards=metrics.episode_rewards,
)
lru_series = dict(
    hit_rates=lru_hit_rates + lru_padding,
    rewards=[0] * len(metrics.episode_rewards),
)

results = {
    'Hybrid (LRU + RL)': hybrid_series,
    'LRU Only': lru_series,
}

plot_comparison(results)

## RL Influence Analysis

In [None]:
# Report how many evictions were influenced by the RL component, using the
# cache's metrics as they stand after training.
# NOTE(review): whether these counters span the whole run or only the final
# episode depends on what env.reset() does to the cache -- confirm.
# NOTE(review): the training loop reads 'rl_influence_rate' via .get() with a
# default, whereas these lookups raise KeyError if a key is missing.
final_metrics = env.cache.get_metrics()
print(f"Total Evictions: {final_metrics['evictions']}")
print(f"RL Influenced Evictions: {final_metrics['rl_influenced_evictions']}")
print(f"RL Influence Rate: {final_metrics['rl_influence_rate']:.2%}")

## Save Model

In [None]:
# Persist the trained agent and the collected metrics to the current working
# directory; the download cell refers to these same file names.
agent.save('hybrid_agent_colab.pth')
metrics.save('metrics_hybrid_colab.json')
print("Model and metrics saved!")

## Download Results

In [None]:
# Colab-only: hand the saved artifacts to the browser for download, model
# checkpoint first, then the metrics file.
from google.colab import files

for artifact in ('hybrid_agent_colab.pth', 'metrics_hybrid_colab.json'):
    files.download(artifact)