# simple_dreamer.py

Auto-generated implementation from the Agentic RL PhD codebase.

### Original Implementations & References
The following links point to the official or high-quality reference implementations for the papers covered in this notebook:

- https://github.com/danijar/dreamerv3

*Note: The code below is a simplified pedagogical implementation.*

In [None]:
import torch
import torch.nn as nn

# Paper: "World Models" (Ha & Schmidhuber, 2018) & "DreamerV3" (Hafner et al., 2023)
# Category: Model-Based RL

class WorldModel(nn.Module):
    """Minimal latent world model: encoder (V), recurrent transition (M), controller (C).

    The module owns three sub-networks:

    * ``encoder``    -- MLP mapping an observation of size ``obs_dim`` to a
      latent vector of size ``latent_dim``.
    * ``rnn``        -- ``nn.GRUCell`` whose input is ``[latent, action]`` and
      whose hidden state has size ``latent_dim``.
    * ``controller`` -- linear policy head mapping a latent vector to an
      action vector of size ``action_dim``.

    Only ``rnn`` and ``controller`` are used by :meth:`dream`; ``encoder`` is
    provided for callers that need to turn an observation into the initial
    latent state.
    """

    def __init__(self, obs_dim: int, latent_dim: int, action_dim: int) -> None:
        """Build the three sub-networks.

        Args:
            obs_dim: Size of the flat observation vector fed to the encoder.
            latent_dim: Size of the latent state (also the GRU hidden size).
            action_dim: Size of the action vector produced by the controller.
        """
        super().__init__()

        # 1. Vision Model (V) / Encoder
        # Compresses the observation into a latent vector.
        self.encoder = nn.Sequential(
            nn.Linear(obs_dim, 128),
            nn.ReLU(),
            nn.Linear(128, latent_dim),
        )

        # 2. Memory Model (M) / simplified RSSM (Recurrent State Space Model)
        # Predicts the next latent state from the current state and action:
        #   z_{t+1} = f(z_t, a_t, h_t)
        # In this simplified model the latent state doubles as the GRU hidden
        # state, hence hidden_size == latent_dim.
        self.rnn = nn.GRUCell(latent_dim + action_dim, latent_dim)

        # 3. Controller (C) / Policy
        # Acts on the latent state (not on raw observations).
        self.controller = nn.Linear(latent_dim, action_dim)

    def dream(self, initial_state: torch.Tensor, horizon: int = 10) -> list:
        """Roll the model forward in latent space without touching an environment.

        At every step the controller picks an action from the current latent
        state and the GRU cell predicts the next latent state, which becomes
        the current state of the following step.

        Args:
            initial_state: Latent state to start from, shaped
                ``(batch, latent_dim)`` or, unbatched, ``(latent_dim,)``.
            horizon: Number of imagined steps. A value ``<= 0`` yields an
                empty trajectory.

        Returns:
            A list of ``horizon`` tuples ``(state, action, next_state)``.
            Gradient tracking is not disabled here, so the tensors stay
            attached to the autograd graph.
        """
        current_state = initial_state
        dream_trajectory = []

        for _ in range(horizon):
            # Agent chooses an action based on the imagined state.
            action = self.controller(current_state)

            # World model predicts what happens next.
            # Concatenate on the last dim so both batched (2-D) and unbatched
            # (1-D) states work. The current state is also passed as the GRU
            # hidden state: omitting it makes GRUCell fall back to an all-zero
            # hidden state on every step, which discards the recurrence.
            # (Simplified: a full RSSM mixes a deterministic RNN state with a
            # stochastic latent state.)
            rnn_input = torch.cat([current_state, action], dim=-1)
            next_state = self.rnn(rnn_input, current_state)

            dream_trajectory.append((current_state, action, next_state))
            current_state = next_state

        return dream_trajectory

# Note: DreamerV3 additionally uses symlog-transformed predictions, discrete (categorical)
# latent variables, and KL balancing. The code above is a simplified architecture in the
# style of Ha & Schmidhuber's "World Models".
