<a href="https://colab.research.google.com/github/jcmachicao/deep_learning_2025_curso/blob/main/demo_tensores_traduccion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Toy RNN forward pass (tiny sizes) to illustrate input_size=4, hidden_size=6, output_size=10
import numpy as np
import pandas as pd
from math import exp

# Seeded generator: every run draws the same "random" weights.
rng = np.random.default_rng(seed=42)

# Layer widths of the toy network:
#   input_size  - one-hot over the 4 Spanish source tokens
#   hidden_size - width of the recurrent "ideas/context" state
#   output_size - English target vocabulary (restricted domain)
input_size, hidden_size, output_size = 4, 6, 10

# Tiny vocabularies (source: Spanish, target: English)
src_vocab = [
    "el",
    "perro",
    "corre",
    "<eos>",
]
tgt_vocab = [
    "<pad>",
    "the",
    "a",
    "dog",
    "runs",
    "in",
    "on",
    "park",
    "<eos>",
    "quickly",
]

# Parameters: weights are small Gaussian draws (mean 0, std 0.3), biases are zero.
# The draw order (Wxh, Whh, Why) determines the values produced by the seeded
# generator, so it must not be rearranged.
Wxh = rng.normal(loc=0, scale=0.3, size=(hidden_size, input_size))    # input -> hidden
Whh = rng.normal(loc=0, scale=0.3, size=(hidden_size, hidden_size))   # previous hidden -> hidden
bh = np.zeros(hidden_size)                                            # hidden bias

Why = rng.normal(loc=0, scale=0.3, size=(output_size, hidden_size))   # hidden -> output
by = np.zeros(output_size)                                            # output bias

def softmax(z, axis=None):
    """Return the softmax of *z*, normalised along *axis*.

    Args:
        z: Array-like of logits.
        axis: Axis (or axes) to normalise over. The default ``None``
            normalises over every element, which is what a single 1-D
            logits vector needs; pass ``axis=-1`` to treat each row of a
            batch as its own distribution.

    Returns:
        An array shaped like *z* whose entries are non-negative and sum
        to 1 along *axis*.
    """
    z = np.asarray(z)
    # Shifting by the max does not change the result but keeps exp() from
    # overflowing on large logits.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=axis, keepdims=True)

def tanh(x):
    """Apply the hyperbolic tangent element-wise via ``np.tanh``."""
    activated = np.tanh(x)
    return activated

# Build an input sequence: "el perro corre <eos>"
sequence = ["el", "perro", "corre", "<eos>"]

# Row k of the identity matrix is the one-hot vector for source token k,
# so each token is encoded by looking up its row.
one_hot_rows = np.eye(input_size)
X = [one_hot_rows[src_vocab.index(tok)] for tok in sequence]

# Forward pass: walk the sequence once, carrying the hidden state from step to step.
h_prev = np.zeros(hidden_size)
timesteps_rows, outputs_rows = [], []

for t, (token, x_t) in enumerate(zip(sequence, X), start=1):
    # Recurrent update from the current input and the previous hidden state,
    # then project the new hidden state onto the target vocabulary.
    h_t = tanh(Wxh @ x_t + Whh @ h_prev + bh)
    logits = Why @ h_t + by
    probs = softmax(logits)
    pred_idx = int(probs.argmax())
    pred_token = tgt_vocab[pred_idx]

    # Store for inspection: one row of hidden activations per timestep...
    hidden_row = {"t": t, "input_token": token}
    hidden_row.update({f"h[{i}]": h_t[i] for i in range(hidden_size)})
    timesteps_rows.append(hidden_row)

    # ...and one row with the full output distribution plus its argmax token.
    prob_row = {"t": t, "input_token": token}
    prob_row.update({f"p({tok})": probs[i] for i, tok in enumerate(tgt_vocab)})
    prob_row["argmax_token"] = pred_token
    outputs_rows.append(prob_row)

    # roll hidden: this step's state becomes the next step's context
    h_prev = h_t

# One table row per timestep: hidden activations and output probabilities.
hidden_df = pd.DataFrame(timesteps_rows)
out_df = pd.DataFrame(outputs_rows)

# Display nicely.
# `caas_jupyter_tools` is only available inside the ChatGPT code sandbox; it is
# not installed in Colab or a local Jupyter/Python environment, where an
# unconditional import would abort the cell with ModuleNotFoundError. Fall back
# to a plain-text rendering so the demo runs everywhere.
try:
    from caas_jupyter_tools import display_dataframe_to_user
except ImportError:  # also covers ModuleNotFoundError
    def display_dataframe_to_user(name, dataframe):
        """Print *name* as a title followed by *dataframe* as plain text."""
        print(f"\n{name}")
        print(dataframe.to_string(index=False))

display_dataframe_to_user("Hidden states (size=6) per timestep", hidden_df.round(4))
display_dataframe_to_user("Output distribution over target vocab (size=10) per timestep", out_df.round(4))

# Also print a compact summary for quick view: a header line followed by one
# "t=<step> (<input token>) -> <predicted token>" line per timestep.
print(
    "Predicted tokens by timestep:",
    *(
        f"t={row['t']} ({row['input_token']!r}) -> {row['argmax_token']!r}"
        for row in outputs_rows
    ),
    sep="\n",
)
