In [61]:
# %%
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import re
from playlist import optimized_pomodoro_playlist

In [None]:
lowest_min_duration = 5
highest_min_duration = 365
increment_error_value = 0.01

In [80]:
# %%
# Token mapping
token_map = {'W': 0, 'S': 1, 'L': 2, 'EOS': 3, 'PAD': 4}
inv_token_map = {v: k for k, v in token_map.items()}

def expand_sequence(seq):
    # Expand patterns like '2×WSWSWL' to 'WSWSWLWSWSWL'
    match = re.match(r"(\d+)×([WSL]+)", seq)
    if match:
        n, pat = match.groups()
        seq = pat * int(n)
    seq = seq.replace('+', '')  # Remove '+'
    return seq

all_lengths = []

mins_list = np.arange(lowest_min_duration, highest_min_duration, increment_error_value)

print(mins_list[highest_min_duration])
print(len(mins_list))


for mins in mins_list:
    result = optimized_pomodoro_playlist(f"{int(mins)}:00", code_format=False)
    sequence = expand_sequence(result['sequence'])
    sequence = re.sub(r'[^WSL]', '', sequence)
    all_lengths.append(len(sequence))
print("Max sequence length:", max(all_lengths)+1)

max_seq_len = max(all_lengths)+1  # Set according to your longest pattern

41.49999999999987
3600
Max sequence length: 17


In [81]:
# %%
def make_example_with_delta(duration_mins):
    result = optimized_pomodoro_playlist(f"{int(duration_mins)}:00", code_format=False)
    sequence = re.sub(r'[^WSL]', '', expand_sequence(result['sequence']))
    seq_tokens = [token_map[ch] for ch in sequence] + [token_map['EOS']]  # EOS at end
    seq_tokens += [token_map['PAD']] * (max_seq_len - len(seq_tokens))
    
    # Durations per token
    durations = []
    delta_labels = []
    w_idx = 0
    short_break = result['short_break']
    long_break = result['long_break'] if result['long_break'] is not None else 0
    for ch in sequence:
        if ch == 'W':
            durations.append(result['work_sessions'][w_idx])
            delta_labels.append(0)
            w_idx += 1
        elif ch == 'S':
            durations.append(short_break)
            delta_labels.append(0)
        elif ch == 'L':
            # Only delta for L tokens: long_break - short_break
            durations.append(long_break)
            delta = max(long_break - short_break, 0)
            delta_labels.append(delta)
    # Pad to max_seq_len
    durations += [0] * (max_seq_len - len(durations))
    delta_labels += [0] * (max_seq_len - len(delta_labels))
    return float(duration_mins) / 360.0, seq_tokens, durations, delta_labels

# Generate dataset
X = []
Y_seq = []
Y_dur = []
Y_delta = []
for mins in mins_list:
    x, y_seq, y_dur, y_delta = make_example_with_delta(mins)
    X.append([x])
    Y_seq.append(y_seq)
    Y_dur.append(y_dur)
    Y_delta.append(y_delta)

# Convert to tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
Y_seq_tensor = torch.tensor(Y_seq, dtype=torch.long)
Y_dur_tensor = torch.tensor(Y_dur, dtype=torch.float32)
Y_delta_tensor = torch.tensor(Y_delta, dtype=torch.float32)

In [82]:
# %%
class PomodoroSeq2SeqDelta(nn.Module):
    def __init__(self, input_dim=1, hidden_dim=64, vocab_size=4, max_len=16):
        super().__init__()
        self.encoder = nn.Linear(input_dim, hidden_dim)
        self.decoder = nn.GRU(hidden_dim, hidden_dim, batch_first=True)
        self.token_head = nn.Linear(hidden_dim, vocab_size)
        self.dur_head = nn.Linear(hidden_dim, 1)
        self.delta_head = nn.Linear(hidden_dim, 1)
        self.max_len = max_len

    def forward(self, x):
        enc = torch.relu(self.encoder(x))
        dec_input = enc.unsqueeze(1).repeat(1, self.max_len, 1)
        output, _ = self.decoder(dec_input)
        token_logits = self.token_head(output)           # (batch, max_len, vocab_size)
        dur_pred = self.dur_head(output).squeeze(-1)     # (batch, max_len)
        delta_pred = torch.relu(self.delta_head(output)).squeeze(-1)   # (batch, max_len), always >= 0
        return token_logits, dur_pred, delta_pred

In [76]:
# %%
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = PomodoroSeq2SeqDelta(max_len=max_seq_len).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
token_criterion = nn.CrossEntropyLoss(ignore_index=token_map['PAD'])
dur_criterion = nn.MSELoss()
delta_criterion = nn.MSELoss()

X_tensor = X_tensor.to(device)
Y_seq_tensor = Y_seq_tensor.to(device)
Y_dur_tensor = Y_dur_tensor.to(device)
Y_delta_tensor = Y_delta_tensor.to(device)

num_epochs = 10000
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    token_logits, dur_pred, delta_pred = model(X_tensor)
    loss_tokens = token_criterion(token_logits.view(-1, 4), Y_seq_tensor.view(-1))
    loss_dur = dur_criterion(dur_pred, Y_dur_tensor)
    loss_delta = delta_criterion(delta_pred, Y_delta_tensor)
    loss = loss_tokens + loss_dur + loss_delta
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 100 == 0 or epoch == 0:
        print(f"Epoch {epoch+1}/{num_epochs} | Token: {loss_tokens.item():.4f} | Dur: {loss_dur.item():.4f} | Delta: {loss_delta.item():.4f} | Total: {loss.item():.4f}")

cuda


OutOfMemoryError: CUDA out of memory. Tried to allocate 29.71 GiB. GPU 0 has a total capacity of 6.00 GiB of which 82.00 MiB is free. Of the allocated memory 4.75 GiB is allocated by PyTorch, and 144.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [67]:
# %%
def decode_output_delta(token_seq, dur_seq, delta_seq):
    sequence = []
    work_sessions = []
    short_break = None
    long_break = None
    w_idx = 0
    last_short = None
    for i, (tok, dur, delta) in enumerate(zip(token_seq, dur_seq, delta_seq)):
        ch = inv_token_map[tok]
        if ch in ('PAD', 'EOS'):
            break
        sequence.append(ch)
        if ch == 'W':
            work_sessions.append(int(round(dur)))
            w_idx += 1
        elif ch == 'S':
            short_break = int(round(dur))
            last_short = short_break
        elif ch == 'L':
            # Use delta to guarantee long_break > short_break
            long_break = int(round((last_short if last_short is not None else 0) + delta))
    return ''.join(sequence), work_sessions, short_break, long_break

def predict_full_delta(model, duration_mins):
    model.eval()
    with torch.no_grad():
        x = torch.tensor([[duration_mins / 360.0]], dtype=torch.float32).to(device)
        token_logits, dur_pred, delta_pred = model(x)
        pred_tokens = torch.argmax(token_logits, dim=2).cpu().numpy()[0]
        pred_durs = dur_pred.cpu().numpy()[0]
        pred_deltas = delta_pred.cpu().numpy()[0]
        pattern, work_sessions, short_break, long_break = decode_output_delta(pred_tokens, pred_durs, pred_deltas)
        return {
            'duration_minutes': duration_mins,
            'pattern': pattern,
            'work_sessions': work_sessions,
            'short_break': short_break,
            'long_break': long_break
        }

In [68]:
# %%
def format_minutes_to_mmss(minutes: float) -> str:
    total_seconds = round(minutes * 60)
    mm = total_seconds // 60
    ss = total_seconds % 60
    return f"{mm}:{ss:02d}"

def print_prediction_with_loss(result):
    total_scheduled = sum(result['work_sessions'])
    count_S = result['pattern'].count('S')
    count_L = result['pattern'].count('L')
    short_break_total = (result['short_break'] or 0) * count_S
    long_break_total = (result['long_break'] or 0) * count_L
    total_scheduled += short_break_total + long_break_total
    loss = abs(result['duration_minutes'] - total_scheduled)
    print(result)
    print(f"Time loss: {loss:.2f} min ({format_minutes_to_mmss(loss)})\n")


In [69]:
print_prediction_with_loss(predict_full_delta(model, 115))
print_prediction_with_loss(predict_full_delta(model, 75))
print_prediction_with_loss(predict_full_delta(model, 200))
print_prediction_with_loss(predict_full_delta(model, 13.4))

{'duration_minutes': 115, 'pattern': 'WSWSWL', 'work_sessions': [30, 30, 31], 'short_break': 5, 'long_break': 10}
Time loss: 4.00 min (4:00)

{'duration_minutes': 75, 'pattern': 'WSW', 'work_sessions': [27, 32], 'short_break': 5, 'long_break': None}
Time loss: 11.00 min (11:00)

{'duration_minutes': 200, 'pattern': 'WSWSWSWLWSWS', 'work_sessions': [31, 30, 29, 28, 29, 30], 'short_break': 5, 'long_break': 10}
Time loss: 12.00 min (12:00)

{'duration_minutes': 13.4, 'pattern': 'WSW', 'work_sessions': [7, 6], 'short_break': 0, 'long_break': None}
Time loss: 0.40 min (0:24)

