In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import numpy as np
import json
from tqdm import tqdm

# Load the preprocessed dataset (a JSON document with a top-level 'songs' list)
with open('data/dataset_tonality_chords.json') as f:
    data = json.load(f)

# Chord encoder: fitted on every distinct chord symbol that appears in any song
all_chords = list({chord for entry in data['songs'] for chord in entry['chords']})
chord_encoder = LabelEncoder().fit(all_chords)

# Tonality encoder: fitted on every distinct tonality label
tonalities = list({entry['tonality'] for entry in data['songs']})
tonality_encoder = LabelEncoder().fit(tonalities)

# Parameters
SEQUENCE_LENGTH = 8  # Length of the input sequences
BATCH_SIZE = 32

class ChordGenerator(nn.Module):
    """Tonality-conditioned LSTM language model over chord tokens.

    Every input token embedding is concatenated with the embedding of the
    song's tonality and fed to a single-layer, unidirectional LSTM; a linear
    layer maps each hidden state to logits over the token ids.

    ``forward`` runs the whole (padded) sequence at once and returns logits,
    which is what a cross-entropy training loop needs. ``generate`` samples
    new chord sequences one token at a time.

    Args:
        vocab_size: Number of token ids the model must be able to score.
        tonality_size: Number of distinct tonalities.
        pad_token: Id used for padding; its embedding is kept at zero.
        embedding_dim: Size of both the token and the tonality embeddings.
        hidden_dim: Size of the LSTM hidden state.
        sos_token: Id of the start-of-sequence token. Defaults to
            ``pad_token - 1``, the layout used by ``TonalityDataset``
            (PAD = SOS + 1).
    """

    def __init__(self, vocab_size, tonality_size, pad_token, embedding_dim=64, hidden_dim=128,
                 sos_token=None):
        super().__init__()
        self.pad_token = pad_token
        self.sos_token = pad_token - 1 if sos_token is None else sos_token

        # Size the tables so that chord ids, SOS and PAD are all valid indices,
        # whether or not the caller already counted the special tokens.
        self.num_tokens = max(vocab_size, self.pad_token + 1, self.sos_token + 1)

        # Embeddings
        self.tonality_embedding = nn.Embedding(tonality_size, embedding_dim)
        self.embedding = nn.Embedding(
            self.num_tokens,
            embedding_dim,
            padding_idx=pad_token
        )

        # LSTM decoder: input is [token embedding ; tonality embedding]
        self.lstm = nn.LSTM(embedding_dim * 2, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, self.num_tokens)

    def _combine(self, tokens, tonality_emb):
        """Concatenate token embeddings with the tonality embedding per step."""
        token_emb = self.embedding(tokens)  # (batch, steps, emb_dim)
        tonality_steps = tonality_emb.unsqueeze(1).expand(-1, token_emb.size(1), -1)
        return torch.cat([token_emb, tonality_steps], dim=-1)

    def forward(self, tonality, padded_sequences, lengths, max_length=16, temperature=1.0):
        """Teacher-forced pass returning next-token logits for every position.

        Args:
            tonality: Long tensor (batch,) of tonality ids.
            padded_sequences: Long tensor (batch, seq_len) of input token ids,
                right-padded with ``pad_token``.
            lengths: Number of real (non-padding) tokens per sequence, in any
                order. Values below 1 are raised to 1 because packing rejects
                empty sequences; the extra step only reads a padding token.
            max_length: Unused here; kept so existing call sites keep working.
                See ``generate``.
            temperature: Unused here; see ``generate``.

        Returns:
            Float tensor (batch, seq_len, num_tokens) of unnormalised logits.
            Positions past a sequence's length carry no information and should
            be excluded from the loss (e.g. ``ignore_index=pad_token``).
        """
        batch_size, seq_len = padded_sequences.shape
        if seq_len == 0:
            # Nothing to run through the LSTM
            return self.fc.weight.new_zeros((batch_size, 0, self.num_tokens))

        combined = self._combine(padded_sequences, self.tonality_embedding(tonality))

        # pack_padded_sequence needs CPU integer lengths that are all >= 1
        cpu_lengths = torch.as_tensor(lengths, dtype=torch.long, device='cpu').clamp(min=1)
        packed = nn.utils.rnn.pack_padded_sequence(
            combined,
            cpu_lengths,
            batch_first=True,
            enforce_sorted=False
        )
        packed_out, _ = self.lstm(packed)

        # total_length keeps the time axis aligned with the input/targets
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out,
            batch_first=True,
            total_length=seq_len
        )
        return self.fc(lstm_out)

    @torch.no_grad()
    def generate(self, tonality, max_length=16, temperature=1.0):
        """Sample chord sequences conditioned on ``tonality``.

        Args:
            tonality: Long tensor (batch,) of tonality ids.
            max_length: Number of chords to sample per sequence.
            temperature: Softmax temperature (> 0); lower is greedier.

        Returns:
            Long tensor (batch, max_length) of sampled token ids. SOS and PAD
            are never sampled.

        Raises:
            ValueError: If ``temperature`` is not strictly positive.
        """
        if temperature <= 0:
            raise ValueError('temperature must be > 0')

        batch_size = tonality.size(0)
        tonality_emb = self.tonality_embedding(tonality)  # (batch, emb_dim)

        # Start every sequence with SOS
        current = torch.full((batch_size, 1), self.sos_token,
                             dtype=torch.long, device=tonality.device)
        state = None  # zero initial (h, c)
        special = [self.sos_token, self.pad_token]
        sampled = []

        for _ in range(max_length):
            step_out, state = self.lstm(self._combine(current, tonality_emb), state)
            logits = self.fc(step_out[:, -1, :])
            logits[:, special] = float('-inf')  # never emit SOS/PAD
            probs = F.softmax(logits / temperature, dim=-1)
            current = torch.multinomial(probs, 1)
            sampled.append(current)

        if not sampled:
            return torch.empty((batch_size, 0), dtype=torch.long, device=tonality.device)
        return torch.cat(sampled, dim=1)
    

In [11]:
from torch.nn.utils.rnn import pad_sequence

class TonalityDataset(Dataset):
    """Dataset of songs yielding a tonality id and an SOS-prefixed chord id sequence.

    Two special token ids are placed right after the chord classes known to
    ``chord_encoder``: ``sos_token`` (start of sequence) and ``pad_token``
    (padding, equal to ``sos_token + 1``).
    """

    def __init__(self, songs, chord_encoder, tonality_encoder, max_length=16):
        self.songs = songs
        self.chord_encoder = chord_encoder
        self.tonality_encoder = tonality_encoder
        self.max_length = max_length
        n_chord_classes = len(chord_encoder.classes_)
        self.sos_token = n_chord_classes
        self.pad_token = n_chord_classes + 1  # Extra token reserved for padding

    def __len__(self):
        """Number of songs."""
        return len(self.songs)

    def __getitem__(self, idx):
        """Encode song ``idx``.

        Returns a dict with the tonality id (0-dim tensor), the token id
        sequence starting with SOS (1-D tensor) and its unpadded length.
        """
        entry = self.songs[idx]
        # Leave one slot for the leading SOS token
        kept_chords = entry['chords'][:self.max_length - 1]

        token_ids = [self.sos_token]
        token_ids.extend(self.chord_encoder.transform(kept_chords).tolist())

        tonality_id = self.tonality_encoder.transform([entry['tonality']])[0]

        return {
            'tonality': torch.tensor(tonality_id),
            'sequence': torch.tensor(token_ids),
            'length': len(token_ids)  # Real (unpadded) length
        }

    def collate_fn(self, batch):
        """Collate items into a padded batch ordered by decreasing length.

        ``batch`` is sorted in place (longest first), as packed sequences
        expect. Returns a dict with stacked tonalities, sequences padded with
        ``pad_token`` (batch first) and the original lengths.
        """
        def _item_length(item):
            return item['length']

        batch.sort(key=_item_length, reverse=True)

        tonality_ids = []
        token_seqs = []
        seq_lengths = []
        for item in batch:
            tonality_ids.append(item['tonality'])
            token_seqs.append(item['sequence'])
            seq_lengths.append(item['length'])

        # Right-pad every sequence up to the longest one in the batch
        padded = pad_sequence(
            token_seqs,
            batch_first=True,
            padding_value=self.pad_token
        )

        return {
            'tonality': torch.stack(tonality_ids),
            'padded_sequences': padded,
            'lengths': torch.tensor(seq_lengths)
        }
    
# Build the dataset and its batched loader.
# Requires the setup cell (imports, `data`, encoders, BATCH_SIZE) to have run first.
dataset = TonalityDataset(data['songs'], chord_encoder, tonality_encoder)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,  # shared constant instead of a duplicated literal
    shuffle=True,
    collate_fn=dataset.collate_fn,  # custom collate: sorts by length and pads
)

NameError: name 'DataLoader' is not defined

In [12]:
# Device the notebook runs on (GPU when available).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The loss and the optimizer are intentionally NOT created here: `model` is only
# defined further down, and train_model() builds its own padding-aware
# criterion and optimizer from the model it receives.

def train_model(model, dataloader, epochs=50, lr=0.001):
    """Train ``model`` with teacher forcing and return the mean loss per epoch.

    Each batch sequence is split into inputs (everything but the last token)
    and targets (everything but the leading SOS); the model is asked for
    next-token logits and trained with cross-entropy that ignores padding.

    Args:
        model: Module exposing ``pad_token`` and callable as
            ``model(tonality=..., padded_sequences=..., lengths=...)``,
            returning logits of shape (batch, seq_len, vocab_size).
        dataloader: Iterable of dicts with keys ``'tonality'``,
            ``'padded_sequences'`` and ``'lengths'``.
        epochs: Number of passes over ``dataloader``.
        lr: Adam learning rate.

    Returns:
        List with one average loss per epoch (``nan`` for an epoch in which
        no batch contained anything to predict).
    """
    # Initial setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    criterion = nn.CrossEntropyLoss(ignore_index=model.pad_token)  # Padding targets are ignored
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Metric history
    train_loss = []

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        batches_used = 0

        # Progress bar
        progress_bar = tqdm(dataloader, desc=f'Epoch {epoch+1}/{epochs}')

        for batch in progress_bar:
            # 1. Prepare data
            tonalities = batch['tonality'].to(device)
            sequences = batch['padded_sequences'].to(device)
            lengths = batch['lengths'].to(device)

            # 2. Inputs drop the last token, targets drop the leading SOS
            inputs = sequences[:, :-1]
            targets = sequences[:, 1:]

            # A batch with no real target would yield a NaN loss: skip it
            if targets.numel() == 0 or not (targets != model.pad_token).any().item():
                continue

            # One token was removed; keep at least 1 because packed sequences
            # cannot be empty (the extra step's target is PAD and is ignored)
            input_lengths = (lengths - 1).clamp(min=1)

            # 3. Forward pass
            optimizer.zero_grad()
            outputs = model(
                tonality=tonalities,
                padded_sequences=inputs,
                lengths=input_lengths
            )

            # 4. Loss over flattened sequences; reshape also handles
            #    non-contiguous tensors, unlike view
            loss = criterion(
                outputs.reshape(-1, outputs.size(-1)),  # (batch*seq_len, vocab_size)
                targets.reshape(-1)                     # (batch*seq_len)
            )

            # 5. Backward pass
            loss.backward()
            optimizer.step()

            # 6. Update metrics
            epoch_loss += loss.item()
            batches_used += 1
            progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})

        # 7. Store the epoch metric (guarding against an epoch with no usable batch)
        avg_loss = epoch_loss / batches_used if batches_used else float('nan')
        train_loss.append(avg_loss)
        print(f'Epoch {epoch+1} - Avg Loss: {avg_loss:.4f}')

    return train_loss

# Hyperparameters
VOCAB_SIZE = len(chord_encoder.classes_) + 2  # +2 for SOS and PAD
TONALITY_SIZE = len(tonality_encoder.classes_)
PAD_TOKEN = VOCAB_SIZE - 1  # The last token id is reserved for padding

# The batches are padded by `dataset`, so the model must use the same PAD id
assert PAD_TOKEN == dataset.pad_token, 'model and dataset disagree on the padding token id'

# Model
model = ChordGenerator(
    vocab_size=VOCAB_SIZE,
    tonality_size=TONALITY_SIZE,
    pad_token=PAD_TOKEN
)

# Training
train_loss = train_model(model, dataloader, epochs=50)
# Save the trained weights
torch.save(model.state_dict(), 'chord_generator.pth')

TypeError: ChordGenerator.__init__() got an unexpected keyword argument 'pad_token'