Preprocessar dades per al model

In [56]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import numpy as np
import json

# Reproducibility: seed the RNGs used for weight initialisation, DataLoader
# shuffling and sampling during generation.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Load the preprocessed dataset. The encoding is given explicitly so that chord
# and tonality names decode identically regardless of the platform default.
with open('data/dataset_tonality_chords.json', encoding='utf-8') as f:
    data = json.load(f)

# Chord encoder: fitted on every distinct chord symbol found in any song.
# The vocabulary is sorted so the list itself is deterministic between runs.
all_chords = sorted({c for song in data['songs'] for c in song['chords']})
chord_encoder = LabelEncoder()
chord_encoder.fit(all_chords)

# Tonality encoder: fitted on every distinct tonality label in the dataset.
tonalities = sorted({song['tonality'] for song in data['songs']})
tonality_encoder = LabelEncoder()
tonality_encoder.fit(tonalities)

# Parameters
SEQUENCE_LENGTH = 8  # Number of chords in each input window
BATCH_SIZE = 32

Clase Dataset

In [57]:
class ChordDataset(Dataset):
    """Sliding-window next-chord samples built from a list of songs.

    Each song is a mapping with a ``'chords'`` sequence and a ``'tonality'``
    label. Every window of ``seq_length`` consecutive encoded chords becomes
    one sample whose target is the chord immediately following the window.
    Songs with ``seq_length`` chords or fewer contribute no samples.

    Args:
        songs: Iterable of song mappings (``'chords'`` and ``'tonality'`` keys).
        chord_encoder: Object whose ``transform`` maps chord symbols to codes.
        tonality_encoder: Object whose ``transform`` maps tonality labels to codes.
        seq_length: Number of chords in each input window.
    """

    def __init__(self, songs, chord_encoder, tonality_encoder, seq_length):
        self.songs = songs
        self.chord_encoder = chord_encoder
        self.tonality_encoder = tonality_encoder
        self.seq_length = seq_length
        # All windows are materialised once, up front.
        self.sequences = self._create_sequences()

    def _create_sequences(self):
        """Return the list of sample dicts for every window of every song."""
        window = self.seq_length
        samples = []
        for song in self.songs:
            chords, tonality = song['chords'], song['tonality']

            # Encode the whole song once, then slice windows out of it.
            chord_ids = self.chord_encoder.transform(chords)
            tonality_id = self.tonality_encoder.transform([tonality])[0]

            samples.extend(
                {
                    'input_seq': chord_ids[start:start + window],
                    'tonality': tonality_id,
                    'target': chord_ids[start + window],
                }
                for start in range(len(chord_ids) - window)
            )
        return samples

    def __len__(self):
        """Number of (window, target) samples across all songs."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of ``torch.long`` tensors."""
        sample = self.sequences[idx]
        return {
            field: torch.tensor(sample[field], dtype=torch.long)
            for field in ('input_seq', 'tonality', 'target')
        }

# Build the windowed dataset and a shuffling mini-batch loader over it
dataset = ChordDataset(
    songs=data['songs'],
    chord_encoder=chord_encoder,
    tonality_encoder=tonality_encoder,
    seq_length=SEQUENCE_LENGTH,
)
train_loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True)

KeyboardInterrupt: 

Model

In [7]:
class ChordLSTM(torch.nn.Module):
    """LSTM that predicts the next chord from a chord window and a tonality.

    Args:
        vocab_size: Number of distinct chord codes (also the output width).
        tonality_size: Number of distinct tonality codes.
        embedding_dim: Width of both the chord and the tonality embeddings.
        hidden_dim: Width of the LSTM hidden state.
    """

    def __init__(self, vocab_size, tonality_size, embedding_dim=64, hidden_dim=128):
        super().__init__()
        nn = torch.nn
        self.chord_embedding = nn.Embedding(vocab_size, embedding_dim)
        self.tonality_embedding = nn.Embedding(tonality_size, embedding_dim)
        # The LSTM input is the chord and tonality embeddings side by side.
        self.lstm = nn.LSTM(2 * embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, tonality):
        """Return next-chord logits for each sequence in the batch.

        Args:
            x: Chord codes, indexed as (batch, time).
            tonality: One tonality code per batch element.

        Returns:
            Tensor of unnormalised scores with ``vocab_size`` entries per
            batch element.
        """
        steps = x.size(1)

        # Repeat the single tonality vector along the time axis so it can be
        # concatenated with every chord embedding.
        tonality_per_step = (
            self.tonality_embedding(tonality).unsqueeze(1).expand(-1, steps, -1)
        )
        features = torch.cat((self.chord_embedding(x), tonality_per_step), dim=-1)

        hidden_states, _ = self.lstm(features)

        # Only the final time step feeds the classifier.
        return self.fc(hidden_states[:, -1, :])

# Instantiate the model; both embedding tables are sized from the fitted encoders
model = ChordLSTM(len(chord_encoder.classes_), len(tonality_encoder.classes_))

Train loop

In [8]:
# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)

# Loss and optimizer used by the training loop
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# Training function
def train(model, dataloader, epochs=50, opt=None, loss_fn=None, run_device=None,
          save_path="model_final.pth"):
    """Train ``model`` on ``dataloader`` and save its weights when finished.

    Args:
        model: Module called as ``model(inputs, tonalities)`` returning logits.
        dataloader: Iterable of batches; each batch is a mapping with
            ``'input_seq'``, ``'tonality'`` and ``'target'`` tensors.
        epochs: Number of full passes over ``dataloader``.
        opt: Optimizer to step. Defaults to the notebook-level ``optimizer``.
            Pass one explicitly when training a model other than the one that
            optimizer was built for, otherwise the wrong parameters are updated.
        loss_fn: Loss callable ``loss_fn(outputs, targets)``. Defaults to the
            notebook-level ``criterion``.
        run_device: Device the batches are moved to. Defaults to the
            notebook-level ``device``.
        save_path: Where the final ``state_dict`` is written.

    Returns:
        List with the mean batch loss of each epoch, in order.

    Raises:
        ValueError: If an epoch yields no batches at all.
    """
    # Resolve collaborators lazily so the notebook-level objects are only
    # required when the caller does not supply its own.
    opt = optimizer if opt is None else opt
    loss_fn = criterion if loss_fn is None else loss_fn
    run_device = device if run_device is None else run_device

    history = []
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for batch in dataloader:
            inputs = batch['input_seq'].to(run_device)
            tonalities = batch['tonality'].to(run_device)
            targets = batch['target'].to(run_device)

            opt.zero_grad()
            outputs = model(inputs, tonalities)
            loss = loss_fn(outputs, targets)

            loss.backward()
            opt.step()

            total_loss += loss.item()
            num_batches += 1

        # An empty loader would otherwise surface as an opaque division by zero.
        if num_batches == 0:
            raise ValueError("dataloader produced no batches; nothing to train on")

        mean_loss = total_loss / num_batches
        history.append(mean_loss)
        print(f'Epoch {epoch+1}, Loss: {mean_loss:.4f}')
    torch.save(model.state_dict(), save_path)
    return history

# Run training on the notebook-level model and loader
train(model=model, dataloader=train_loader)

  return torch._C._cuda_getDeviceCount() > 0


Epoch 1, Loss: 1.5406
Epoch 2, Loss: 1.5002
Epoch 3, Loss: 1.4866
Epoch 4, Loss: 1.4789
Epoch 5, Loss: 1.4739
Epoch 6, Loss: 1.4706
Epoch 7, Loss: 1.4681
Epoch 8, Loss: 1.4663
Epoch 9, Loss: 1.4650
Epoch 10, Loss: 1.4641
Epoch 11, Loss: 1.4631
Epoch 12, Loss: 1.4627
Epoch 13, Loss: 1.4623
Epoch 14, Loss: 1.4618
Epoch 15, Loss: 1.4618
Epoch 16, Loss: 1.4616
Epoch 17, Loss: 1.4615
Epoch 18, Loss: 1.4614
Epoch 19, Loss: 1.4616
Epoch 20, Loss: 1.4612
Epoch 21, Loss: 1.4613
Epoch 22, Loss: 1.4616
Epoch 23, Loss: 1.4615
Epoch 24, Loss: 1.4621
Epoch 25, Loss: 1.4619
Epoch 26, Loss: 1.4624
Epoch 27, Loss: 1.4626
Epoch 28, Loss: 1.4628
Epoch 29, Loss: 1.4629
Epoch 30, Loss: 1.4633
Epoch 31, Loss: 1.4638
Epoch 32, Loss: 1.4641
Epoch 33, Loss: 1.4645
Epoch 34, Loss: 1.4648
Epoch 35, Loss: 1.4655
Epoch 36, Loss: 1.4654
Epoch 37, Loss: 1.4658
Epoch 38, Loss: 1.4665
Epoch 39, Loss: 1.4668
Epoch 40, Loss: 1.4667
Epoch 41, Loss: 1.4674
Epoch 42, Loss: 1.4676
Epoch 43, Loss: 1.4684
Epoch 44, Loss: 1.46

Generation loop (inference)

In [111]:
def generate_chords(model, starting_chords, tonality, length=10, temperature=0.7):
    """Extend a chord progression by sampling one chord at a time from ``model``.

    Uses the notebook-level ``chord_encoder``, ``tonality_encoder``,
    ``SEQUENCE_LENGTH`` and ``device``. The model is switched to eval mode and
    left in it.

    Args:
        model: Module called as ``model(inputs, tonality)`` returning logits
            over the chord vocabulary.
        starting_chords: Chord symbols that seed the progression.
        tonality: Tonality label known to ``tonality_encoder``.
        length: Number of chords to append.
        temperature: Softmax temperature. ``0`` selects the highest-scoring
            chord at every step instead of sampling.

    Returns:
        The seed chords followed by the generated ones, as returned by
        ``chord_encoder.inverse_transform``.

    Raises:
        ValueError: If ``temperature`` is negative, or if chords are requested
            from an empty seed progression.
    """
    if temperature < 0:
        raise ValueError("temperature must be non-negative")
    if length > 0 and len(starting_chords) == 0:
        raise ValueError("starting_chords must contain at least one chord")

    model.eval()
    with torch.no_grad():
        # Encode the inputs
        encoded_tonality = tonality_encoder.transform([tonality])[0]
        # Plain Python ints: appending is cheap, and the tensor below is not
        # built from a list of ndarrays (a slow path PyTorch warns about).
        sequence = [int(code) for code in chord_encoder.transform(starting_chords)]

        # The tonality does not change between steps, so build it once.
        tonality_tensor = torch.tensor([encoded_tonality], dtype=torch.long).to(device)

        for _ in range(length):
            # Only the most recent SEQUENCE_LENGTH chords are fed to the model.
            window = sequence[-SEQUENCE_LENGTH:]
            inputs = torch.tensor([window], dtype=torch.long).to(device)

            # Prediction
            output = model(inputs, tonality_tensor)
            if temperature == 0:
                # Dividing by zero would yield inf/nan probabilities.
                next_chord = torch.argmax(output, dim=-1).item()
            else:
                probs = torch.softmax(output / temperature, dim=-1)
                next_chord = torch.multinomial(probs, 1).item()

            sequence.append(next_chord)

        return chord_encoder.inverse_transform(np.asarray(sequence, dtype=np.int64))

# Example usage
# NOTE(review): the seed progression has a single chord, while training windows
# contain SEQUENCE_LENGTH chords; confirm this short-context use is intended.
starting_progression = ['C']
tonality = 'C_MAJOR'
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load("model_final.pth", map_location=device))
generated = generate_chords(model, starting_progression, tonality, length=10)
print("Progresión generada:", generated)

Progresión generada: ['C' 'Cm' 'C' 'Cm' 'C' 'Cm' 'C' 'Cm' 'C' 'Cm' 'C']
