In [None]:
import re
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from itertools import combinations
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader

# ====================
# 1. PREPROCESAMIENTO
# ====================
def clean_chord_progression(progression):
    """Normaliza acordes y estructura"""
    # Normalizar formato de acordes
    progression = re.sub(r'min', 'm', progression, flags=re.IGNORECASE)
    progression = re.sub(r'sus|dim|aug', '', progression)  # Elimina extensiones avanzadas
    progression = re.sub(r'([A-G])s', r'\1#', progression)  # Fsm -> F#m
    
    # Normalizar estructura
    progression = re.sub(r'(<[a-z]+)_\d+>', r'\1>', progression)  # <verse_1> -> <verse>
    return progression

def preprocess_data(filepath):
    """Carga y limpia el dataset"""
    df = pd.read_csv(filepath)
    
    # Limpieza básica
    df = df[df['chords'].notna() & df['main_genre'].notna()].copy()
    df['chords'] = df['chords'].apply(clean_chord_progression)
    
    # Extraer etiquetas de estructura
    def extract_structure(chord_sequence):
        tags = list(set(re.findall(r'<([a-z]+)>', chord_sequence)))
        return ' '.join(sorted(tags)) if tags else 'verse'  # Default si no hay tags
    
    df['structure'] = df['chords'].apply(extract_structure)
    
    # Crear todas las combinaciones posibles
    all_structures = set()
    for s in df['structure']:
        parts = s.split()
        all_structures.add(s)
        # Añadir sub-combinaciones
        for i in range(1, len(parts)):
            all_structures.add(' '.join(parts[:i]))
    
    # Añadir estructuras comunes manualmente
    common_structures = [
        'intro verse chorus',
        'verse chorus',
        'intro verse bridge chorus',
        'verse prechorus chorus'
    ]
    all_structures.update(common_structures)
    
    # Codificadores
    genre_encoder = LabelEncoder()
    df['genre_id'] = genre_encoder.fit_transform(df['main_genre'])
    
    structure_encoder = LabelEncoder()
    structure_encoder.fit(list(all_structures))
    
    # Manejar casos vacíos o desconocidos
    df['structure'] = df['structure'].apply(
        lambda x: x if x in all_structures else 'verse chorus'
    )
    df['structure_id'] = structure_encoder.transform(df['structure'])
    
    return df, genre_encoder, structure_encoder

# ====================
# 2. DATASET & MODELO
# ====================
class ChordDataset(Dataset):
    def __init__(self, df, chord_to_idx, seq_length=32):
        self.seq_length = seq_length
        self.sequences = []
        
        for _, row in df.iterrows():
            chords = [chord_to_idx.get(c, 1) for c in row['chords'].split()]  # 1 = <unk>
            for i in range(0, len(chords) - seq_length, seq_length//2):
                self.sequences.append({
                    'input': chords[i:i+seq_length],
                    'target': chords[i+1:i+seq_length+1],
                    'genre': row['genre_id'],
                    'structure': row['structure_id']
                })

    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        seq = self.sequences[idx]
        return {
            'input': torch.tensor(seq['input'], dtype=torch.long),
            'target': torch.tensor(seq['target'], dtype=torch.long),
            'genre': torch.tensor(seq['genre'], dtype=torch.long),
            'structure': torch.tensor(seq['structure'], dtype=torch.long)
        }

class ChordTransformer(nn.Module):
    def __init__(self, vocab_size, num_genres, num_structures, d_model=128, nhead=4, num_layers=4):
        super().__init__()
        self.d_model = d_model
        
        # Embeddings con dimensiones ajustadas
        self.chord_embed = nn.Embedding(vocab_size, d_model)
        self.genre_embed = nn.Embedding(num_genres, d_model//4)  # 32-dim
        self.structure_embed = nn.Embedding(num_structures, d_model//4)  # 32-dim
        
        # Capa de proyección para unificar dimensiones
        self.combine = nn.Linear(d_model + d_model//2, d_model)  # 128 + 64 = 192 -> 128
        
        # Transformer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model*4,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        
        self.fc = nn.Linear(d_model, vocab_size)
        
    def forward(self, x, genre, structure):
        # Embeddings
        chord_emb = self.chord_embed(x)  # (batch, seq_len, 128)
        genre_emb = self.genre_embed(genre).unsqueeze(1).expand(-1, x.size(1), -1)  # (batch, seq_len, 32)
        structure_emb = self.structure_embed(structure).unsqueeze(1).expand(-1, x.size(1), -1)  # (batch, seq_len, 32)
        
        # Combinar y proyectar
        combined = torch.cat([chord_emb, genre_emb, structure_emb], dim=-1)  # (batch, seq_len, 192)
        x = self.combine(combined)  # (batch, seq_len, 128)
        
        # Transformer
        x = self.transformer(x)
        return self.fc(x)

# ====================
# 3. ENTRENAMIENTO
# ====================
def train_model(model, dataloader, val_loader=None, epochs=20):
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignora <pad>
    optimizer = optim.AdamW(model.parameters(), lr=3e-4)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer, 
        max_lr=3e-4,
        steps_per_epoch=len(dataloader),
        epochs=epochs
    )
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        
        for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}"):
            inputs = batch['input'].to(device)
            targets = batch['target'].to(device)
            genres = batch['genre'].to(device)
            structures = batch['structure'].to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs, genres, structures)
            loss = criterion(
                outputs.view(-1, outputs.size(-1)), 
                targets.view(-1)
            )
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            train_loss += loss.item()
        
        # Validación
        if val_loader:
            val_loss = evaluate(model, val_loader, criterion)
            print(f"Epoch {epoch+1} | Train Loss: {train_loss/len(dataloader):.4f} | Val Loss: {val_loss:.4f}")
        else:
            print(f"Epoch {epoch+1} | Train Loss: {train_loss/len(dataloader):.4f}")
            
    torch.save(model.state_dict(), "model_final2.pth")

def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    
    with torch.no_grad():
        for batch in dataloader:
            inputs = batch['input'].to(device)
            targets = batch['target'].to(device)
            genres = batch['genre'].to(device)
            structures = batch['structure'].to(device)
            
            outputs = model(inputs, genres, structures)
            loss = criterion(
                outputs.view(-1, outputs.size(-1)), 
                targets.view(-1)
            )
            total_loss += loss.item()
    
    return total_loss / len(dataloader)

# ====================
# 4. GENERACIÓN
# ====================

# ====================
# 5. EJECUCIÓN
# ====================
if __name__ == "__main__":
    # Configuración
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.manual_seed(42)
    
    # 1. Preprocesamiento
    print("Preprocesando datos...")
    df, genre_encoder, structure_encoder = preprocess_data("data/chordomicon_clean.csv")
    
    # 2. Vocabulario
    all_chords = list(set([c for s in df['chords'] for c in s.split()]))
    vocab = ['<pad>', '<unk>'] + sorted(all_chords)
    vocab_size = len(vocab)
    chord_to_idx = {v: i for i, v in enumerate(vocab)}
    idx_to_chord = {i: v for i, v in enumerate(vocab)}
    
    # 3. Datasets
    print("Creando datasets...")
    train_df = df.sample(frac=0.8)
    val_df = df.drop(train_df.index)
    
    train_dataset = ChordDataset(train_df, chord_to_idx)
    val_dataset = ChordDataset(val_df, chord_to_idx)
    
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64)
    
    # 4. Modelo
    print("Inicializando modelo...")
    model = ChordTransformer(
        vocab_size=vocab_size,
        num_genres=len(genre_encoder.classes_),
        num_structures=len(structure_encoder.classes_)
    ).to(device)
    
    # 5. Entrenamiento
    print("Comenzando entrenamiento...")
    train_model(model, train_loader, val_loader, epochs=20)
    
    

In [None]:
def generate_chords(model, genre, structure, start_seq="<intro> <verse> <chorus>", max_len=32, temperature=0.7):
    model.eval()
    
    # Validar y formatear estructura
    if not isinstance(structure, str) or not structure.strip():
        structure = "verse chorus"
    
    structure_parts = [s for s in structure.split() if s]
    formatted_structure = ' '.join(
        f'<{p}>' if not p.startswith('<') else p 
        for p in structure_parts
    )
    
    # Codificar con manejo de errores
    try:
        structure_id = structure_encoder.transform([formatted_structure])[0]
    except ValueError:
        known_structures = structure_encoder.classes_
        fallback = 'verse chorus' if 'verse chorus' in known_structures else known_structures[0]
        print(f"Estructura '{structure}' no válida. Usando '{fallback}'")
        structure_id = structure_encoder.transform([fallback])[0]
    
    # Resto de la generación...
    genre_id = torch.tensor([genre_encoder.transform([genre])[0]], device=device)
    input_seq = [chord_to_idx.get(s, 1) for s in start_seq.split() if s]
    
    if not input_seq:  # Si start_seq está vacío
        input_seq = [chord_to_idx.get('<intro>', 1)]
        
    input_tensor = torch.tensor([input_seq], dtype=torch.long, device=device)
    generated = []
    with torch.no_grad():
        for _ in range(max_len):
            output = model(input_tensor, genre_id, torch.tensor([structure_id], device=device))
            probs = torch.softmax(output[0, -1] / temperature, dim=-1)
            next_idx = torch.multinomial(probs, 1).item()
            next_chord = idx_to_chord[next_idx]
            
            generated.append(next_chord)
            input_tensor = torch.cat([input_tensor, torch.tensor([[next_idx]], device=device)], dim=1)
            
            if next_chord.startswith('<') and len(generated) > 5:
                break
    
    return ' '.join(generated)

In [29]:
# Ejemplo con estructura compleja
progresion = generate_chords(
    model=model,
    genre="rock",
    structure="intro verse chorus",  # Combinación no vista en entrenamiento
    start_seq=""
)
print(progresion)

Estructura 'intro verse chorus' no válida. Usando 'verse chorus'
<intro> <intro> <intro> <intro> <intro> <intro>


In [21]:
# Después de entrenar el modelo y crear el structure_encoder
print("Estructuras disponibles:")
for i, structure in enumerate(structure_encoder.classes_):
    print(f"{i}: {structure}")

# También puedes obtenerlas como lista
estructuras_disponibles = list(structure_encoder.classes_)

Estructuras disponibles:
0: bridge
1: bridge chorus
2: bridge chorus instrumental
3: bridge chorus instrumental interlude
4: bridge chorus instrumental interlude intro
5: bridge chorus instrumental interlude intro outro
6: bridge chorus instrumental interlude intro outro solo
7: bridge chorus instrumental interlude intro outro solo verse
8: bridge chorus instrumental interlude intro outro verse
9: bridge chorus instrumental interlude intro solo
10: bridge chorus instrumental interlude intro solo verse
11: bridge chorus instrumental interlude intro verse
12: bridge chorus instrumental interlude outro
13: bridge chorus instrumental interlude outro solo
14: bridge chorus instrumental interlude outro solo verse
15: bridge chorus instrumental interlude outro verse
16: bridge chorus instrumental interlude solo
17: bridge chorus instrumental interlude solo verse
18: bridge chorus instrumental interlude verse
19: bridge chorus instrumental intro
20: bridge chorus instrumental intro outro
21: b

In [22]:
df = df.iloc[1:].reset_index(drop=True)
print(df)

                                                   chords   main_genre  \
0       <intro> E D A E D A <verse> E D A E D A E D A ...        metal   
1       <intro> C <verse> G C G C <chorus> F Dm G Dm G...          pop   
2       <intro> G Bm Am D G Bm <verse> Am D G Em Am D ...          pop   
3       <intro> F#m F B E F B E F#m B <chorus> A G#m B...          pop   
4       <chorus> C Am Dm G C G Am Dm G C <verse> Dm C ...          pop   
...                                                   ...          ...   
277919  B C F E B E B F E B F E B E B E B D#m E B F E ...         punk   
277920  A D E A D E A D E A D E A D E A D E A D E A D ...  alternative   
277921  C G Am F C G Am F C Am G Am F Fm C Am C Am F F...          pop   
277922  D#m C#m A#m D#m C# A#m G C#m Fm D# Fm C#m Fm D...         rock   
277923  Dm A# C Dm A# C Dm A# C Dm F G# C# F# Dm Gm C ...         punk   

                        structure  genre_id  structure_id  
0       bridge chorus intro verse         4        

In [23]:
# 5. Entrenamiento
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.8)

def train_epoch(model, dataloader):
    model.train()
    total_loss = 0
    
    for i, batch in enumerate(tqdm(dataloader)):
        inputs = batch['input'].to(device)
        targets = batch['target'].to(device)
        genres = batch['genre'].to(device)
        structures = batch['structure'].to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs, genres, structures)
        
        loss = criterion(outputs.view(-1, vocab_size), targets.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

for epoch in range(10):
    loss = train_epoch(model, dataloader)
    scheduler.step()
    print(f'Epoch {epoch+1}, Loss: {loss:.4f}')

torch.save(model.state_dict(), "model_test.pth")

NameError: name 'dataloader' is not defined

In [None]:
def convert_structure_format(text):
    """Convierte estructuras en texto a formato compatible"""
    parts = text.split()
    return ' '.join(f'<{p}>' for p in parts) if not text.startswith('<') else text

def safe_structure_transform(text):
    """Transforma estructuras manejando casos desconocidos"""
    formatted = convert_structure_format(text)
    try:
        return structure_encoder.transform([formatted])[0]
    except ValueError:
        default = '<verse> <chorus>'
        print(f"Estructura '{text}' no encontrada. Usando default: {default}")
        return structure_encoder.transform([default])[0]
    
def generate_chords(genre, structure, start_seq="<intro>", max_len=32, temperature=0.8):
    model.eval()
    
    # 1. Preprocesamiento seguro de inputs
    genre_id = genre_encoder.transform([genre])[0]
    structure_id = safe_structure_transform(structure)  # ¡Usamos la nueva función!
    
    # 2. Convertir secuencia inicial a índices
    input_seq = [chord_to_idx.get(s, chord_to_idx['<unk>']) for s in start_seq.split()]
    
    # 3. Preparar tensores
    input_tensor = torch.tensor([input_seq], dtype=torch.long).to(device)
    genre_tensor = torch.tensor([genre_id], dtype=torch.long).to(device)
    structure_tensor = torch.tensor([structure_id], dtype=torch.long).to(device)
    
    # 4. Generación
    generated = []
    with torch.no_grad():
        for _ in range(max_len):
            output = model(input_tensor, genre_tensor, structure_tensor)
            probs = torch.softmax(output[0, -1] / temperature, dim=-1)
            next_idx = torch.multinomial(probs, 1).item()
            next_chord = idx_to_chord[next_idx]
            
            generated.append(next_chord)
            input_tensor = torch.cat([
                input_tensor, 
                torch.tensor([[next_idx]], device=device)
            ], dim=1)
            
            # Detener si se completa una estructura
            if next_chord.startswith('<') and len(generated) > 5:
                break
    
    # 5. Post-procesamiento
    return ' '.join(generated)

progresion = generate_chords(
    genre="jazz",
    structure="intro verse verse chorus verse verse",  # ¡Sin < >!
    start_seq="C Am",
    temperature=0.2
)
print(progresion)