# Generador de música
### Aquest projecte serveix per generar notes musicals seguint una estructura segons el gènere musical seleccionat

Importem els paquets necessaris

In [None]:
import os
from pathlib import Path

def obtener_track_id_desde_path(midi_path):
    path = Path(midi_path)
    return path.parent.name if len(path.parts) >= 2 else None

print(obtener_track_id_desde_path("lmd_matched/A/A/A/TRAAAGR128F425B14B/1d9d16a9da90c090809c153754823c2b.mid"))

In [None]:
import os
import json

LMD_MATCHED_DIR = 'lmd_matched'
MSD_METADATA_DIR = 'lastfm_train'

# Géneros que te interesan
COMMON_GENRES = {
    'rock', 'pop', 'jazz', 'blues', 'hip hop', 'rap', 'electronic',
    'dance', 'classical', 'metal', 'punk', 'country', 'folk',
    'reggae', 'r&b', 'soul', 'disco', 'house', 'techno', 'funk',
    'alternative', 'indie', 'grunge', 'trance', 'synthpop', 'electropop'
}

# Normaliza el texto del género
def normalize_genre(genre):
    return genre.lower().strip()

# Carga los metadatos y extrae el género válido
def extract_genre(mbid):
    metadata_path = os.path.join(MSD_METADATA_DIR, mbid + '.json')
    if not os.path.exists(metadata_path):
        return None

    with open(metadata_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
        print(data)

    tags = data.get('tags', [])
    for tag_entry in tags:
        if isinstance(tag_entry, list) and len(tag_entry) >= 1:
            tag = tag_entry[0].lower().strip()
            if tag in COMMON_GENRES:
                return tag
    return None

# Asociación final: solo mbid, midi_path y género
midi_with_genres = []

for root, _, files in os.walk(LMD_MATCHED_DIR):
    for file in files:
        if file.lower().endswith('.mid') or file.lower().endswith('.midi'):
            mbid = os.path.basename(root)
            midi_path = os.path.join(root, file)
            genre = extract_genre(mbid)

            if genre:
                midi_with_genres.append({
                    'mbid': mbid,
                    'midi_path': midi_path,
                    'genre': genre
                })

# Guardamos el resultado
with open('lmd_genre_filtered.json', 'w') as out_f:
    json.dump(midi_with_genres, out_f, indent=2)

print(f"Total MIDI con género válido: {len(midi_with_genres)}")


In [None]:
import pretty_midi

midi = pretty_midi.PrettyMIDI("Midi Dataset Clean/ARMSTRONG_LOUIS/What_a_Wonderful_World.mid")

In [None]:
from music21 import converter, chord
score = converter.parse("Midi Dataset Clean/ARMSTRONG_LOUIS/What_a_Wonderful_World.mid")
chords = score.chordify()

print(chords)

In [None]:
import os
import json
import pretty_midi

def extract_chords_from_midi(midi_path):
    try:
        midi_data = pretty_midi.PrettyMIDI(midi_path)
        chords = set()

        for instrument in midi_data.instruments:
            if instrument.is_drum:
                continue
            notes = instrument.notes
            if len(notes) < 3:
                continue

            # Agrupar por tiempo cercano para detectar acordes
            notes.sort(key=lambda note: note.start)
            current_chord = []
            last_start = None
            for note in notes:
                if last_start is None or abs(note.start - last_start) < 0.05:
                    current_chord.append(note.pitch)
                    last_start = note.start
                else:
                    if len(current_chord) >= 3:
                        chord = tuple(sorted(current_chord))
                        chords.add(chord)
                    current_chord = [note.pitch]
                    last_start = note.start
            if len(current_chord) >= 3:
                chord = tuple(sorted(current_chord))
                chords.add(chord)

        return [list(chord) for chord in chords]
    except Exception as e:
        print(f"Error en {midi_path}: {e}")
        return []

# Ruta al directorio raíz
root_dir = "Midi Dataset Clean"
output = {}

for dirpath, dirnames, filenames in os.walk(root_dir):
    for filename in filenames:
        if filename.lower().endswith((".mid", ".midi")):
            midi_path = os.path.join(dirpath, filename)
            chords = extract_chords_from_midi(midi_path)
            if chords:
                output[os.path.relpath(midi_path, root_dir)] = chords

# Guardar a un JSON
with open("midi_chords.json", "w") as f:
    json.dump(output, f, indent=2)


In [None]:
from music21 import chord, pitch

def midi_notes_to_chord_name(notes):
    try:
        c = chord.Chord(notes)
        return c.figure  # Ej: "C", "Am", "G7"
    except Exception:
        return None


In [None]:
with open("midi_chords.json", "r") as f:
    data = json.load(f)

sequences = []
for midi_path, chords in data.items():
    sequence = []
    for chord_notes in chords:
        name = midi_notes_to_chord_name(chord_notes)
        if name:
            sequence.append(name)
    if sequence:
        sequences.append(sequence)


In [None]:
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences

tokenizer = Tokenizer(filters='', lower=False)
tokenizer.fit_on_texts(sequences)
sequences_int = tokenizer.texts_to_sequences(sequences)

max_len = max(len(seq) for seq in sequences_int)
padded_sequences = pad_sequences(sequences_int, maxlen=max_len, padding='post')


In [3]:
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import random
from collections import defaultdict

# ========= CONFIG =========
SEQ_LEN = 16
BATCH_SIZE = 64
HIDDEN_SIZE = 128
EPOCHS = 30
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ========= CARGA Y PROCESADO =========
with open("midi_chords.json", "r") as f:
    raw_data = json.load(f)

# Convertimos acordes a tuplas para que sean hasheables
all_chords = []
for chords in raw_data.values():
    for chord in chords:
        chord_tuple = tuple(sorted(chord))  # ordena para normalizar
        all_chords.append(chord_tuple)

# Creamos vocabulario
unique_chords = sorted(list(set(all_chords)))
chord2idx = {ch: i for i, ch in enumerate(unique_chords)}
idx2chord = {i: ch for ch, i in chord2idx.items()}

# Convertimos a índices
encoded_data = []
for chords in raw_data.values():
    sequence = [chord2idx[tuple(sorted(ch))] for ch in chords]
    encoded_data.append(sequence)

# Construimos pares input-output
inputs, targets = [], []
for seq in encoded_data:
    for i in range(len(seq) - SEQ_LEN):
        inputs.append(seq[i:i+SEQ_LEN])
        targets.append(seq[i+SEQ_LEN])

# ========= DATASET =========
class ChordDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

dataset = ChordDataset(inputs, targets)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# ========= MODELO =========
class ChordRNN(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        x = self.embed(x)
        output, _ = self.lstm(x)
        output = self.fc(output[:, -1])
        return output

model = ChordRNN(len(chord2idx), HIDDEN_SIZE).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

# ========= ENTRENAMIENTO =========
for epoch in range(EPOCHS):
    total_loss = 0
    for x_batch, y_batch in loader:
        x_batch, y_batch = x_batch.to(DEVICE), y_batch.to(DEVICE)

        optimizer.zero_grad()
        output = model(x_batch)
        loss = loss_fn(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {total_loss:.4f}")

# ========= GENERACIÓN =========
def generate_sequence(start_seq, length):
    model.eval()
    result = start_seq[:]
    input_seq = torch.tensor(start_seq, dtype=torch.long).unsqueeze(0).to(DEVICE)

    for _ in range(length):
        if input_seq.size(1) < SEQ_LEN:
            padded = torch.zeros((1, SEQ_LEN), dtype=torch.long).to(DEVICE)
            padded[0, -input_seq.size(1):] = input_seq
            input_seq = padded

        pred = model(input_seq[:, -SEQ_LEN:])
        next_idx = torch.argmax(pred, dim=1).item()
        result.append(next_idx)
        input_seq = torch.tensor(result[-SEQ_LEN:], dtype=torch.long).unsqueeze(0).to(DEVICE)

    return [idx2chord[i] for i in result]

# ========= GENERAR CON ESTRUCTURA =========
structure = {
    "estrofa": 8,
    "puente": 4,
    "estribillo": 8
}

order = ["estrofa", "estrofa", "puente", "estribillo", "estrofa", "estrofa", "puente", "estribillo"]

start = random.choice(inputs)
generated = []

for part in order:
    length = structure[part]
    gen_part = generate_sequence(start[-SEQ_LEN:], length)
    generated.extend(gen_part)
    start = [chord2idx[tuple(ch)] for ch in gen_part]

# ========= GUARDAR =========
with open("acordes_generados.json", "w") as f:
    json.dump([list(ch) for ch in generated], f, indent=2)

print("Generación completada.")


Epoch 1/30 - Loss: 67832.2207
Epoch 2/30 - Loss: 54513.7836
Epoch 3/30 - Loss: 46864.9474
Epoch 4/30 - Loss: 40720.0186
Epoch 5/30 - Loss: 35599.1704
Epoch 6/30 - Loss: 31300.9487
Epoch 7/30 - Loss: 27628.9948
Epoch 8/30 - Loss: 24480.4032
Epoch 9/30 - Loss: 21803.0740
Epoch 10/30 - Loss: 19521.0069
Epoch 11/30 - Loss: 17621.4202
Epoch 12/30 - Loss: 16060.9486
Epoch 13/30 - Loss: 14809.5517
Epoch 14/30 - Loss: 13748.8691
Epoch 15/30 - Loss: 12911.5473
Epoch 16/30 - Loss: 12160.1785
Epoch 17/30 - Loss: 11550.4486
Epoch 18/30 - Loss: 11039.9253
Epoch 19/30 - Loss: 10565.0929
Epoch 20/30 - Loss: 10162.0410
Epoch 21/30 - Loss: 9807.5783
Epoch 22/30 - Loss: 9498.8634
Epoch 23/30 - Loss: 9225.2211
Epoch 24/30 - Loss: 8981.4779
Epoch 25/30 - Loss: 8752.4094
Epoch 26/30 - Loss: 8570.6748
Epoch 27/30 - Loss: 8395.5724
Epoch 28/30 - Loss: 8237.6612
Epoch 29/30 - Loss: 8092.2417
Epoch 30/30 - Loss: 7962.8019
Generación completada.


In [5]:
import json

# Cargar el archivo JSON con secuencias de notas
with open("acordes_generados.json", "r") as f:
    data = json.load(f)

# Convertimos a acordes válidos
chord_sequence = []

for notes in data:
    # Eliminamos repeticiones y ordenamos
    unique_notes = sorted(list(set(notes)))
    
    # Consideramos solo como acordes los que tengan al menos 2 notas
    if len(unique_notes) >= 2:
        chord_sequence.append(unique_notes)

# Guardamos el resultado
with open("acordes_filtrados.json", "w") as f:
    json.dump(chord_sequence, f, indent=2)

print(f"{len(chord_sequence)} acordes extraídos.")


184 acordes extraídos.
