Extreure els acords dels arxius .mid a un .json

In [None]:
import os
import json
import pretty_midi
import music21
from tqdm import tqdm

def midi_notes_to_chord_name(pitches):
    """Name the chord formed by a collection of MIDI pitch numbers.

    Each pitch is turned into a ``music21.note.Note`` and the notes are
    combined into a ``music21.chord.Chord``.

    Args:
        pitches: Iterable of MIDI pitch numbers.

    Returns:
        The chord's ``pitchedCommonName``, or ``None`` when building or
        naming the chord raises any exception.
    """
    try:
        members = []
        for midi_number in pitches:
            members.append(music21.note.Note(midi=midi_number))
        # Original author's example of the result: 'C major triad'
        # (exact formatting is decided by music21 -- TODO confirm).
        return music21.chord.Chord(members).pitchedCommonName
    except Exception:
        # Best effort: an unnameable pitch set is reported as "no chord".
        return None

def extract_chords(midi_file):
    """Collect chord names from every non-drum instrument of a MIDI file.

    Notes of one instrument whose start times round to the same value
    (2 decimals) are grouped together; each group with more than one
    distinct pitch is named via ``midi_notes_to_chord_name``. Instruments
    are processed one after another, and within an instrument the groups
    are visited in increasing start time.

    Args:
        midi_file: Path passed to ``pretty_midi.PrettyMIDI``.

    Returns:
        List of chord names. An empty list is returned (and a message is
        printed) if anything raises while reading or processing the file.
    """
    try:
        score = pretty_midi.PrettyMIDI(midi_file)
        found = []

        for track in score.instruments:
            if track.is_drum:
                continue

            # Bucket pitches by rounded onset so near-simultaneous notes
            # end up in the same group.
            onsets = {}
            for event in track.notes:
                onsets.setdefault(round(event.start, 2), []).append(event.pitch)

            for onset in sorted(onsets):
                distinct = sorted(set(onsets[onset]))
                if len(distinct) <= 1:
                    # A single pitch class is not treated as a chord.
                    continue
                label = midi_notes_to_chord_name(distinct)
                if label:
                    found.append(label)

        return found
    except Exception as e:
        print(f"Error procesando {midi_file}: {e}")
        return []

def process_dataset(root_folder):
    """Extract chords for every ``.mid`` file under per-artist folders.

    Every sub-directory of ``root_folder`` is treated as an artist; plain
    files at the top level are ignored. Inside an artist folder only
    entries whose name ends with ".mid" are processed.

    Args:
        root_folder: Directory containing one sub-directory per artist.

    Returns:
        ``{artist: {song_filename: [chord_name, ...]}}``. Songs for which
        ``extract_chords`` returns an empty list are left out; an artist
        with no such songs still gets an empty dict.
    """
    dataset = {}
    for artist_name in tqdm(os.listdir(root_folder)):
        artist_dir = os.path.join(root_folder, artist_name)
        if os.path.isdir(artist_dir):
            songs = {}
            dataset[artist_name] = songs
            for filename in os.listdir(artist_dir):
                if not filename.endswith(".mid"):
                    continue
                progression = extract_chords(os.path.join(artist_dir, filename))
                if progression:
                    songs[filename] = progression
    return dataset

# Input and output paths
dataset_path = "Midi Dataset Clean"
output_file = "data/raw_chords_dataset.json"

# Make sure the output directory exists *before* the (potentially long)
# processing step, so the run cannot fail at the very end just because
# "data/" is missing.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Processing
chord_data = process_dataset(dataset_path)

# Save to JSON
with open(output_file, "w") as f:
    json.dump(chord_data, f, indent=2)

print(f"Dataset con nombres de acordes guardado en: {output_file}")


Simplificar acords i treure duplicats

In [None]:
import json
import re

# Load the original (raw) chord file
with open("data/raw_chords_dataset.json", "r") as raw_file:
    full_data = json.load(raw_file)

# Note spellings accepted as valid
valid_notes = [
    "C", "C#", "Db",
    "D", "D#", "Eb",
    "E",
    "F", "F#", "Gb",
    "G", "G#", "Ab",
    "A", "A#", "Bb",
    "B",
]

def extract_note(text):
    """Return the last word of ``text`` that is a valid note name.

    ``text`` is split on whitespace and scanned from the end; the first
    word found in the module-level ``valid_notes`` list is returned.
    Returns ``None`` when no word qualifies.
    """
    words_from_end = reversed(text.split())
    return next((word for word in words_from_end if word in valid_notes), None)

# A standalone pitch name, optionally followed by "-major" / " minor".
# The note letter is matched case-sensitively and must not be part of a
# longer word, otherwise letters inside words such as "Major" or "Perfect"
# would be mistaken for the root.
_CHORD_NAME_RE = re.compile(
    r"(?<![A-Za-z0-9])"        # not the tail of a word or of a label like "6-36B"
    r"([A-G])"                 # root letter (upper case only)
    r"(#|b|-(?![A-Za-z]))?"    # accidental; '-' is a flat unless a word follows it
    r"(?:"
    r"[- ]?((?i:minor|major))" # optional separator + quality (any letter case)
    r"|(?![A-Za-z])"           # ...or the note simply ends here
    r")"
)


def simplify_chord(chord_text):
    """Reduce a descriptive chord name to its root note and minor flag.

    The first standalone note name in the text is used as the root, so
    both "C-major triad" and "Major Third above C" yield "C". A quality
    word is only honoured when it directly follows the note (optionally
    separated by "-" or a space).

    Flats written with a trailing "-" (the spelling music21 is expected
    to use, e.g. "E--major triad" -- TODO confirm against real data) are
    normalized to "b" so the result matches the "Eb"/"Bb" style.

    Args:
        chord_text: Chord description, e.g. "A-minor triad".

    Returns:
        "minor <note>" if the note is followed by "minor", otherwise just
        "<note>"; an empty string if no note name is found.
    """
    match = _CHORD_NAME_RE.search(chord_text.strip())
    if match is None:
        return ""

    letter, accidental, quality = match.groups()
    if accidental == "-":
        accidental = "b"
    note = letter + (accidental or "")

    # Major (and any unrecognised quality) collapses to the bare note.
    if quality and quality.lower() == "minor":
        return f"minor {note}"
    return note


def remove_consecutive_duplicates(chord_list):
    """Collapse runs of equal adjacent items into a single item.

    Non-adjacent repeats are kept, so ["C", "C", "G", "C"] becomes
    ["C", "G", "C"]. A falsy input (empty list, ``None``) gives ``[]``.

    Args:
        chord_list: Sequence of chords (must support indexing/slicing).

    Returns:
        A new list with consecutive duplicates removed.
    """
    if not chord_list:
        return []
    # Keep the first item, then every item that differs from its predecessor.
    kept_tail = [
        current
        for previous, current in zip(chord_list, chord_list[1:])
        if current != previous
    ]
    return [chord_list[0]] + kept_tail


# Walk the loaded JSON: simplify every chord of every song, then collapse
# consecutive repeats within each song.
simplified_data = {
    artist: {
        song_name: remove_consecutive_duplicates(
            [simplify_chord(c) for c in chords]
        )
        for song_name, chords in songs.items()
    }
    for artist, songs in full_data.items()
}

# Save the result
with open("data/simplified_chords.json", "w") as f:
    json.dump(simplified_data, f, indent=2)
