## Generació d'acords musicals segons una tonalitat

Imports

In [2]:
import re
import os
import json
import music21
import pretty_midi
from tqdm import tqdm
from collections import Counter

Extreure els acords dels arxius .mid a un .json

In [None]:
def midi_notes_to_chord_name(pitches):
    """Translate a collection of MIDI pitch numbers into a chord name.

    Each pitch is wrapped in a ``music21.note.Note``, the notes are combined
    into a ``music21.chord.Chord`` and its ``pitchedCommonName`` is returned
    (the original comment gives 'C major triad' as an example; the exact
    format is defined by music21).

    Returns ``None`` if anything raises while building the notes or chord.
    """
    try:
        chord_notes = []
        for midi_number in pitches:
            chord_notes.append(music21.note.Note(midi=midi_number))
        return music21.chord.Chord(chord_notes).pitchedCommonName
    except Exception:
        # Best effort: a pitch set that cannot be named is reported as None
        return None

def extract_chords(midi_file):
    """Extract the chord names found in a MIDI file.

    For every non-drum instrument, notes are grouped by their start time
    rounded to two decimals. Each group with more than one distinct pitch is
    passed to ``midi_notes_to_chord_name``; names that come back non-empty are
    collected in order of start time, one instrument after another.

    Returns the list of chord names, or an empty list (after printing the
    error) if loading or processing the file raises.
    """
    try:
        midi = pretty_midi.PrettyMIDI(midi_file)
        chord_names = []

        for instrument in midi.instruments:
            # Percussion tracks carry no harmonic information
            if instrument.is_drum:
                continue

            # Bucket pitches by start time, rounded to 2 decimals so that
            # near-simultaneous notes land in the same bucket
            pitches_by_onset = {}
            for note in instrument.notes:
                pitches_by_onset.setdefault(round(note.start, 2), []).append(note.pitch)

            for onset in sorted(pitches_by_onset):
                distinct_pitches = sorted(set(pitches_by_onset[onset]))
                # A single pitch is not a chord
                if len(distinct_pitches) <= 1:
                    continue
                name = midi_notes_to_chord_name(distinct_pitches)
                if name:
                    chord_names.append(name)

        return chord_names
    except Exception as e:
        print(f"Error procesando {midi_file}: {e}")
        return []

def process_dataset(root_folder):
    """Walk an ``artist/song.mid`` folder tree and extract chords per song.

    Every sub-directory of ``root_folder`` is treated as an artist. Each file
    in it whose name ends with ".mid" is run through ``extract_chords``; songs
    that yield no chords are left out.

    Returns a dict of the form ``{artist: {song_filename: [chord, ...]}}``.
    Artists get an entry even when none of their songs produced chords.
    """
    dataset = {}
    for artist_name in tqdm(os.listdir(root_folder)):
        artist_dir = os.path.join(root_folder, artist_name)
        if os.path.isdir(artist_dir):
            artist_songs = {}
            dataset[artist_name] = artist_songs
            for filename in os.listdir(artist_dir):
                if not filename.endswith(".mid"):
                    continue
                song_chords = extract_chords(os.path.join(artist_dir, filename))
                if song_chords:
                    artist_songs[filename] = song_chords
    return dataset

# Input dataset folder and output JSON path
dataset_path = "Midi Dataset Clean"
output_file = "data/raw_chords_dataset.json"

# open(..., "w") does not create missing parent folders. Create the output
# folder before the extraction starts so a missing folder cannot make the
# final write fail after the whole dataset has been processed.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

chord_data = process_dataset(dataset_path)

# Save as JSON
with open(output_file, "w") as f:
    json.dump(chord_data, f, indent=2)

print(f"Dataset amb els acords guardat a: {output_file}")


Simplificar acords i treure duplicats

In [None]:
# Load the raw chord dataset (read mode is the default for open())
with open("data/raw_chords_dataset.json") as f:
    full_data = json.load(f)

# Valid note names: the naturals plus their sharp and flat spellings
valid_notes = [
    "C", "C#", "Db", "D", "D#", "Eb", "E", "F", "F#",
    "Gb", "G", "G#", "Ab", "A", "A#", "Bb", "B",
]


def extract_note(text):
    """Return the last whitespace-separated word of *text* that is a note name.

    The match against ``valid_notes`` is exact and case-sensitive. Returns
    ``None`` when no word of *text* is a valid note name.
    """
    words_from_end = reversed(text.split())
    return next((word for word in words_from_end if word in valid_notes), None)

def simplify_chord(chord_text):
    """Reduce a chord description to its note name and minor/major quality.

    The first occurrence of a note letter (A-G, optionally followed by ``b``
    or ``#``), an optional ``-`` or space, and an optional "minor"/"major"
    word is searched for, case-insensitively. Because the whole pattern is
    case-insensitive, a lowercase letter a-g inside a word also counts as a
    note.

    Returns ``"minor <note>"`` for a minor match, ``"<note>"`` otherwise, and
    an empty string when nothing matches. The note is returned exactly as it
    appears in the text.
    """
    found = re.search(r"([A-G][b#]?)[- ]?(minor|major)?", chord_text.strip(), re.IGNORECASE)
    if found is None:
        return ""

    note, quality = found.groups()
    # quality is None when no minor/major word directly follows the note
    is_minor = (quality or "").lower() == "minor"
    return f"minor {note}" if is_minor else note

def normalize_chord(chord):
    """Normalize a chord string to a compact upper-case form.

    If the stripped text contains "minor" (case-insensitive), the last
    whitespace-separated token is upper-cased and suffixed with ``m``
    (e.g. "minor A" -> "Am"). Otherwise the whole stripped text is
    upper-cased (e.g. "c" -> "C").

    The upper-casing covers every character, so a flat such as "Bb"
    becomes "BB".
    """
    cleaned = chord.strip()
    if 'minor' not in cleaned.lower():
        return cleaned.upper()
    root = cleaned.split()[-1].upper()
    return root + "m"


def remove_consecutive_duplicates(chord_list):
    """Collapse runs of equal adjacent items into a single item.

    Only consecutive repetitions are removed; a chord that reappears later,
    after a different chord, is kept. Returns a new list; an empty or falsy
    input yields an empty list.
    """
    if not chord_list:
        return []

    deduped = list(chord_list[:1])
    for current in chord_list[1:]:
        # Compare against the last kept item, not the previous input item
        if current != deduped[-1]:
            deduped.append(current)
    return deduped


# Walk the dataset and normalize every chord of every song
simplified_data = {}

for artist, songs in full_data.items():
    # Consecutive-duplicate removal (remove_consecutive_duplicates) is
    # currently not applied, so repeated chords are kept as they are.
    # NOTE(review): simplify_chord is defined but never called here; the raw
    # chord names go straight to normalize_chord. Confirm this is intended.
    simplified_data[artist] = {
        song_name: [normalize_chord(c) for c in chords]
        for song_name, chords in songs.items()
    }

# Save the result
with open("data/simplified_chords_duplicates.json", "w") as f:
    json.dump(simplified_data, f, indent=2)


Treure mode, tonalitat, tònica i acords únics de cada cançó

In [None]:
def normalize_chord(chord):
    """Normalize a chord string to a compact upper-case form.

    If the stripped text contains "minor" (case-insensitive), the last
    whitespace-separated token is upper-cased and suffixed with ``m``
    (e.g. "minor A" -> "Am"). Otherwise the whole stripped text is
    upper-cased (e.g. "c" -> "C").

    The upper-casing covers every character, so a flat such as "Bb"
    becomes "BB".
    """
    cleaned = chord.strip()
    if 'minor' not in cleaned.lower():
        return cleaned.upper()
    root = cleaned.split()[-1].upper()
    return root + "m"

def detect_tonality(chords):
    """Guess a song's tonality from its most frequent chords.

    The chords are normalized with ``normalize_chord`` and counted; only the
    three most common ones take part in the rules. A lowercase ``m`` in a
    normalized chord marks it as minor (``normalize_chord`` upper-cases
    everything else).

    Rules, tried on the top chords in order of frequency:
      1. A minor chord whose same-root major chord is also in the top three
         gives ``"<root>_MINOR"``.
      2. A major chord whose same-root minor chord is not in the top three
         gives ``"<chord>_MAJOR"``.
      3. If no rule fires, the single most common chord decides.

    Note that the comparison is on the same root (e.g. "A" and "Am"), not on
    the relative major/minor.

    Returns a string of the form ``"<tonic>_MAJOR"`` or ``"<tonic>_MINOR"``.
    Raises ``IndexError`` when ``chords`` is empty.
    """
    counts = Counter(normalize_chord(c) for c in chords)

    # Base the decision on the three most common chords
    top_chords = [name for name, _ in counts.most_common(3)]

    for candidate in top_chords:
        if 'm' in candidate:
            # Minor chord: rule 1 needs both the minor and the major on this root
            tonic = candidate.replace('m', '')
            has_minor = f"{tonic}m" in top_chords
            has_major = tonic in top_chords
            if has_minor and has_major:
                return f"{tonic}_MINOR"
        elif f"{candidate}m" not in top_chords:
            # Major chord without its same-root minor among the top chords
            return f"{candidate}_MAJOR"

    # No rule fired: fall back to the single most common chord
    most_common = counts.most_common(1)[0][0]
    suffix = "_MINOR" if 'm' in most_common else "_MAJOR"
    return most_common.replace('m', '') + suffix

def preprocess_json(input_file, output_file):
    """Build the per-song tonality dataset from a chords JSON file.

    Args:
        input_file: Path to a JSON file shaped as
            ``{artist: {song_filename: [chord, ...]}}``.
        output_file: Path of the JSON file to write. Its parent folder is
            created if it does not exist.

    The output is ``{"songs": [...]}`` with one entry per song holding the
    artist, the title (file name without extension), the original file name,
    the detected tonality with its tonic and lower-cased mode, the normalized
    chords, their count and the sorted distinct chords.

    A song whose processing raises is reported on stdout and skipped; the
    remaining songs are still processed.
    """
    with open(input_file, 'r', encoding="utf-8") as f:
        data = json.load(f)

    processed_data = {"songs": []}

    for artist, songs in tqdm(data.items(), desc="Processant artistes"):
        for song_name, chords in songs.items():
            try:
                # Normalize and simplify the chords
                normalized_chords = [normalize_chord(c) for c in chords]

                # Detect the tonality; split on the last underscore only so a
                # tonic that itself contains '_' still unpacks into two parts
                tonality = detect_tonality(chords)
                tonic, mode = tonality.rsplit('_', 1)

                # Song title is the file name without its extension
                song_title = os.path.splitext(song_name)[0]

                processed_data["songs"].append({
                    "artist": artist,
                    "title": song_title,
                    "original_name": song_name,
                    "tonality": tonality,
                    "tonic": tonic,
                    "mode": mode.lower(),
                    "chords": normalized_chords,
                    "chord_count": len(normalized_chords),
                    # Sorted so the output is identical across runs; plain
                    # set iteration order for strings varies between runs
                    "unique_chords": sorted(set(normalized_chords))
                })
            except Exception as e:
                # Best effort: report the song and carry on with the rest
                print(f"Error processant {song_name}: {str(e)}")
                continue

    # open(..., 'w') does not create missing parent folders
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the processed data
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(processed_data, f, indent=2)

    print(f"\nPreprocessament completat, dades guardades a: {output_file}")
    print(f"Total de cançons processades: {len(processed_data['songs'])}")

# Run the preprocessing on the raw chord dataset
input_json = "data/raw_chords_dataset.json"
output_json = "data/dataset_tonality_chords2.json"
preprocess_json(input_file=input_json, output_file=output_json)

Processant artistes: 100%|██████████| 2195/2195 [00:06<00:00, 341.64it/s]
