[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gomar0801/BachGen/blob/main/notebooks/Pipeline%20complet%20BachGen.ipynb)

In [1]:
%%capture
# Clone the BachGen repository
!rm -rf BachGen && git clone https://github.com/gomar0801/BachGen.git
!chmod +x ./BachGen/scripts/setup.sh
!./BachGen/scripts/setup.sh

In [None]:
!rm -rf BachGen && git clone https://github.com/gomar0801/BachGen.git

Cloning into 'BachGen'...
remote: Enumerating objects: 414, done.[K
remote: Counting objects:   0% (1/224)[Kremote: Counting objects:   1% (3/224)[Kremote: Counting objects:   2% (5/224)[Kremote: Counting objects:   3% (7/224)[Kremote: Counting objects:   4% (9/224)[Kremote: Counting objects:   5% (12/224)[Kremote: Counting objects:   6% (14/224)[Kremote: Counting objects:   7% (16/224)[Kremote: Counting objects:   8% (18/224)[Kremote: Counting objects:   9% (21/224)[Kremote: Counting objects:  10% (23/224)[Kremote: Counting objects:  11% (25/224)[Kremote: Counting objects:  12% (27/224)[Kremote: Counting objects:  13% (30/224)[Kremote: Counting objects:  14% (32/224)[Kremote: Counting objects:  15% (34/224)[Kremote: Counting objects:  16% (36/224)[Kremote: Counting objects:  17% (39/224)[Kremote: Counting objects:  18% (41/224)[Kremote: Counting objects:  19% (43/224)[Kremote: Counting objects:  20% (45/224)[Kremote: Counting objects:  21% (4

In [2]:
from bachgen.download_data import download_all
from bachgen.extract import extract_archive
from bachgen.data_filter import load_and_filter_piano_classical
from bachgen.display_and_play_partition import display_and_play
from bachgen.score_to_tokens_solution_all2 import MusicXML_to_tokens
from bachgen.tokens_to_musicxml import convert_tokens_to_musicxml

  import pkg_resources


# Telechargement des données

In [3]:
download_all()

⬇️  Téléchargement depuis https://zenodo.org/records/15571083/files/mxl.tar.gz?download=1 ...
✅ Fichier téléchargé : data/mxl.tar.gz
⬇️  Téléchargement depuis https://zenodo.org/records/15571083/files/PDMX.csv?download=1 ...
✅ Fichier téléchargé : data/PDMX.csv


In [4]:
extract_folder=extract_archive("/content/data/mxl.tar.gz")

📦 Extraction de /content/data/mxl.tar.gz vers data/mxl ...
✅ Extraction terminée dans : data/mxl


# Filtrage

In [5]:
df_piano_classical = load_and_filter_piano_classical()

🎹 24008 partitions de piano (genre classique) trouvées.


In [8]:
import os
import pandas as pd
from music21 import converter

def convert_mxl_to_musicxml(input_path, output_path):
    """
    Convertit un fichier .mxl en un fichier MusicXML (.musicxml) en utilisant music21.
    """
    score = converter.parse(input_path)
    score.write('musicxml', fp=output_path)

def batch_convert_piano_classical(csv_path, mxl_root, output_dir):
    """
    Filtre les partitions de piano classique et convertit leurs fichiers .mxl en .musicxml.

    Args:
        csv_path (str): chemin vers PDMX.csv
        mxl_root (str): dossier racine où sont les fichiers .mxl extraits
        output_dir (str): dossier de sortie pour les fichiers .musicxml
    """
    df = pd.read_csv(csv_path)

    # Filtrage piano classique
    df_piano = df[df['n_tracks'] == 2]
    df_piano_classical = df_piano[df_piano['genres'] == 'classical']

    os.makedirs(output_dir, exist_ok=True)

    for _, row in df_piano_classical.iterrows():
        relative_path = row['path']  # ou la bonne colonne du CSV
        input_path = os.path.join(mxl_root, relative_path)

        if not os.path.exists(input_path):
            print(f"⚠️ Fichier introuvable : {input_path}")
            continue

        output_filename = os.path.splitext(os.path.basename(relative_path))[0] + ".musicxml"
        output_path = os.path.join(output_dir, output_filename)

        try:
            print(f"🎼 Conversion : {input_path} → {output_path}")
            convert_mxl_to_musicxml(input_path, output_path)
        except Exception as e:
            print(f"❌ Erreur avec {input_path} : {e}")

    print(f"✅ Conversion terminée. Fichiers sauvegardés dans : {output_dir}")

# Exemple d'utilisation :
if __name__ == "__main__":
    batch_convert_piano_classical(
        csv_path="data/PDMX.csv",
        mxl_root="/content/data/mxl/mxl",
        output_dir="data/musicxml_classical_piano"
    )


[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
⚠️ Fichier introuvable : /content/data/mxl/mxl/./data/11/42/QmTsASnDgXU5o2Ti6DNdhgeQQwkd3svESjjVcRsug9dmPS.json
⚠️ Fichier introuvable : /content/data/mxl/mxl/./data/11/47/QmTUTrbjQQaKueU9HfFLDNGEVGF6b9oCs23pqCwPx6oSPG.json
⚠️ Fichier introuvable : /content/data/mxl/mxl/./data/11/47/QmTUPhvA5f2NyLHwHasCx4EvjjydWJSbf8SAyrzm6r6G1B.json
⚠️ Fichier introuvable : /content/data/mxl/mxl/./data/11/47/QmTUxSNEsB6ckGRBjfgH9CW4XEqZuor7osSfwxmTzotkMj.json
⚠️ Fichier introuvable : /content/data/mxl/mxl/./data/11/47/QmTUSJnsJ1fpQhyB5TQ5YKmhL3MUiY9UTSPbgX1UBrRp4N.json
⚠️ Fichier introuvable : /content/data/mxl/mxl/./data/11/47/QmTUrcz5cEh3U2BH4TVbTQxwXkXuZTxY49s3ms2fWZetW8.json
⚠️ Fichier introuvable : /content/data/mxl/mxl/./data/11/47/QmTU9Epbr4RXZgyAa7MtAZQJSyLKw96BUEPdy8Yj6Nhn5Y.json
⚠️ Fichier introuvable : /content/data/mxl/mxl/./data/11/47/QmTUdEfeZhphsgS77t3rjfpz8gy4rZymX6sHuqfiaAyt1S.json
⚠️ Fichier i