In [None]:
import os
import uuid
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
from datasets import load_from_disk
from midistral.audio_analysis import (
    genre_classes,
    get_chords,
    get_mood_and_genre,
    instruments_classes,
    mood_classes,
)
from midistral.midi_utils import get_instruments, get_midi_and_ogg_from_abc
from sklearn.metrics import classification_report

In [None]:
def get_vector(genres: List[str], labels: List[str]):
    """Encode a list of tags as a multi-hot vector aligned with ``labels``.

    Args:
        genres: Tags present for one sample (despite the name, any tag
            family can be passed, as long as the tags come from ``labels``).
        labels: The full, ordered label vocabulary.

    Returns:
        A list of ``len(labels)`` integers: 1 at the position of every tag
        found in ``genres``, 0 everywhere else.

    Raises:
        ValueError: If a tag in ``genres`` does not appear in ``labels``.
    """
    # list.index returns the first matching position and raises ValueError
    # for an unknown tag.
    hot_positions = {labels.index(tag) for tag in genres}
    return [1 if position in hot_positions else 0 for position in range(len(labels))]


def evaluate_prediction(
    labels: List[str],
    predicted: List[List[str]],
    ground_truth: List[List[str]],
):
    """Score multi-label predictions against the ground truth.

    Each sample's tag list is converted into a 0/1 indicator row over
    ``labels`` with ``get_vector``; the stacked rows are then passed to
    scikit-learn's ``classification_report``.

    Args:
        labels: Ordered label vocabulary; also used as the report's
            ``target_names``.
        predicted: One list of predicted tags per sample.
        ground_truth: One list of reference tags per sample, in the same
            sample order as ``predicted``.

    Returns:
        The classification report as a dictionary (``output_dict=True``).

    Raises:
        ValueError: Propagated from ``get_vector`` when a tag is not part of
            ``labels``.
    """
    # Predictions are encoded first, then the references, so an unknown tag
    # in ``predicted`` is reported before one in ``ground_truth``.
    y_pred = np.array([get_vector(tags, labels) for tags in predicted])
    y_true = np.array([get_vector(tags, labels) for tags in ground_truth])

    return classification_report(
        y_true, y_pred, target_names=labels, output_dict=True
    )


In [None]:
# Project folders are resolved from the kernel's working directory, which is
# taken to be the notebooks folder; "output" and "data" sit next to it.
NOTEBOOKS_FOLDER = Path.cwd()
OUTPUT_FOLDER = NOTEBOOKS_FOLDER.parent.joinpath("output")
DATA_FOLDER = NOTEBOOKS_FOLDER.parent.joinpath("data")

# Test split used for evaluation, loaded from a dataset saved under the
# output folder.
TEST_DATASET = load_from_disk(
    OUTPUT_FOLDER.joinpath("datasets", "midi_abc_dataset-test")
)

In [None]:

# Evaluate each model: synthesise audio for the test records, tag it with the
# audio-analysis helpers, persist predictions next to the ground truth, and
# score them per label family (genre, mood, instruments).

# Reports of every evaluated model, keyed by model name, so that results are
# not lost when more than one model is listed below. The plain
# `genre_report` / `mood_report` / `instruments_report` names are still bound
# and always hold the reports of the last model processed.
reports_by_model = {}

for m in ["model1"]:
    ANNOTATION_OUTPUT_PATH = OUTPUT_FOLDER / f"annotations_{m}_output.jsonl"
    TMP_AUDIO_FOLDER = OUTPUT_FOLDER / "tmp_audio" / m
    # Created once per model; every file generated below is written directly
    # into this folder, so no further mkdir is needed inside the loop.
    TMP_AUDIO_FOLDER.mkdir(exist_ok=True, parents=True)

    genre_gt = []
    mood_gt = []
    instruments_gt = []

    genre_preds = []
    mood_preds = []
    instruments_preds = []

    for r in TEST_DATASET:
        genre_gt.append(r["genre"])
        mood_gt.append(r["mood"])
        instruments_gt.append(r["instrument_summary"])

        # run inference
        # NOTE(review): this is a hard-coded ABC tune, not output generated
        # for record `r` by model `m` — presumably a placeholder until real
        # inference is wired in; confirm before trusting the reports.
        abc_notation_text = "X: 1\nM: 4/4\nL: 1/8\nQ:1/4=120\nK:D\nV:1\n%%MIDI program 0\n G/2G/2c/2A/2| B/2B/2d/2G/2| A/2A/2F/2G/2| B/2B/2d/2G/2|G/2G/2c/2A/2| B/2B/2d/2G/2| A/2A/2F/2G/2| B/2B/2d/2G/2|G/2G/2c/2A/2| B/2B/2d/2G/2| A/2A/2F/2G/2| B/2B/2d/2G/2| B/2B/2d/2G/2| A/2A/2F/2G/2| B/2B/2d/2G/2| B/2B/2d/2G/2| A/2A/2F/2G/2| B/2B/2d/2G/2|\n"

        # generate audio
        midi, ogg = get_midi_and_ogg_from_abc(abc_notation_text)
        # A random UUID gives each generated pair of files a unique stem.
        file_uuid = str(uuid.uuid4())
        # Build both paths once so the files written are exactly the files
        # analysed below.
        midi_path = TMP_AUDIO_FOLDER / f"{file_uuid}.midi"
        ogg_path = TMP_AUDIO_FOLDER / f"{file_uuid}.ogg"
        midi_path.write_bytes(midi)
        ogg_path.write_bytes(ogg)

        # analyse it
        # Only mood_tags, genre_tags and instrument_summary feed the
        # evaluation; the other results stay bound under their original
        # names in case later cells inspect them.
        chords_out, chord_summary = get_chords(ogg_path)
        mood_tags, mood_cs, genre_tags, genre_cs = get_mood_and_genre(ogg_path)
        instrument_numbers_sorted, instrument_summary = get_instruments(midi_path)

        # log it
        mood_preds.append(mood_tags)
        # Keep at most the first two genre tags as the genre prediction.
        genre_preds.append(genre_tags[:2])
        instruments_preds.append(instrument_summary)

        # NOTE(review): stops after the first test record, so the reports
        # below are computed on a single sample. Looks like a debugging
        # shortcut — remove to evaluate the whole test set.
        break

    res_df = pd.DataFrame(
        {
            "mood_preds": mood_preds,
            "mood_gt": mood_gt,
            "genre_preds": genre_preds,
            "genre_gt": genre_gt,
            "instruments_preds": instruments_preds,
            "instruments_gt": instruments_gt,
        }
    )

    # Persist predictions and ground truth as JSON Lines (one record per line).
    res_df.to_json(ANNOTATION_OUTPUT_PATH, orient="records", lines=True)

    # Score from the file that was just written, so the reports reflect
    # exactly what was persisted.
    df = pd.read_json(ANNOTATION_OUTPUT_PATH, lines=True)
    genre_report = evaluate_prediction(
        genre_classes, df["genre_preds"].to_list(), df["genre_gt"].to_list()
    )
    mood_report = evaluate_prediction(
        mood_classes, df["mood_preds"].to_list(), df["mood_gt"].to_list()
    )
    instruments_report = evaluate_prediction(
        instruments_classes,
        df["instruments_preds"].to_list(),
        df["instruments_gt"].to_list(),
    )

    reports_by_model[m] = {
        "genre": genre_report,
        "mood": mood_report,
        "instruments": instruments_report,
    }
