In [1]:
from collections import Counter
from pathlib import Path
import os

from datasets import load_from_disk
from midistral.abc_utils import has_only_silence
from midistral.db.firestore.crud import (
    create_annotated_abc as firestore_create_annotated_abc,
)
from midistral.db.schemas import AnnotatedAbcCreate
from midistral.db.sqlite.crud import create_annotated_abc as sqlite_create_annotated_abc
from midistral.db.sqlite.database import Base, engine
from midistral.types import AudioTextDescription
from tqdm import tqdm

In [3]:
# Rebuild the SQLite schema from scratch so that re-running this cell does not
# insert the dataset a second time.
# WARNING: destructive — drop_all removes the tables registered on
# Base.metadata (and therefore their rows) before they are re-created.
Base.metadata.drop_all(bind=engine)
Base.metadata.create_all(bind=engine)

# Paths are derived from the current working directory, so the dataset is
# looked up in "<parent of cwd>/output/datasets/midi_abc_dataset-train".
# NOTE(review): this presumably expects the kernel to be started from the
# notebooks folder — confirm.
NOTEBOOKS_FOLDER = Path(os.getcwd())
OUTPUT_FOLDER = NOTEBOOKS_FOLDER.parent / "output"
midi_abc_dataset = load_from_disk(OUTPUT_FOLDER / "datasets" / "midi_abc_dataset-train")

# Frequency of every lower-cased label seen in the dataset, per category.
instruments_c = Counter()
mood_c = Counter()
genre_c = Counter()
for record in tqdm(midi_abc_dataset):
    # Normalise each label list exactly once so the stored description and the
    # counters below are guaranteed to use the same values.
    genres = [label.lower() for label in record["genre"]]
    moods = [label.lower() for label in record["mood"]]
    instruments = [label.lower() for label in record["instrument_summary"]]

    annotated_abc = AnnotatedAbcCreate(
        abc_notation=record["abc_notation"],
        description=AudioTextDescription(
            genre=genres,
            mood=moods,
            instruments=instruments,
            midi_instruments_num=None,
        ),
    )

    # Counter.update adds one per occurrence, duplicates within a record included.
    instruments_c.update(instruments)
    mood_c.update(moods)
    genre_c.update(genres)

    # The created row is not needed here, so the return value is discarded
    # instead of overwriting the loop variable.
    sqlite_create_annotated_abc(annotated_abc)
    # Alternative backend (disabled): firestore_create_annotated_abc(annotated_abc)
print(instruments_c)
print(mood_c)
print(genre_c)

100%|████████████████████████████████████████████████████████████████| 3158/3158 [00:36<00:00, 86.68it/s]

Counter({'piano': 1395, 'acoustic guitar': 298, 'ocarina': 58, 'trumpet': 36, 'drums': 31, 'flute': 21, 'synth lead': 12, 'synth bass': 9, 'orchestral harp': 9, 'contrabass': 5, 'honky-tonk piano': 4, 'muted trumpet': 4, 'distortion guitar': 4, 'french horn': 4, 'clarinet': 4, 'acoustic bass': 3, 'pan flute': 2, 'electric bass': 1, 'vibraphone': 1, 'calliope lead': 1, 'music box': 1, 'trombone': 1, 'synth effects': 1})
Counter({'positive': 1424, 'emotional': 1327, 'calm': 1119, 'energetic': 959})
Counter({'classical': 894, 'electronic': 790, 'pop': 729, 'rock': 684, 'folk': 61})





In [None]:
from midistral.db.sqlite.crud import (
    get_annotated_abcs_from_description as sqlite_get_annotated_abcs_from_description,
)

# Sanity check of the ingestion above: build a description with one label per
# category and query the SQLite store with it.
description = AudioTextDescription(
    genre=["electronic"],
    mood=["positive"],
    instruments=["piano"],
)

print("looking for ")
print(description)

# NOTE(review): the second positional argument (5) is presumably the maximum
# number of results to return — confirm against the crud function signature.
res = sqlite_get_annotated_abcs_from_description(description, 5)
for hit in res:
    # Only the description of each returned entry is shown.
    print(hit.description)