In [31]:
import pandas as pd
import numpy as np

from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List, Optional, Tuple

import muspy

In [32]:
# Resolve the dataset directory once, relative to the current working
# directory, and make sure it exists before the dataset object is created.
PROJECT_ROOT = Path.cwd()
DATA_ROOT = PROJECT_ROOT / "data" / "essen"
DATA_ROOT.mkdir(parents=True, exist_ok=True)

# Pass DATA_ROOT instead of repeating the "data/essen" literal, so the
# directory created above and the one handed to MusPy cannot drift apart.
ds = muspy.datasets.EssenFolkSongDatabase(
    root=DATA_ROOT,
    download_and_extract=True,
    convert=False,
    use_converted=None,
    verbose=True,
    n_jobs=-1,
)

Skip downloading as the `.muspy.success` file is found.
Skip extracting as the `.muspy.success` file is found.


In [33]:
# Report how many songs the loaded dataset object exposes.
print("Песен:", len(ds))

Песен: 10457


In [34]:
def music_to_tokens(music: "muspy.Music", step_division: int = 8) -> List[Tuple[int, int, int]]:
    """Encode the track with the most notes as ``(pitch, dur, rest)`` tokens.

    Time is quantised to steps of ``music.resolution // step_division`` ticks
    (at least one tick per step).

    Args:
        music: Object exposing ``resolution`` and ``tracks``; every track
            exposes ``notes`` whose items have ``time``, ``duration`` and
            ``pitch``.
        step_division: Divisor applied to ``music.resolution`` to obtain the
            step size in ticks.

    Returns:
        One tuple per note, ordered by onset time:
        ``pitch`` is the note's pitch as an int, ``dur`` its duration in steps
        (never below 1) and ``rest`` the silence in steps between the end of
        the preceding material and this note's onset (0 when notes overlap).
        An empty list is returned when no track contains any note.
    """
    step_ticks = max(1, int(music.resolution) // step_division)

    tracks = [t for t in music.tracks if t.notes]
    if not tracks:
        # Nothing to encode; max() below would raise ValueError on an empty list.
        return []
    track = max(tracks, key=lambda t: len(t.notes))

    notes = sorted(track.notes, key=lambda n: n.time)

    tokens = []
    prev_end = 0  # latest tick at which any already-encoded note ends

    for n in notes:
        onset = int(n.time)
        length = int(n.duration)

        # Gap before this note; clamped at 0 so overlapping notes yield no rest.
        rest_ticks = max(0, onset - prev_end)

        rest = int(np.round(rest_ticks / step_ticks))      # rest before the note, in steps
        dur = max(1, int(np.round(length / step_ticks)))   # note duration, in steps
        pitch = int(n.pitch)                               # note pitch as an int

        tokens.append((pitch, dur, rest))
        prev_end = max(prev_end, onset + length)

    return tokens

In [35]:
import json

# Peek at the first line of the token file: each line holds one JSON value.
# NOTE(review): nothing visible in this notebook writes this file — confirm
# where data/essen_tokens.jsonl comes from.
tokens_path = "data/essen_tokens.jsonl"
with open(tokens_path, mode="r", encoding="utf-8") as f:
    first_line = f.readline()
first = json.loads(first_line)

print("Первые 10 токенов:", first[:10])
print("Длина:", len(first))

Первые 10 токенов: [[69, 8, 0], [69, 8, 0], [69, 8, 0], [69, 8, 0], [71, 8, 0], [72, 8, 0], [69, 8, 0], [69, 16, 0], [72, 8, 8], [73, 8, 0]]
Длина: 104


In [36]:
# Tokenise songs 1000..1999 of the dataset.
songs = []
skipped = []  # (index, exception) for every song that could not be loaded

for i in range(1000, 2000):
    try:
        music = ds[i]  # load once; the result is reused below
    except Exception as exc:
        # Best effort: skip songs that fail to load, but keep a record of them.
        # Catching Exception (not a bare except) leaves KeyboardInterrupt
        # free to stop the loop.
        skipped.append((i, exc))
        continue
    songs.append(music_to_tokens(music))

print("Количество песен:", len(songs))
if skipped:
    print("Пропущено песен:", len(skipped))

  return self.iter().getElementsByClass(classFilterList)


Количество песен: 1000


In [37]:
# Keep only the songs that have at least 30 tokens.
songs = [song for song in songs if len(song) >= 30]
print("Песен для обучения:", len(songs))

Песен для обучения: 769


In [38]:
from collections import defaultdict, Counter

def make_markov_chain(songs):
    """Count first-order transitions and starting states over ``songs``.

    Args:
        songs: Iterable of token sequences. Tokens must be hashable because
            they are used as dictionary keys.

    Returns:
        ``(starts, transitions)`` where ``starts`` is a ``Counter`` of the
        first token of every non-empty song and ``transitions[a][b]`` is the
        number of times token ``b`` immediately followed token ``a``.
        Empty songs contribute nothing.
    """
    transitions = defaultdict(Counter)  # transitions[a][b] -> count of a followed by b
    starts = Counter()

    for tokens in songs:
        if len(tokens) == 0:
            # An empty song has neither a start token nor any transition.
            continue
        starts[tokens[0]] += 1
        for a, b in zip(tokens, tokens[1:]):
            transitions[a][b] += 1

    return starts, transitions


In [48]:
import random

def sample_from_counter(counter: Counter):
    """Draw one key from ``counter`` with probability proportional to its count.

    Args:
        counter: Mapping from state to a non-negative weight.

    Returns:
        A single key of ``counter``.

    Raises:
        IndexError: If ``counter`` is empty.
        ValueError: Raised by ``random.choices`` when the weights sum to zero.
    """
    if not counter:
        raise IndexError("cannot sample from an empty counter")

    # random.choices accepts relative weights, so no normalisation is needed.
    states = list(counter.keys())
    weights = list(counter.values())
    return random.choices(states, weights=weights, k=1)[0]

def create_melody(starts, trans, length=200):
    """Generate ``length`` tokens by walking the Markov chain.

    The first token is sampled from ``starts``. Each following token is
    sampled from the successors of the current token; when the current token
    has no recorded successors the walk restarts from ``starts``.

    Args:
        starts: Counter of starting tokens.
        trans: Mapping from token to a Counter of successor tokens.
        length: Number of tokens to produce.

    Returns:
        A list with exactly ``length`` tokens, or an empty list when
        ``length`` is not positive.
    """
    if length <= 0:
        return []

    cur = sample_from_counter(starts)
    out = [cur]
    for _ in range(length - 1):
        # .get() avoids inserting empty entries into a defaultdict while
        # sampling and also works when trans is a plain dict.
        followers = trans.get(cur)
        cur = sample_from_counter(followers if followers else starts)
        out.append(cur)
    return out


In [50]:
# Fit the chain on the filtered songs, then sample a 200-token melody.
# NOTE(review): no random seed is set in the visible cells, so the generated
# melody differs between runs — confirm whether seeding is wanted.
starts, trans = make_markov_chain(songs)
gen_tokens = create_melody(starts, trans, length=200)

print("Сгенерировано токенов:", len(gen_tokens))
print("Первые 10:", gen_tokens[:10])

Сгенерировано токенов: 200
Первые 10: [(62, 4, 0), (67, 6, 0), (67, 2, 0), (67, 2, 0), (69, 4, 0), (70, 4, 0), (69, 4, 0), (67, 4, 0), (66, 8, 0), (74, 4, 0)]


In [59]:
import muspy

def tokens_to_midi(tokens, out_midi="markov.mid", resolution=480, step_division=8, program=102):
    """Write ``(pitch, dur, rest)`` tokens to a single-track MIDI file.

    Args:
        tokens: Iterable of ``(pitch, dur, rest)`` triples; ``dur`` and
            ``rest`` are measured in steps.
        out_midi: Path of the MIDI file to write.
        resolution: Value passed to ``muspy.Music(resolution=...)``.
        step_division: Divisor applied to ``resolution`` to obtain the step
            size in ticks.
        program: Value passed to ``muspy.Track(program=...)``.

    Returns:
        ``out_midi``, unchanged.
    """
    # Clamp to one tick per step, as music_to_tokens does, so a step_division
    # larger than resolution cannot collapse every rest and duration to 0.
    step_ticks = max(1, resolution // step_division)

    music = muspy.Music(resolution=resolution)
    track = muspy.Track(program=program)
    music.tracks.append(track)

    t = 0  # running position in ticks
    for pitch, dur, rest in tokens:
        t += int(rest) * step_ticks
        # Never emit a zero-length note (same clamp as tokens_to_midi_two_tracks).
        dur_ticks = max(1, int(dur) * step_ticks)
        track.notes.append(
            muspy.Note(time=t, pitch=int(pitch), duration=dur_ticks, velocity=80)
        )
        t += dur_ticks

    muspy.write_midi(out_midi, music)
    return out_midi

# Write the sampled melody to markov.mid using the function's default settings.
tokens_to_midi(gen_tokens, "markov.mid")


'markov.mid'

In [None]:
def create_cyclic_melody(starts, trans, length=10, repetitions=10, name="markov_cyclic.mid", program=102):
    """Sample a short motif, loop it, and write the result to a MIDI file.

    Args:
        starts: Counter of starting tokens.
        trans: Mapping from token to a Counter of successor tokens.
        length: Number of tokens in the sampled motif.
        repetitions: How many times the motif is repeated back to back.
        name: Path of the MIDI file to write.
        program: Forwarded to ``tokens_to_midi``.

    Returns:
        The value returned by ``tokens_to_midi`` (the output path).
    """
    # Reuse get_cyclic_tokens rather than duplicating its body here.
    cyclic = get_cyclic_tokens(starts, trans, length=length, repetitions=repetitions)
    return tokens_to_midi(cyclic, name, program=program)

def get_cyclic_tokens(starts, trans, length=10, repetitions=10):
    """Return a sampled motif of ``length`` tokens repeated ``repetitions`` times."""
    return create_melody(starts, trans, length=length) * repetitions

In [66]:
# Write an 8-token motif repeated 10 times to markov_cyclic.mid with program=10.
create_cyclic_melody(starts, trans, length=8, repetitions=10, name="markov_cyclic.mid", program=10)

In [None]:
import muspy

def tokens_to_midi_two_tracks(
    melody_tokens,
    resolution=480,
    step_division=4,
    melody_program=0,
    acc_program=0,
    name="two_tracks.mid"
):
    """Write the melody plus a chordal accompaniment to a two-track MIDI file.

    For every melody note, up to three accompaniment notes are added with the
    same onset and duration: the pitch two octaves down, and a major third
    and a fifth above the pitch one octave down.

    Args:
        melody_tokens: Iterable of ``(pitch, dur, rest)`` triples; ``dur``
            and ``rest`` are measured in steps.
        resolution: Value passed to ``muspy.Music(resolution=...)``.
        step_division: Divisor applied to ``resolution`` to obtain the step
            size in ticks.
        melody_program: Program of the melody track.
        acc_program: Program of the accompaniment track.
        name: Path of the MIDI file to write.

    Returns:
        ``name``, unchanged.
    """
    # NOTE(review): the default step_division here is 4 while music_to_tokens
    # and tokens_to_midi default to 8, so with defaults every step lasts twice
    # as many ticks as in tokens_to_midi — confirm this is intended.
    # Clamp to one tick per step so rests cannot collapse to 0 while
    # durations are clamped to 1 tick.
    step_ticks = max(1, resolution // step_division)

    music = muspy.Music(resolution=resolution)

    melody = muspy.Track(program=melody_program)
    acc = muspy.Track(program=acc_program)

    music.tracks.append(melody)
    music.tracks.append(acc)

    t = 0  # running position in ticks
    for pitch, dur, rest in melody_tokens:
        pitch = int(pitch)
        t += int(rest) * step_ticks
        dur_ticks = max(1, int(dur) * step_ticks)

        # Main melody note.
        melody.notes.append(muspy.Note(time=t, pitch=pitch, duration=dur_ticks, velocity=90))

        # Accompaniment: always a major triad shape built from the melody pitch.
        root = pitch - 24        # bass, two octaves below the melody note
        third = pitch - 12 + 4   # major third above the note one octave down
        fifth = pitch - 12 + 7   # fifth above the note one octave down

        for p in (root, third, fifth):
            # Drop chord tones that fall outside the 0..127 pitch range.
            if 0 <= p <= 127:
                acc.notes.append(muspy.Note(time=t, pitch=p, duration=dur_ticks, velocity=60))

        t += dur_ticks

    muspy.write_midi(name, music)
    return name

# Write gen_tokens as melody plus accompaniment (accompaniment program 32).
# No name is passed, so the output file is the default "two_tracks.mid".
tokens_to_midi_two_tracks(gen_tokens, melody_program=0, acc_program=32)

Готово: markov.mid


In [81]:
# Sample a 10-token motif, repeat it 10 times, and write it with accompaniment.
# step_division=6 overrides the function default of 4, giving shorter steps
# (480 // 6 = 80 ticks instead of 120).
cyclic_acc_tokens = get_cyclic_tokens(starts, trans, length=10, repetitions=10)
tokens_to_midi_two_tracks(
    cyclic_acc_tokens,
    melody_program=0,
    acc_program=0,
    step_division=6,
    name="markov_cyclic_two_tracks_2.mid"
)

'markov_cyclic_two_tracks_2.mid'