In [8]:
import random
from glob import glob
from collections import defaultdict

import numpy as np
from numpy.random import choice

from symusic import Score
from miditok import REMI, TokenizerConfig
from midiutil import MIDIFile

@inproceedings{pop909-ismir2020,
    author = {Ziyu Wang* and Ke Chen* and Junyan Jiang and Yiyi Zhang and Maoran Xu and Shuqi Dai and Guxian Bin and Gus Xia},
    title = {POP909: A Pop-song Dataset for Music Arrangement Generation},
    booktitle = {Proceedings of 21st International Conference on Music Information Retrieval, {ISMIR}},
    year = {2020}
}

In [9]:
# Path(s) of the MIDI data handed to `tokenizer.train` below.
# NOTE(review): this is an empty string as committed; `files_paths` presumably
# expects the real MIDI file paths -- fill this in before running. TODO confirm.
FILE_PATH = ""

# REMI tokenizer configured with one velocity value and with chord and program
# tokens switched off (see the keyword arguments).
config = TokenizerConfig(num_velocities=1, use_chords=False, use_programs=False)
# Module-level tokenizer; `note_extraction` calls it directly.
tokenizer = REMI(config)
# Train the tokenizer with a requested vocabulary size of 1000 on FILE_PATH.
tokenizer.train(vocab_size=1000, files_paths=FILE_PATH)






In [10]:
def note_extraction(midi_file):
    """Return the pitch numbers found in the first token sequence of a MIDI file.

    The file is loaded with ``Score`` and passed to the module-level
    ``tokenizer``; only element ``[0]`` of the tokenizer's result is read.

    Args:
        midi_file: Path of the MIDI file to load.

    Returns:
        list[int]: For every token whose text contains ``"Pitch"``, the
        integer that follows the token's first underscore, in token order.
    """
    score = Score(midi_file)
    first_sequence = tokenizer(score)[0]
    return [
        # Everything after the first "_" is the numeric value of the token.
        int(token.split("_", 1)[-1])
        for token in first_sequence.tokens
        if "Pitch" in token
    ]

In [11]:
def note_frequency(midi_files):
    """Count how often each pitch occurs across a collection of MIDI files.

    Args:
        midi_files: Iterable of MIDI file paths; each one is passed to
            ``note_extraction``.

    Returns:
        defaultdict[int, int]: Maps pitch -> number of occurrences summed over
        all files. Being a ``defaultdict(int)``, unseen pitches read as 0.
    """
    occurrences = defaultdict(int)
    for path in midi_files:
        for pitch in note_extraction(path):
            occurrences[pitch] += 1
    return occurrences

Calculate probabilities


In [12]:
def note_unigram_probability(midi_files):
    """Compute the relative frequency of every pitch in the given MIDI files.

    Args:
        midi_files: Iterable of MIDI file paths, forwarded to
            ``note_frequency``.

    Returns:
        dict: Maps pitch -> (count of that pitch) / (total count of all
        pitches). Empty when no pitches were counted.
    """
    note_counts = note_frequency(midi_files)
    total = sum(note_counts.values())
    # Normalise each count by the grand total so the values sum to 1.
    return {pitch: count / total for pitch, count in note_counts.items()}

In [13]:
def note_bigram_probability(midi_files):
    """Estimate next-pitch probabilities conditioned on the previous pitch.

    Consecutive pitch pairs are counted within each file (pairs never span
    two files) and the counts are normalised per previous pitch.

    Args:
        midi_files: Iterable of MIDI file paths, each passed to
            ``note_extraction``.

    Returns:
        tuple: ``(bigramTransitions, bigramTransitionProbabilities)``, two
        ``defaultdict(list)`` objects keyed by previous pitch. The first holds
        the pitches seen right after it, the second the matching
        probabilities in the same order.
    """
    # follower_counts[prev][nxt] = number of times `nxt` directly followed `prev`,
    # e.g. {62: {60: 3, 64: 4}, ...}
    follower_counts = defaultdict(lambda: defaultdict(int))
    for path in midi_files:
        pitches = note_extraction(path)
        for current, following in zip(pitches, pitches[1:]):
            follower_counts[current][following] += 1

    bigramTransitions = defaultdict(list)
    bigramTransitionProbabilities = defaultdict(list)
    for current, followers in follower_counts.items():
        total = sum(followers.values())
        bigramTransitions[current] = list(followers)
        bigramTransitionProbabilities[current] = [
            count / total for count in followers.values()
        ]

    return bigramTransitions, bigramTransitionProbabilities

In [14]:
def note_trigram_probability(midi_files):
    """Estimate next-pitch probabilities conditioned on the two previous pitches.

    Consecutive pitch triples are counted within each file (triples never span
    two files) and the counts are normalised per two-pitch context.

    Args:
        midi_files: Iterable of MIDI file paths, each passed to
            ``note_extraction``.

    Returns:
        tuple: ``(trigramTransitions, trigramTransitionProbabilities)``, two
        ``defaultdict(list)`` objects keyed by ``(pitch_before_last,
        last_pitch)``. The first holds the pitches seen right after that
        context, the second the matching probabilities in the same order.
    """
    # context_counts[(a, b)][c] = number of times `c` directly followed `a, b`,
    # e.g. {(60, 62): {64: 1, 66: 4}, ...}
    context_counts = defaultdict(lambda: defaultdict(int))
    for path in midi_files:
        pitches = note_extraction(path)
        for first, second, third in zip(pitches, pitches[1:], pitches[2:]):
            context_counts[(first, second)][third] += 1

    trigramTransitions = defaultdict(list)
    trigramTransitionProbabilities = defaultdict(list)
    for context, followers in context_counts.items():
        total = sum(followers.values())
        trigramTransitions[context] = list(followers)
        trigramTransitionProbabilities[context] = [
            count / total for count in followers.values()
        ]

    return trigramTransitions, trigramTransitionProbabilities

In [15]:
def _weighted_pick(options, weights):
    """Draw a single element of ``options`` with the given relative ``weights``."""
    return random.choices(list(options), weights=list(weights), k=1)[0]


def _sample_next_note(history, unigram, bigram, trigram):
    """Sample the next pitch, backing off trigram -> bigram -> unigram.

    Args:
        history: Pitches generated so far (may be empty).
        unigram: Mapping pitch -> probability.
        bigram: ``(transitions, probabilities)`` keyed by the previous pitch.
        trigram: ``(transitions, probabilities)`` keyed by the previous two
            pitches as a tuple.

    Returns:
        The sampled pitch. The highest-order model that has at least one
        recorded continuation for the current context is used.
    """
    bigram_options, bigram_weights = bigram
    trigram_options, trigram_weights = trigram

    if len(history) >= 2:
        context = (history[-2], history[-1])
        # `.get` avoids inserting empty entries into a defaultdict.
        candidates = trigram_options.get(context)
        if candidates:
            return _weighted_pick(candidates, trigram_weights[context])

    if history:
        candidates = bigram_options.get(history[-1])
        if candidates:
            return _weighted_pick(candidates, bigram_weights[history[-1]])

    return _weighted_pick(unigram.keys(), unigram.values())


def music_generate(length):
    """Generate ``length`` notes from the n-gram models and write them to ``q10.mid``.

    Pitches: the first note is drawn from the unigram model, the second from
    the bigram model, and later notes from the trigram model. If a context has
    no recorded continuation (e.g. a pitch pair that only occurs at the very
    end of a piece), sampling backs off to the next lower-order model instead
    of failing on an empty candidate list.

    Durations: drawn from the model returned by
    ``beat_pos_bigram_probability``, keyed by the current position, which is
    advanced by each sampled length.

    Relies on the module-level ``midi_files`` and on
    ``beat_pos_bigram_probability``, both defined outside this function.

    Args:
        length: Number of notes to generate.

    Returns:
        None. The result is written to ``q10.mid`` in the working directory.
    """
    # Sample notes
    unigramProbabilities = note_unigram_probability(midi_files)
    bigram_model = note_bigram_probability(midi_files)
    trigram_model = note_trigram_probability(midi_files)

    sampled_notes = []
    while len(sampled_notes) < length:
        sampled_notes.append(
            _sample_next_note(sampled_notes, unigramProbabilities, bigram_model, trigram_model)
        )

    # Sample beats
    bigramBeatPosTransitions, bigramBeatPosTransitionProbabilities = beat_pos_bigram_probability(midi_files)
    # NOTE(review): presumably 8 position units per beat and 32 per bar --
    # confirm against beat_pos_bigram_probability.
    units_per_beat = 8
    units_per_bar = 32
    sampled_beats = []
    pos = 0
    for _ in range(length):
        # NOTE(review): assumes every reachable `pos` has at least one recorded
        # length in the beat model; an empty entry would make the draw fail.
        sampled_beat_length = _weighted_pick(
            bigramBeatPosTransitions[pos],
            bigramBeatPosTransitionProbabilities[pos],
        )
        sampled_beats.append(sampled_beat_length / units_per_beat)
        pos += sampled_beat_length
        # Reaching or passing the end of the bar restarts at position 0.
        if pos >= units_per_bar:
            pos = 0

    # Save the generated music as a midi file
    midi = MIDIFile(1)  # Create a MIDI file that consists of 1 track
    track = 0  # Set track number
    time = 0  # Where is the event placed (at the beginning)
    tempo = 120  # The tempo (beats per minute)
    midi.addTempo(track, time, tempo)  # Add tempo information

    # Notes are laid end to end: each one starts where the previous one stops.
    current_time = 0
    for note, beat_length in zip(sampled_notes, sampled_beats):
        midi.addNote(track, 0, note, current_time, beat_length, 100)
        current_time += beat_length

    # Write MIDI file
    with open("q10.mid", "wb") as f:
        midi.writeFile(f)
