In [1]:
from __future__ import print_function, division
import sys
import matplotlib.pyplot as plt
import numpy as np
import pickle
import glob

from music21 import converter, instrument, note, chord

# Import relative entropy as KL Divergence
from scipy.special import rel_entr
print("Imports done")


Imports done


# Warm up: Calculate simple KL_D example

In [None]:
# Warm-up: KL divergence of two small hand-made distributions.
# The raw weights below do not sum to 1 (1.10 and 0.85), so they are
# normalised first: KL(P || Q) is only guaranteed to be non-negative when
# both P and Q are proper probability distributions.
p_weights = [0.20, 0.40, 0.50]
q_weights = [0.60, 0.15, 0.1]
p = np.asarray(p_weights, dtype=float) / np.sum(p_weights)
q = np.asarray(q_weights, dtype=float) / np.sum(q_weights)

# rel_entr returns the element-wise terms p_i * log(p_i / q_i);
# their sum is KL(P || Q).
KL_PQ = rel_entr(p, q)
print(KL_PQ)
print('KL(P || Q): %.3f' % KL_PQ.sum())

# Recompute every term by hand to confirm rel_entr matches the formula.
print("\nCheck if formula gets the same results: ")
for p_i, q_i in zip(p, q):
    print(p_i * np.log(p_i / q_i))


# 0. Get notes for surprise calculation

In [2]:
def get_notes(dir):
    """Collect the notes and chords of every MIDI file matching a glob pattern.

    Parameters
    ----------
    dir : str
        Glob pattern handed to ``glob.glob`` (for example
        ``"anger_simple/*.mid"``). The name shadows the ``dir`` builtin but
        is kept so that existing calls keep working.

    Returns
    -------
    list of str
        One entry per note or chord, files concatenated one after another.
        A note is stored as ``str(element.pitch)``; a chord is stored as its
        ``normalOrder`` values joined with ``"."``. The list is empty when
        the pattern matches no file.
    """
    notes = []
    # Sort case-insensitively so the concatenation order does not depend on
    # the order in which the OS happens to list the directory.
    for file in sorted(glob.glob(dir), key=str.lower):
        # Announce the file before parsing so a failing file is identifiable.
        print("Parsing %s" % file)
        midi = converter.parse(file)

        try:  # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except Exception:  # file has notes in a flat structure
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
    return notes

# 1. Calculate two-step transitions between notes

In [3]:
from collections import Counter

# Glob pattern of the training corpus. Forward slashes work on every
# platform and avoid the invalid "\*" escape sequence of a backslash path.
#notes = get_notes("love_simple/*.mid")
notes = get_notes("anger_simple/*.mid")

M = len(notes)
if M < 2:
    # get_notes returns [] when the pattern matches nothing; fail loudly
    # instead of silently producing empty statistics.
    raise ValueError("No pairwise transitions found: the corpus pattern "
                     "yielded fewer than two notes")

# Every pair of consecutive notes/chords in the concatenated corpus.
two_transitions = list(zip(notes, notes[1:]))
total_transitions = len(two_transitions)
print("There are %i pairwise transitions" %total_transitions)

#counts Two Transitions
counts_tt = Counter(two_transitions)

unique_trans = set(counts_tt)

# Absolute frequency of every distinct transition.
abs_trans = dict(counts_tt)
for u, n_occurrences in abs_trans.items():
    print("Absolute number of %s is % i." %(u, n_occurrences))

# turn absolute transition frequencies into relative frequencies
# NOTE(review): each value is count / total_transitions, i.e. the share of
# that pair among all pairs, not a frequency conditioned on the first note
# as the "p(b|a)" label in the output suggests.
print("Prior probability:")
A_M = {t: n_occurrences / total_transitions
       for t, n_occurrences in abs_trans.items()}

for a in A_M:
    print("p(%s|%s) is %.4f." %(a[1],a[0],A_M[a]))

Parsing anger_simple\Axel_pifa_45.mid
Parsing anger_simple\Bombing_Mission_pifa.mid
Parsing anger_simple\Duel_of_the_fates_pifa_70.mid
Parsing anger_simple\Edward_Scissorhands_piob_48.mid
Parsing anger_simple\Elfman_pifa_63.mid
Parsing anger_simple\james_bond_pifa.mid
Parsing anger_simple\Jaws_piob_35.mid
Parsing anger_simple\mission_impossible_pifl.mid
Parsing anger_simple\Phantom_of_the_opera_pifa.mid
Parsing anger_simple\Psycho_piha_64.mid
Parsing anger_simple\shaft_pifl.mid
Parsing anger_simple\Xfiles_piob_18.mid
There are 1769 pairwise transitions
Absolute number of ('C#6', 'G#6') is  1.
Absolute number of ('2.5', '11.2.6') is  1.
Absolute number of ('B2', 'C#3') is  2.
Absolute number of ('4', '4') is  13.
Absolute number of ('G#1', 'C2') is  6.
Absolute number of ('11.4', 'C4') is  1.
Absolute number of ('2.5', '2.5') is  2.
Absolute number of ('D2', 'A1') is  12.
Absolute number of ('C5', 'C#5') is  3.
Absolute number of ('2.6.9', '1.5.8') is  1.
Absolute number of ('0.1.4.8', 

# 2. Calculate KL Divergence as Surprise factor for a new sequence
# The result should not be negative!

# For two-step transitions

In [4]:
# Surprise of a new sequence w.r.t. the two-step transition statistics of the
# training corpus. Relies on `two_transitions` and `A_M` from the prior cell.
print("Calculate transition frequencies for the new sequence")

# Prior probability assumed for a transition the training corpus never
# produced (a prior of exactly 0 would make the KL term infinite).
UNSEEN_PRIOR = 0.000001

# Forward slashes are portable and avoid invalid escape sequences such as
# "\e", "\d" or "\c" that a backslash path contains.
#file = "outputs-for-stats/abasic_110.mid"
#file = "outputs-for-stats/b1-5_120.mid"
#file = "outputs-for-stats/d1-10_150.mid"
#file = "outputs-for-stats/c10_100.mid"
file = "outputs-for-stats/ext_800.mid"

# get_notes takes a glob pattern; a plain path matches just that file.
# It also prints the "Parsing ..." line for the file.
new_notes = get_notes(file)
if not new_notes:
    raise ValueError("No notes could be read from %s" % file)

# Consecutive pairs of the new sequence.
new_seq_trans = list(zip(new_notes, new_notes[1:]))

print("Include new transitions into transitions of training corpus")
new_two_transitions = two_transitions + new_seq_trans

total_transitions_new = len(new_two_transitions)
print("Now there are %i pairwise transitions" %total_transitions_new)

#counts Two Transitions
counts_tt_new = Counter(new_two_transitions)

unique_trans_new = set(counts_tt_new)

abs_trans_new = dict(counts_tt_new)

# turn absolute transition frequencies into relative frequencies
print("Posterior probability:")
M_A = {t: n_occurrences / total_transitions_new
       for t, n_occurrences in abs_trans_new.items()}

for a in M_A:
    print("p(%s|%s) is %.4f." %(a[1],a[0],M_A[a]))


# KL divergence: D(p||q) --> D(p(M|A)||p(M)) --> p=p(M|A), q=p(M)
# NOTE(review): the prior is not renormalised after UNSEEN_PRIOR is assigned
# to unseen transitions, so the q values sum to slightly more than 1.
print("Calculate KL Divergence for each pairwise transition in the notes")
KL_MA = {}
counter_unseen = 0  # number of distinct transitions absent from the prior
for ma, p in M_A.items():
    if ma in A_M:
        q = A_M[ma]
    else:
        q = UNSEEN_PRIOR
        counter_unseen += 1
    KL_MA[ma] = rel_entr(p,q)


print("Calculate the surprise by summing up the KL_divergence for the respective transitions")
surprise = 0
for kl, kl_term in KL_MA.items():
    print("+ %.6f for transition %s" %(kl_term, kl))
    surprise += kl_term

print("The surprise factor of the new sequence is ",surprise)
print("The number of unseen transitions is", counter_unseen)
print("Number of transitions in the sequence", len(new_seq_trans))

Calculate transition frequencies for the new sequence
Parsing outputs-for-stats\ext_800.mid
Include new transitions into transitions of training corpus
Now there are 1934 pairwise transitions
Posterior probability:
p(G#6|C#6) is 0.0005.
p(11.2.6|2.5) is 0.0005.
p(C#3|B2) is 0.0010.
p(4|4) is 0.0067.
p(C2|G#1) is 0.0031.
p(C4|11.4) is 0.0005.
p(2.5|2.5) is 0.0026.
p(A1|D2) is 0.0062.
p(F4|E-4) is 0.0010.
p(C#5|C5) is 0.0016.
p(1.5.8|2.6.9) is 0.0005.
p(F4|0.1.4.8) is 0.0005.
p(G4|B4) is 0.0124.
p(G6|E6) is 0.0010.
p(E-6|F#5) is 0.0005.
p(A5|2.5) is 0.0005.
p(G5|C6) is 0.0005.
p(D4|F4) is 0.0103.
p(B2|E-3) is 0.0005.
p(C#3|D3) is 0.0010.
p(C6|F5) is 0.0005.
p(F2|C#2) is 0.0021.
p(11.4|2.5) is 0.0005.
p(D3|D3) is 0.0124.
p(3.6.10|C#3) is 0.0005.
p(9.2|8.11) is 0.0005.
p(11.2.6|11.2.6) is 0.0005.
p(2.6.10|2.6.10) is 0.0010.
p(7.0|5.10) is 0.0005.
p(9.0|11.2) is 0.0016.
p(4.8|6.9) is 0.0016.
p(C#4|E-4) is 0.0010.
p(11.2|11.2) is 0.0016.
p(G4|F4) is 0.0031.
p(F2|E2) is 0.0062.
p(G#4|F4) is 0

# Now do the same for three-step transitions. 1. Calculate three-step transition probabilities

In [5]:
from collections import Counter

# Glob pattern of the training corpus. Forward slashes work on every
# platform and avoid the invalid "\*" escape sequence of a backslash path.
#notes = get_notes("love_simple/*.mid")
notes = get_notes("anger_simple/*.mid")
print(notes)

M = len(notes)
if M < 3:
    # get_notes returns [] when the pattern matches nothing; fail loudly
    # instead of silently producing empty statistics.
    raise ValueError("No triple transitions found: the corpus pattern "
                     "yielded fewer than three notes")

# Every run of three consecutive notes/chords in the concatenated corpus.
three_transitions = list(zip(notes, notes[1:], notes[2:]))
total_transitions_3 = len(three_transitions)
print("There are %i triple transitions" %total_transitions_3)

#counts Three Transitions
counts_3 = Counter(three_transitions)

unique_trans_3 = set(counts_3)
print("There are %i unique triple transitions" %len(unique_trans_3))

# Absolute frequency of every distinct triple.
abs_trans_3 = dict(counts_3)
for u, n_occurrences in abs_trans_3.items():
    print("Absolute number of %s is % i." %(u, n_occurrences))

# turn absolute transition frequencies into relative frequencies
# NOTE(review): each value is count / total_transitions_3, i.e. the share of
# that triple among all triples, not a frequency conditioned on the first
# two notes as the "p(c|b,a)" label in the output suggests.
print("Prior probability:")
A_M_3 = {t: n_occurrences / total_transitions_3
         for t, n_occurrences in abs_trans_3.items()}

for a in A_M_3:
    print("p(%s|%s,%s) is %.4f." %(a[2],a[1],a[0],A_M_3[a]))

Parsing anger_simple\Axel_pifa_45.mid
Parsing anger_simple\Bombing_Mission_pifa.mid
Parsing anger_simple\Duel_of_the_fates_pifa_70.mid
Parsing anger_simple\Edward_Scissorhands_piob_48.mid
Parsing anger_simple\Elfman_pifa_63.mid
Parsing anger_simple\james_bond_pifa.mid
Parsing anger_simple\Jaws_piob_35.mid
Parsing anger_simple\mission_impossible_pifl.mid
Parsing anger_simple\Phantom_of_the_opera_pifa.mid
Parsing anger_simple\Psycho_piha_64.mid
Parsing anger_simple\shaft_pifl.mid
Parsing anger_simple\Xfiles_piob_18.mid
['E4', 'G4', 'E4', 'E4', 'A4', 'E4', 'D4', 'E4', 'B4', 'E4', 'E4', 'C5', 'B4', 'G4', 'E4', 'B4', 'E5', 'E4', 'D4', 'D4', 'B3', 'F#4', 'E4', 'E4', 'G4', 'E4', 'E4', 'A4', 'E4', 'D4', 'E4', 'B4', 'E4', 'E4', 'C5', 'B4', 'G4', 'E4', 'B4', 'E5', 'E4', 'D4', 'D4', 'B3', 'F#4', 'E4', 'E4', 'G4', 'E4', 'E4', 'A4', 'E4', 'D4', 'E4', 'B4', 'E4', 'E4', 'C5', 'B4', 'G4', 'E4', 'B4', 'E5', 'E4', 'D4', 'D4', 'B3', 'F#4', 'E4', 'E4', 'G4', 'E4', 'E4', 'A4', 'E4', 'D4', 'E4', 'B4', 'E4',

Absolute number of ('B5', 'G5', '8.0') is  1.
Absolute number of ('C7', 'B6', 'G6') is  4.
Absolute number of ('9.11.0.4', '9.11.0.4', '9.11.0.4') is  3.
Absolute number of ('B3', 'B4', 'B4') is  2.
Absolute number of ('11.4', '4.8', '8.11') is  1.
Absolute number of ('C#4', 'D4', 'E4') is  7.
Absolute number of ('E-6', 'C6', 'F#5') is  1.
Absolute number of ('D6', 'E6', 'G6') is  2.
Absolute number of ('F2', 'G2', 'G2') is  2.
Absolute number of ('C#4', 'C4', 'E4') is  3.
Absolute number of ('E-5', 'F5', 'C6') is  1.
Absolute number of ('E4', 'F4', 'G4') is  1.
Absolute number of ('G4', 'G#4', 'C5') is  15.
Absolute number of ('D4', 'E4', 'G4') is  1.
Absolute number of ('A4', '8.0', 'C5') is  1.
Absolute number of ('B-5', 'B5', 'C6') is  1.
Absolute number of ('B-3', 'C4', 'C#4') is  2.
Absolute number of ('A4', 'E4', 'D4') is  5.
Absolute number of ('G#2', 'G#1', 'D2') is  4.
Absolute number of ('0.4', '4.9', '9.0') is  7.
Absolute number of ('11.2', '9.1', '7.11') is  1.
Absolute n

# 2. Surprise factor for three-step transitions

In [7]:
# Surprise of a new sequence w.r.t. the three-step transition statistics of
# the training corpus. Relies on `three_transitions` and `A_M_3` from the
# prior cell.
print("Calculate triple transition frequencies for the new sequence")

# Prior probability assumed for a triple the training corpus never produced
# (a prior of exactly 0 would make the KL term infinite).
# for comparability with 2-trans; regular smallest q is 0.00056
UNSEEN_PRIOR = 0.000001

# Forward slashes are portable and avoid invalid escape sequences such as
# "\e", "\d" or "\c" that a backslash path contains.
#file = "outputs-for-stats/abasic_444.mid"
#file = "outputs-for-stats/b1-5_76.mid"
#file = "outputs-for-stats/d1-10_80.mid"
#file = "outputs-for-stats/c10_320.mid"
file = "outputs-for-stats/ext_800.mid"

# get_notes takes a glob pattern; a plain path matches just that file.
# It also prints the "Parsing ..." line for the file.
new_notes = get_notes(file)
if not new_notes:
    raise ValueError("No notes could be read from %s" % file)

print(len(new_notes))
# Runs of three consecutive notes/chords of the new sequence.
new_seq_trans_3 = list(zip(new_notes, new_notes[1:], new_notes[2:]))
print(new_seq_trans_3)

print("Include new transitions into transitions of training corpus")
new_transitions_3 = three_transitions + new_seq_trans_3

total_transitions_3_new = len(new_transitions_3)
print("Now there are %i triple transitions" %total_transitions_3_new)

#counts Three Transitions
counts_3_new = Counter(new_transitions_3)

unique_trans_3_new = set(counts_3_new)

abs_trans_3_new = dict(counts_3_new)

# turn absolute transition frequencies into relative frequencies
print("Posterior probability:")
M_A_3 = {t: n_occurrences / total_transitions_3_new
         for t, n_occurrences in abs_trans_3_new.items()}

for a in M_A_3:
    print("p(%s|%s,%s) is %.4f." %(a[2],a[1],a[0],M_A_3[a]))


# KL divergence: D(p||q) --> D(p(M|A)||p(M)) --> p=p(M|A), q=p(M)
# NOTE(review): the prior is not renormalised after UNSEEN_PRIOR is assigned
# to unseen triples, so the q values sum to slightly more than 1.
# The message below used to say "pairwise" (copied from the two-step cell).
print("Calculate KL Divergence for each triple transition in the notes")
KL_MA_3 = {}
counter_unseen_3 = 0  # number of distinct triples absent from the prior
for ma, p in M_A_3.items():
    if ma in A_M_3:
        q = A_M_3[ma]
    else:
        q = UNSEEN_PRIOR
        counter_unseen_3 += 1
    KL_MA_3[ma] = rel_entr(p,q)

print("Calculate the surprise by summing up the KL_divergence for the respective transitions")
surprise_3 = 0
for kl, kl_term in KL_MA_3.items():
    print("+ %.6f for transition %s" %(kl_term, kl))
    surprise_3 += kl_term

print("The surprise factor of the new sequence is ",surprise_3)
print("The number of unseen transitions is", counter_unseen_3)
print("Number of three step transitions in the sequence", len(new_seq_trans_3))

Calculate triple transition frequencies for the new sequence
Parsing outputs-for-stats\ext_800.mid
166
[('2.5', 'B4', 'D4'), ('B4', 'D4', 'F4'), ('D4', 'F4', 'D4'), ('F4', 'D4', 'B4'), ('D4', 'B4', 'D4'), ('B4', 'D4', '2.5'), ('D4', '2.5', 'C4'), ('2.5', 'C4', 'B4'), ('C4', 'B4', 'D4'), ('B4', 'D4', '2.5'), ('D4', '2.5', 'C4'), ('2.5', 'C4', 'D4'), ('C4', 'D4', 'C4'), ('D4', 'C4', 'D4'), ('C4', 'D4', 'C4'), ('D4', 'C4', 'D4'), ('C4', 'D4', 'C4'), ('D4', 'C4', 'D4'), ('C4', 'D4', 'D4'), ('D4', 'D4', 'C4'), ('D4', 'C4', 'D4'), ('C4', 'D4', 'C4'), ('D4', 'C4', 'D4'), ('C4', 'D4', 'C4'), ('D4', 'C4', 'D4'), ('C4', 'D4', 'E-4'), ('D4', 'E-4', 'C4'), ('E-4', 'C4', 'D4'), ('C4', 'D4', 'F4'), ('D4', 'F4', 'B4'), ('F4', 'B4', 'D4'), ('B4', 'D4', 'D4'), ('D4', 'D4', '2.5'), ('D4', '2.5', '2.5'), ('2.5', '2.5', 'E-4'), ('2.5', 'E-4', 'C4'), ('E-4', 'C4', 'D4'), ('C4', 'D4', 'C4'), ('D4', 'C4', 'F4'), ('C4', 'F4', 'D4'), ('F4', 'D4', 'B4'), ('D4', 'B4', 'D4'), ('B4', 'D4', 'D4'), ('D4', 'D4', 'A4'