In [None]:
# HOWTO install and create a conda environment with Python 2 with the required packages for Linux in a bash terminal.
# 1. Download and install miniconda2 from
#    https://conda.io/en/latest/miniconda.html
# 2. Activate the base environment for conda
# 3. Create a new environment
#    $ conda create --name py2_zipf_music python=2
# 4. Activate the new environment
#    $ conda activate py2_zipf_music
# 5. Install the required Python packages
#    $ conda install -c anaconda numpy
#    $ conda install -c conda-forge matplotlib
#    $ conda install -c anaconda scipy
#    $ conda install -c mutirri music21 
#    $ conda install -c anaconda jupyter 
# 6. Run Jupyter notebook
#    $ jupyter notebook
# 7. and open py2_zipf_music_example.ipynb
# 8. For the example to work, please download the following score (or an analogous one)
#        Brandenburg_Concerto_No._5_in_D_Major_BWV_1050_for_Flute_Oboe__Strings.mxl
#    from
#        http://musescore.com/mike_magatagan/scores/4492921
#    since we are not allowed to redistribute it.

In [None]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 

In [None]:
# Default size (width, height) used by every figure created below.
plt.rc("figure", figsize=(13, 9))

In [None]:
from collections import defaultdict
import music21 as m21

In [None]:
# Parse the .mxl score with music21. This may take some time.
# Adjust mxl_path if the file was saved in a different location.
mxl_path = 'DATA/music/classical/bach/Brandenburg_Concerto_No._5_in_D_Major_BWV_1050_for_Flute_Oboe__Strings.mxl'
mxl = m21.converter.parse( mxl_path )

In [None]:
# Raw sequence of notes: one (offset, pitch name with octave, quarter length)
# tuple per Note element of the flattened score. The pitch name and the
# quarter length are stored as strings.
seq_raw_notes = []
for note_el in mxl.flat.getElementsByClass(m21.note.Note):
    onset = note_el.offset
    pitch_name = str(note_el.nameWithOctave)
    quarter_length = str(note_el.duration.quarterLength)
    seq_raw_notes.append((onset, pitch_name, quarter_length))
seq_raw_notes[:10]

In [None]:
# Raw sequence of chords: one (offset, pitches, quarter length) tuple per
# Chord element of the flattened score. The quarter length is stored as a
# string; the pitches are kept as returned by music21.
seq_raw_chords = []
for chord_el in mxl.flat.getElementsByClass(m21.chord.Chord):
    onset = chord_el.offset
    chord_pitches = chord_el.pitches
    quarter_length = str(chord_el.duration.quarterLength)
    seq_raw_chords.append((onset, chord_pitches, quarter_length))
seq_raw_chords[:10]

In [None]:
# Sequence of (time, pitch, time-duration) triplets. It starts as a copy of the
# raw notes; every chord then contributes one triplet per pitch it contains,
# all sharing the chord's time and duration.
seq_triplets = list( seq_raw_notes )
seq_triplets.extend(
    ( onset , str( pitch ) , quarter_length )
    for ( onset , chord_pitches , quarter_length ) in seq_raw_chords
    for pitch in chord_pitches
)
seq_triplets[:10]

In [None]:
# Sorted sequence of "raw events". A raw event is a (time, label) pair whose
# label "duration:pitch" identifies a note by its time-duration and pitch.
# Raw events come both from the originally isolated notes and from the
# individual notes of each original chord.
sorted_seq_raw_events = []
for ( onset , pitch , quarter_length ) in seq_triplets:
    label = quarter_length + ':' + pitch
    sorted_seq_raw_events.append( ( onset , label ) )
sorted_seq_raw_events.sort()  # order by time, then by label
sorted_seq_raw_events[:10]

In [None]:
# Create the sorted sequence of notes: keep only the "duration:pitch" label of
# each raw event, in the order of sorted_seq_raw_events.
# The event list is deliberately left untouched here. Shuffling it in place
# would break the ordering of this sequence, make the output differ between
# runs, and silently change a list that an earlier cell already displayed.
sorted_seq_notes = [ ev for (ti,ev) in sorted_seq_raw_events ]
sorted_seq_notes[:10]

In [None]:
# Sorted sequence of generalized chords: all raw events sharing the same time
# are merged into a single label, and the labels are listed in time order.
events_by_onset = defaultdict( list )
for ( onset , label ) in sorted_seq_raw_events:
    events_by_onset[ onset ].append( label )

sorted_seq_generalized_chords = []
for onset in sorted( events_by_onset ):
    # Sort the labels so that the same set of simultaneous events always
    # produces the same joined string.
    simultaneous = sorted( events_by_onset[ onset ] )
    sorted_seq_generalized_chords.append( '='.join( simultaneous ) )
sorted_seq_generalized_chords[:10]

In [None]:
def rank_frequency(sequence):
    """Return the rank-frequency distribution of the symbols in `sequence`.

    Parameters
    ----------
    sequence : iterable of hashable
        Symbols to count (in this notebook: note labels or generalized-chord
        labels).

    Returns
    -------
    ranks : list of int
        1, 2, ..., n, where n is the number of distinct symbols.
    frequencies : list of float
        Number of occurrences of each distinct symbol, in decreasing order,
        so that frequencies[i] is the frequency of the symbol of rank ranks[i].
    """
    counts = defaultdict(float)
    for symbol in sequence:
        counts[symbol] += 1.0
    frequencies = sorted(counts.values(), reverse=True)
    ranks = list(range(1, len(frequencies) + 1))
    return ranks, frequencies


# Compute rank-frequency distribution for the sequence of notes.
notes_ranks, notes_frequencies = rank_frequency(sorted_seq_notes)

# Compute rank-frequency distribution for the sequence of generalized chords.
gen_chord_ranks, gen_chord_frequencies = rank_frequency(sorted_seq_generalized_chords)

# Reference curve f = 1000 / r for ranks 10..1999. On log-log axes it is a
# straight line of slope -1, used as a visual guide for Zipf's law.
zipf_law_ranks = range(10,2000)
zipf_law_frequencies = [1000.0/r for r in zipf_law_ranks]

# Plot curves in log-log
plt.title("Brandenburg Concerto No. 5 in D Major (BWV 1050) for Flute, Oboe & Strings")
plt.xlabel("r")
plt.ylabel("f")
plt.loglog()

plt.plot(notes_ranks, notes_frequencies,label="notes")
plt.plot(gen_chord_ranks, gen_chord_frequencies,label="generalized chords")
plt.plot(zipf_law_ranks, zipf_law_frequencies,label="Zipf's law")

plt.legend()
plt.show()