In [43]:
%matplotlib inline
import numpy as np, scipy, matplotlib.pyplot as plt, sklearn, IPython.display as ipd
import librosa, librosa.display
import warnings
warnings.simplefilter("ignore")
from midiutil import MIDIFile             #http://midiutil.readthedocs.io/en/1.2.1/
from music21.tempo import MetronomeMark   #http://web.mit.edu/music21/
from music21.note import Note, Rest
from music21.stream import Stream
from music21 import metadata
from music21 import instrument
from music21 import midi 
from music21.key import Key
import json
import muspy

Next steps:

- add dictionary for notes with a range
- convert to staff notation
- estimate tempo

In [None]:
x, sr = librosa.load("../../data/grandpiano.wav",duration=10)

In [None]:
ipd.Audio(x,rate=sr)

In [None]:
plt.figure(figsize=(14, 5))
librosa.display.waveshow(x,sr)

In [None]:
S = librosa.stft(x)
S_db = librosa.amplitude_to_db(abs(S))
plt.figure(figsize=(14, 4))
librosa.display.specshow(S, sr=sr, x_axis='time', y_axis='log')
plt.colorbar()

In [None]:
X, X_phase = librosa.magphase(S)
onsets = librosa.onset.onset_detect(y=x, sr=sr, units='time')
n_components = len(onsets)
W, H = librosa.decompose.decompose(X,n_components=n_components, sort=True)

In [None]:
S.shape
x.shape
H[1].shape

W contains information about the frequencies\
H contains information about temporal properties.

In [None]:
plt.figure(figsize=(20, 10))
for n in range(n_components):
    plt.subplot(5, 5, n+1)
    plt.plot(W[:,n])
    plt.ylim(-1, W.max())
    plt.xlim(0, W.shape[0])
    plt.ylabel('Component %d' % n)
    print(np.argmax(W[:,n]) * 11025/1025)

In [None]:
plt.figure(figsize=(20, 10))
for n in range(n_components):
    plt.subplot(5, 5, n+1)
    plt.plot(H[n])
    plt.ylim(0, H.max())
    plt.xlim(0, H.shape[1])
    plt.ylabel('Component %d' % n)

In [69]:
note_info = []
tuple_list = []
for n in range(n_components):
    spectral = W[:,n]
    temporal = H[n]

    onsets_ = librosa.util.peak_pick(temporal,pre_max = 6, post_max=6,pre_avg=50, post_avg=50,delta=temporal.max()/2,wait=0)

    #print(librosa.hz_to_midi(np.argmax(spectral) * 11025/1025))  #can detect half the sampling frequency
    n = librosa.hz_to_midi(np.argmax(spectral) * 11025/1025)
    for o in onsets_:
        tuple_list += [[round(o*10/431,3),round(n,3)]]
    #print(librosa.hz_to_note(np.argmax(spectral) * 11025/1025))  #can detect half the sampling frequency
    #print(onsets_ * 10/431)
sorted_list = sorted(tuple_list)
big_dictionary = ""
durations = []
for i in range(len(sorted_list)-10):
   for j in range(10):
        duration = -sorted_list[i+j][0] + sorted_list[i+1+j][0]
        if duration != 0:
            break
   sorted_list[i] += [round(duration,3)]
   #dictonary = {
    #"time":sorted_list[i][0],
    #"duration": duration,
    #"pitch": sorted_list[i][1],
    #"velocity": 64
   #}
   #big_dictionary += str(dictonary) + ","
print(sorted_list)


[[0.023, 63.044, 0.047], [0.023, 70.651, 0.047], [0.023, 77.798, 0.047], [0.07, 66.788, 0.069], [0.139, 77.017, 0.418], [0.557, 75.044, 0.51], [1.067, 79.495, 0.023], [1.09, 79.027, 1.207], [2.297, 79.495, 0.023], [2.32, 79.027, 0.163], [2.483, 81.249, 0.116], [2.599, 81.039, 0.046], [2.645, 79.495, 0.023], [2.645, 82.262, 0.023], [2.668, 69.864, 0.023], [2.668, 70.651, 0.023], [2.668, 82.262, 0.023], [2.691, 79.027, 1.021], [2.691, 82.064, 1.021], [3.712, 82.262, 0.186], [3.898, 84.301, 0.023], [3.921, 83.948, 0.139], [4.06, 79.495, 0.024], [4.06, 86.282, 0.024], [4.084, 69.864, 0.023], [4.107, 85.967, 0.301], [4.408, 84.301, 0.348], [4.756, 82.262, 0.024], [4.78, 82.262, 0.023], [4.803, 82.064, 0.162], [4.965, 74.125, 0.186], [5.151, 85.967, 0.325], [5.476, 69.039, 0.023], [5.476, 84.301, 0.023], [5.499, 77.798, 0.046], [5.499, 81.039, 0.046], [5.499, 83.948, 0.046], [5.545, 69.039, 0.464], [6.009, 69.039, 0.882], [6.891, 89.281, 0.023], [6.914, 89.017, 0.023], [6.937, 72.125, 1.578]

In [65]:
tempos = "tempos: " + str([{"time": 0, "qpm":tempo}])
res = tempos +  " tracks: [" + big_dictionary + "]"
js = json.loads(json.dumps(res))
with open("sample.json", "w") as outfile:
    json.dump(js, outfile)

In [None]:
 #s = music21.stream.Stream()
 #s.append(music21.key.Key('E-'))
 #s.append(music21.meter.TimeSignature('2/4'))
 #s.append(music21.note.Rest(quarterLength=0.5))
 #s.append(music21.note.Note('g', quarterLength=0.5))
 #s.append(music21.note.Note('e'))
 #s.append(music21.note.Note('g', quarterLength=0.5))
 #s.append(music21.note.Note('g', quarterLength=0.5))
 #s.append(music21.note.Note('e-', quarterLength=2))

 #s.show('text')

In [None]:
hop_length = 1024
tempo, beats=librosa.beat.beat_track(y=None, sr=sr, onset_envelope=onsets, hop_length=hop_length,
               start_bpm=120.0, tightness=100, trim=True, bpm=None,
               units='frames')
tempo=int(2*round(tempo/2))
mm = MetronomeMark(referent='quarter', number=tempo)

In [None]:
s = Stream()
s.append(mm)
#electricguitar = instrument.fromString('electric guitar')
#electricguitar.midiChannel=0
#electricguitar.midiProgram=30  #Set program to Overdriven Guitar
#s.append(electricguitar)
s.insert(0, metadata.Metadata())
for note in note_info:
    s.append(note)