In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations
import jams
import glob
from os import listdir
from os.path import isfile,join
import json
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from sklearn import metrics
import tensorflow_datasets as tfds

In [None]:
# Load every annotation file once and index it by recording name, i.e. the file
# name without directory and extension (for example '00_BN1-129-Eb_comp').
jams_list = {}
for anno in glob.glob("transcrybe/annotation/*"):
  # Derive the key from the path itself instead of hard-coded slice offsets,
  # so the keys stay correct if the directory prefix ever changes.
  recording = os.path.splitext(os.path.basename(anno))[0]
  jams_list[recording]=jams.load(anno)

In [None]:
def get_notes(j,start,stop):
  """Return the distinct MIDI pitches that sound in recording `j` during [start, stop].

  Parameters
  ----------
  j : key into the module-level `jams_list` dict.
  start, stop : window bounds in seconds; both bounds are inclusive.

  Returns
  -------
  numpy.ndarray
      Sorted array of at most six distinct integer pitches (rounded from the
      annotation values). Empty when no note overlaps the window.

  Raises
  ------
  KeyError
      If `j` is not a key of `jams_list`.
  """
  jam=jams_list[j]
  annos = jam.search(namespace='note_midi')
  if len(annos) == 0:
      # Fall back to the pitch_midi namespace when there are no note_midi annotations.
      annos = jam.search(namespace='pitch_midi')

  notes = []
  for anno in annos:
      for note in anno:
          note_end = note.time + note.duration
          # A note counts when its [time, time+duration] interval overlaps the
          # window: onset inside, offset inside, or spanning the whole window.
          if note.time <= stop and note_end >= start:
            notes.append(int(round(note.value)))

  # Remove repeated pitches BEFORE capping at six, keeping first-seen order, so
  # duplicates cannot crowd distinct pitches out of the result.
  distinct = list(dict.fromkeys(notes))[:6]
  return np.unique(np.asarray(distinct, dtype=int))
  
# Quick check: pitches found between 1.0 s and 1.2 s of one recording.
jam='00_BN1-129-Eb_comp'
start=1.0
stop=1.2
get_notes(jam,start,stop)

array([51, 58, 62, 67])

In [None]:
# Pitch lookup grid: Fret[string, fret] holds the MIDI pitch of that position.
# Row q starts at the pitch listed below for fret 0 and rises by one per fret
# across 18 columns (frets 0-17).
Fret = np.zeros((6, 18), dtype = np.int32)
for q, open_pitch in enumerate((40, 45, 50, 55, 59, 64)):
    Fret[q, :] = open_pitch + np.arange(18)

def get_tabs(jam,start,stop):
  """Choose a fingering for the pitches sounding in [start, stop] of recording `jam`.

  The pitches come from `get_notes` (sorted ascending) and are placed on
  distinct strings in ascending string order, using the module-level `Fret`
  grid to find where each pitch can be played.

  Returns a 6x18 nested list with a 1 at each chosen (string, fret) position.
  When no pitch is found the grid is all zeros; when no valid assignment
  exists an empty list is returned.
  """
  pitches = get_notes(jam,start,stop)
  # One 6x18 0/1 mask per pitch marking every position that produces it.
  masks = [(Fret == int(pitch)).astype(int).tolist() for pitch in pitches]

  n = len(pitches)
  if n == 0:
    return np.zeros((6, 18), dtype = np.int32).tolist() # no notes played

  # combinations() already yields strictly increasing string tuples, so the
  # k-th (lowest) pitch always lands on the k-th chosen string.
  candidates = []
  for strings in combinations(range(6), n):
    playable = all(1 in masks[k][s] for k, s in enumerate(strings))
    if not playable:
      continue
    grid = np.zeros((6, 18), dtype = np.int32)
    for k, s in enumerate(strings):
      grid[s] = masks[k][s]
    candidates.append(grid)

  # Score each candidate by how far its fretted notes sit from the "root"
  # (the fret of the first played string); fret 0 adds nothing to the score.
  best, b_root, min_score = [], 999, 999
  for grid in candidates:
    root, score = 999, 0
    for row in grid:
      hits = np.argwhere(row == 1)
      if len(hits) == 0:
        continue
      fret = hits[0][0]
      if root == 999:
        root = fret
      if fret != 0:
        score += abs(fret - root)
    # Lower score wins; equal scores are decided by the lower root fret.
    if score < min_score or (score == min_score and root < b_root):
      best, b_root, min_score = grid.tolist(), root, score
  return best

In [None]:
# Pitches found in the 35.6-35.8 s window of one recording.
get_notes('05_SS2-88-F_comp',35.6,35.8)

array([48, 52, 60])

In [None]:
# Fingering chosen for the same window (6 rows, one per string; 18 fret columns).
get_tabs('05_SS2-88-F_comp',35.6,35.8)

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [None]:
# Pitch lookup grid with 19 columns. Column 0 is left at zero here (get_tabs
# uses it to mark strings that are not played); columns 1-18 hold the MIDI
# pitches of each row, starting from the values listed below and rising by
# one per column.
Fret = np.zeros((6, 19), dtype = np.int32)
for q, open_pitch in enumerate((40, 45, 50, 55, 59, 64)):
    Fret[q, 1:] = open_pitch + np.arange(18)

def get_tabs(jam,start,stop):
  """Choose a fingering for the pitches sounding in [start, stop] of recording `jam`.

  Each row of the result describes one string as a one-hot vector of length
  19: column 0 means "string not played", column c >= 1 means the position
  stored in column c of the module-level `Fret` grid.

  The pitches come from `get_notes` (sorted ascending) and are assigned to
  distinct strings in ascending string order. Among all playable assignments
  the one with the smallest score wins, where the score is the sum over
  played strings of |fret - root|, `root` being the fret of the first played
  string; notes at fret index 0 add nothing. Ties go to the lower root.

  Returns
  -------
  list
      6x19 nested list of 0/1 ints. When no pitch is found, every string is
      marked "not played". When the pitches cannot be assigned to strings an
      empty list is returned.
  """
  def blank_tab():
    # Start from "no string played": column 0 set on every row.
    tab = np.zeros((6, 19), dtype = np.int32)
    tab[:, 0] = 1
    return tab

  MIDI_val = get_notes(jam,start,stop)
  n = len(MIDI_val)
  if n == 0:
    # Silence uses the same encoding as any other unplayed string, so each
    # row is still a valid one-hot vector (previously this was all zeros).
    return blank_tab().tolist()

  # One 6x19 mask per pitch marking where it can be played. Column 0 is the
  # "not played" marker, never a position, so it is always cleared.
  masks = []
  for pitch in MIDI_val:
    mask = (Fret == int(pitch)).astype(np.int32)
    mask[:, 0] = 0
    masks.append(mask)

  # combinations() already yields strictly increasing string tuples, so the
  # k-th (lowest) pitch always lands on the k-th chosen string.
  tabs=[]
  for strings in combinations(range(6), n):
    if all(masks[k][s].any() for k, s in enumerate(strings)):
      tab = blank_tab()
      for k, s in enumerate(strings):
        tab[s] = masks[k][s]
      tabs.append(tab)

  best = []
  b_root = 999
  min_score = 999
  for tab in tabs:
    root = 999
    score = 0
    # Drop the marker column so the index found below is the fret number.
    for row in tab[:,1:]:
      hits = np.flatnonzero(row)
      if hits.size == 0:
        continue  # string not played
      fret = hits[0]
      if root == 999: root = fret
      if fret != 0: score += abs(fret - root)
    if score < min_score or (score == min_score and root < b_root):
      min_score = score
      best = tab.tolist()
      b_root = root
  return best

In [None]:
# Pitches found in the 35.6-35.8 s window of one recording.
get_notes('05_SS2-88-F_comp',35.6,35.8)

array([48, 52, 60])

In [None]:
# Fingering for the same window in the 19-column layout (column 0 = string not played).
get_tabs('05_SS2-88-F_comp',35.6,35.8)

[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [None]:
# Compute the tab label for every 0.2 s spectrogram plot and store the labels
# as one JSON file per recording, keyed by plot name.
# NOTE(review): files are written to "tabs2/", while later cells read from
# 'transcrybe/tabs2/' -- confirm the files are moved or the paths should match.
annotation_dir = 'transcrybe/annotation'
plot_dir = 'transcrybe/plot_audio'
os.makedirs("tabs2", exist_ok=True)

a = [f[:-5] for f in listdir(annotation_dir) if isfile(join(annotation_dir, f))]
# List the plot directory once instead of once per recording.
plot_names = [f[:-4] for f in listdir(plot_dir) if isfile(join(plot_dir, f))]

for anno in a:
  # A plot name is the recording name followed by a 3-digit window index, so
  # match on the exact prefix rather than on a substring.
  plots = [p for p in plot_names if p[:-3] == anno]
  if not plots:
    # Nothing to label; the old code reused the previous recording's `plot`
    # here and overwrote that recording's file with an empty dict.
    continue
  tabs={}
  for plot in plots:
    start = round(int(plot[-3:])*0.2,1)
    stop = start+0.2
    tabs[plot]=get_tabs(anno,start,stop)
  path = "tabs2/"+anno+".txt"
  with open(path,'w') as fh:
    json.dump(tabs, fh)

In [None]:
# Spot-check one stored label (window 110 of one recording).
# NOTE(review): this path ('transcrybe/tabs2/tabs2/') differs from both the
# directory written above and the one read by the dataset loader -- confirm.
json.load(open("transcrybe/tabs2/tabs2/03_Jazz1-130-D_solo.txt"))['03_Jazz1-130-D_solo110']

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [None]:
# Build the dataset: x holds 64x64 grayscale plot images scaled to [0, 1],
# y holds the matching tab labels. Plots with a missing/unreadable image or
# label are skipped, as are plots whose stored label is empty.
x = []
y = []
tab_cache = {}   # label file path -> parsed JSON, so each file is read only once
skipped = 0
# Sorted so the sample order does not depend on the OS directory order.
a = sorted(f for f in listdir('transcrybe/plot_audio') if isfile(join('transcrybe/plot_audio', f)))
for plot in a:
  imgp = 'transcrybe/plot_audio/'+plot
  tabp = 'transcrybe/tabs2/'+plot[:-7]+'.txt'
  try:
    img = tf.keras.preprocessing.image.load_img(imgp,color_mode='grayscale',target_size=(64, 64))
    if tabp not in tab_cache:
      with open(tabp) as fh:
        tab_cache[tabp] = json.load(fh)
    tab = tab_cache[tabp][plot[:-4]]
    pixels = tf.keras.preprocessing.image.img_to_array(img).reshape(64,64)/255
    has_label = len(tab)!=0
  except (OSError, KeyError, ValueError, TypeError):
    # Missing/corrupt image or label file, or no entry for this plot.
    # Unlike a bare `except:`, this lets KeyboardInterrupt and programming
    # errors (NameError, AttributeError, ...) surface.
    skipped += 1
    continue
  if has_label:
    x.append(pixels)
    y.append(tab)
print("loaded", len(x), "samples; skipped", skipped, "unreadable plots")

In [None]:
# Hold out 20% of the samples. The seed is fixed so that re-running the
# notebook reproduces the same split.
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=42)

In [None]:
# Turn the four split lists into numpy arrays for Keras.
xtrain, xtest, ytrain, ytest = (
    np.array(part) for part in (xtrain, xtest, ytrain, ytest)
)

In [None]:
# Model definition (Keras Sequential API): a small CNN that maps a 64x64
# grayscale image to a (6, 19) grid, one 19-way distribution per string.
model = keras.Sequential()
model.add(keras.layers.Conv2D(64, kernel_size =(3, 3), activation='relu',input_shape=(64, 64, 1)))
model.add(keras.layers.Conv2D(64, kernel_size = (3, 3), activation = 'relu'))
model.add(keras.layers.Conv2D(64, kernel_size = (3, 3), activation = 'relu'))
model.add(keras.layers.MaxPooling2D(pool_size = (2, 2)))
model.add(keras.layers.Conv2D(64, kernel_size = (3, 3), activation = 'relu'))
model.add(keras.layers.MaxPooling2D(pool_size = (2, 2)))
model.add(keras.layers.Flatten())

model.add(keras.layers.Dense(152, activation = 'relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(76))
model.add(keras.layers.Dropout(0.2))

# Output head: 6*19 logits, reshaped to (string, class), then softmax over the
# 19 classes of each string. Applying softmax before the reshape normalised
# across all 114 values at once, so no row was a probability distribution.
model.add(keras.layers.Dense(19*6))
model.add(keras.layers.Reshape((6,19)))
model.add(keras.layers.Softmax(axis=-1))

# `learning_rate` replaces the deprecated `lr` argument.
model.compile(optimizer = keras.optimizers.Adam(learning_rate=0.01), loss = 'categorical_crossentropy',
              metrics = ['accuracy'])



In [None]:
# Train for four epochs, validating on the held-out split after each epoch,
# then evaluate once more on that same split.
history = model.fit(
    xtrain,
    ytrain,
    epochs=4,
    verbose=1,
    validation_data=(xtest, ytest),
)
score = model.evaluate(xtest, ytest, verbose=1)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
# Save the trained weights only (save_weights does not store the architecture).
model.save_weights('final_model.h5')

In [None]:
# Replace `model` with a complete model loaded from disk.
# NOTE(review): this reads "model.h5", but the previous cell writes weights to
# 'final_model.h5' and nothing in this notebook creates "model.h5" -- confirm
# which file is intended, otherwise Restart & Run All fails here.
model=keras.models.load_model("model.h5")

In [None]:
# Sanity check: raw model output for the first training image.
a=model.predict(xtrain[0:1])
a



array([[[2.5449467e-01, 2.0598336e-03, 1.4713472e-03, 2.0609281e-03,
         2.4332975e-03, 6.5900027e-03, 2.3737771e-03, 4.1230121e-03,
         1.5700237e-03, 1.9498143e-03, 2.0847165e-03, 2.0686807e-03,
         2.0445075e-03, 1.3149033e-03, 2.0350923e-03, 6.7735015e-04,
         1.7407200e-04, 1.3188108e-04, 1.4689237e-04],
        [1.5703712e-01, 1.7932594e-03, 2.2736213e-03, 2.7825502e-03,
         2.8909561e-03, 4.1422448e-03, 3.7197848e-03, 5.2860016e-03,
         2.3821895e-03, 2.6842067e-03, 1.0935036e-03, 6.7005085e-04,
         2.4651559e-03, 1.7982892e-03, 2.0395990e-03, 1.8586299e-03,
         7.3872978e-04, 3.9038164e-04, 2.2713316e-04],
        [3.2755069e-02, 2.5810786e-03, 1.5484366e-03, 3.3429412e-03,
         6.1917650e-03, 8.5049216e-03, 4.3924805e-03, 5.2174237e-03,
         3.8121229e-03, 3.6168792e-03, 1.8409844e-03, 9.1140735e-04,
         2.2215873e-03, 1.3156296e-03, 1.5389419e-03, 6.2816375e-04,
         5.2516436e-04, 1.9286270e-04, 8.3949402e-05],
       

In [None]:
def audio_CQT(audio_path, start, dur):  # start and dur in seconds
    """Return a thresholded constant-Q spectrogram (in dB) of one audio excerpt.

    Parameters
    ----------
    audio_path : path of the audio file to read.
    start : offset of the excerpt from the beginning of the file, in seconds.
    dur : length of the excerpt, in seconds.

    Returns
    -------
    numpy.ndarray
        CQT with 96 bins (12 per octave), converted to dB relative to the
        excerpt's maximum; every value below -60 dB is replaced by -120.
    """
    # Load at the same rate the CQT is computed with. Previously the file was
    # read at its native rate while the CQT was told sr=44100, which mislabels
    # every frequency bin for files that are not 44.1 kHz. For 44.1 kHz files
    # the result is unchanged.
    sr = 44100
    data, _ = librosa.load(audio_path, sr = sr, mono = True, offset = start, duration = dur)

    # Perform the Constant-Q Transform
    CQT = librosa.cqt(data, sr = sr, hop_length = 1024, fmin = None, n_bins = 96, bins_per_octave = 12)
    # Raise the magnitude to the 4th power before the dB conversion.
    CQT_mag = librosa.magphase(CQT)[0]**4
    CQTdB = librosa.amplitude_to_db(CQT_mag, ref = np.amax)

    # Noise removal: push everything quieter than -60 dB down to -120 dB.
    new_CQT = np.copy(CQTdB)
    new_CQT[new_CQT < -60] = -120
    return new_CQT

In [None]:
def auto_tab(wav):
  """Transcribe an audio file to ASCII tablature and print it.

  The file is cut into 0.2 s windows. Each window is rendered as a grayscale
  CQT image (written to 'test.png'), fed to the module-level `model`, and the
  most likely class per string is kept. Class 0 means "string not played";
  class c >= 1 is printed as c - 1.

  Parameters
  ----------
  wav : path of the audio file to transcribe.

  Returns
  -------
  None. Six lines are printed, from string index 5 down to 0, with one
  5-character cell per window.
  """
  # Native sample rate: the audio is only needed to measure its duration, so
  # there is no reason to resample it.
  y,sr=librosa.load(wav, sr=None)
  dur = librosa.get_duration(y=y,sr=sr)

  frames = []  # one array of 6 predicted class indices per window
  for j in np.arange(0,dur,0.2):
    C = audio_CQT(wav,j,0.2)
    fig, ax = plt.subplots()
    ax.axis('off')
    librosa.display.specshow(C, x_axis='time', y_axis='cqt_note', ax=ax, cmap='gray_r')
    fig.savefig("test.png")
    plt.close(fig)
    img = tf.keras.preprocessing.image.load_img('test.png',color_mode='grayscale',target_size=(64, 64))
    arr = tf.keras.preprocessing.image.img_to_array(img).reshape(64,64)/255
    tab = model.predict(np.array([arr]))
    frames.append(np.argmax(tab[0], axis=1))

  final_tab = ''
  for string in range(5, -1, -1):
    for classes in frames:
      fret = int(classes[string]) - 1
      if fret >= 0:
        # Pad to a fixed width so two-digit frets keep the columns aligned
        # ('--7--', '--12-').
        final_tab += ('--'+str(fret)).ljust(5, '-')
      else:
        final_tab += '-----'
    final_tab+='\n'
  print(final_tab)


In [None]:
# Transcribe one audio file; the tablature is printed below.
auto_tab('transcrybe/c chord.wav')

---------------------------------------------------------------------------------------------------------
-------1----1----1----1----1----1------------------------------------------------------------------------
-------0----0----0----0----0----0----0----0----0----0----0----0----0----0----0----0----0----0----0----0--
-------2----2----2----2----2----2----2----2----2----2----2----2----2----2----2----2----2----2----2-------
-------3----3----3----3----3----3----3----3----3----3----3----3----3----3----3----3----3----3----3----3--
--0------------------------------------------------------------------------------------------------------



In [None]:
# Transcribe a second audio file.
auto_tab('transcrybe/test.wav')

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------0----0------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------7----7----7-------------------4----------------------------------4-------------------4-----------------------------7-------------------4---------4-------------------3-------------------4----4----4----4----4----------------------------------7----7--------------------------