In [None]:
import os
from keras.models import load_model,Model
from keras.layers import Lambda,Dense,Input
import keras.backend as K
import librosa.display
import cv2
import librosa
import matplotlib.pyplot as plt
import numpy as np
import string
import random
import tensorflow as tf

def create_spectrogram(clip,sample_rate,save_path):
    """Render a mel spectrogram of an audio clip and save it as an image.

    The plot is drawn without axis ticks or frame, so the saved file holds
    only the spectrogram itself.

    Args:
        clip: Audio samples, passed to ``librosa.feature.melspectrogram`` as ``y``.
        sample_rate: Sampling rate of ``clip``, passed as ``sr``.
        save_path: Destination path handed to ``Figure.savefig``.
    """
    plt.interactive(False)
    fig=plt.figure(figsize=[0.72,0.72])
    try:
        ax=fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S=librosa.feature.melspectrogram(y=clip,sr=sample_rate)
        # Draw on this figure's axes explicitly instead of relying on
        # pyplot's notion of the "current" axes.
        librosa.display.specshow(librosa.power_to_db(S,ref=np.max),ax=ax)
        fig.savefig(save_path,dpi=400,bbox_inches='tight',pad_inches=0)
    finally:
        # Release the figure even when rendering or saving fails; this
        # function is called once per fragment, so leaked figures add up.
        # Only this figure is closed, other open figures are left alone.
        fig.clf()
        plt.close(fig)

def load_and_preprocess_image(path):
    """Read an image file and return it as a 150x150 RGB array divided by 255.

    Returns None, after printing a warning, when ``cv2.imread`` cannot
    read ``path``.
    """
    bgr = cv2.imread(path)
    if bgr is not None:
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (150, 150))
        # Scale pixel values by 1/255.
        return resized / 255.0
    print(f"Warning: Could not load image at {path}")
    return None

In [None]:
# NOTE(review): safe_mode=False permits unsafe (lambda) deserialization while
# loading, which can execute arbitrary code. Only load archives you trust.
model = tf.keras.models.load_model('embdmodel.keras', safe_mode=False)
model.summary(expand_nested=True, show_trainable=True)

In [None]:
# The embedding sub-network is picked out of the loaded model by position.
# NOTE(review): index 2 is a magic number; confirm it against model.summary().
embedding_model = model.layers[2]
embedding_model.summary(expand_nested=True, show_trainable=True)

In [None]:
# Read the songs, split each into 10 s segments and create a spectrogram per segment.

songs_path = '/content/Music/'
os.makedirs('./Test_Spectrograms/', exist_ok=True)

# Alphabet used when generating random spectrogram file names.
charsets=string.ascii_letters

# Maps a song file name to the list of embeddings of its segments.
songspecdict={}

# Keep only the directory entries for which os.path.isfile is true.
songs_list = list(filter(
    lambda entry: os.path.isfile(os.path.join(songs_path, entry)),
    os.listdir(songs_path),
))

def get_random_name():
    """Return a random file stem: 20 characters drawn from ``charsets``
    followed by a random integer in [0, 1000)."""
    letters = [random.choice(charsets) for _ in range(20)]
    suffix = np.random.randint(0, 1000)
    return ''.join(letters) + str(suffix)

# For every song: cut it into 10 s fragments, render each fragment as a
# spectrogram image, embed the image and store the embeddings per song.
for song in songs_list:
    print(song)
    songfile,sr=librosa.load(os.path.join(songs_path,song))
    seg_len=10*sr            # samples in one 10 s fragment
    total=len(songfile)      # samples in the whole song
    if total<seg_len:
        # No full fragment fits. Storing an empty list would make a later
        # min() over this song's distances fail, so leave the song out.
        print(f"Warning: {song} is shorter than 10 s; skipping")
        continue

    # All fragment boundaries are expressed in samples. The first
    # n_full-1 fragments are consecutive 10 s windows. The last fragment
    # is the final 10 s of the song, so leftover seconds at the end are
    # included by borrowing the missing part from the preceding audio.
    n_full=total//seg_len
    bounds=[(k*seg_len,(k+1)*seg_len) for k in range(n_full-1)]
    bounds.append((total-seg_len,total))

    emblist=[]
    for st,end in bounds:
        songfrag=np.copy(songfile[st:end])
        spec_path='./Test_Spectrograms/'+get_random_name()+'.png'
        create_spectrogram(songfrag,sr,spec_path)
        img_t=load_and_preprocess_image(spec_path)
        if img_t is None:
            # The loader has already printed a warning; a None image
            # cannot be fed to the model, so drop this fragment.
            continue
        img_t=np.expand_dims(img_t,axis=0)
        # verbose=0 keeps one progress bar per fragment out of the output.
        emb=embedding_model.predict(img_t,verbose=0)
        emblist.append(emb)

    if emblist:
        songspecdict[song]=emblist


import pickle

# Persist the per-song embeddings to dict.pickle.
with open('dict.pickle', 'wb') as out_file:
    pickle.dump(songspecdict, out_file, protocol=pickle.HIGHEST_PROTOCOL)

Donell Jones - This Luv [cQUYe18YmSw].mp3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37

In [None]:
import pickle

# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load a dict.pickle that comes from a trusted source.
with open('dict.pickle', 'rb') as in_file:
    songspecdict = pickle.load(in_file)

In [None]:
# Identify a song from its first 10 s: embed the query clip and report the
# stored song that has the closest fragment embedding (Euclidean distance).
songs_path = '/content/Music/'
song,sr=librosa.load(os.path.join(songs_path,'Alicia Keys - Un-thinkable (I\'m Ready) (Official Video).mp3'))
# Use the sampling rate librosa actually returned instead of hard-coded
# 22050 / 220500, so clip length and spectrogram stay consistent with it.
to_match=np.copy(song[:10*sr])
create_spectrogram(to_match,sr,'test.png')
songsdistdict={}

to_match_img=load_and_preprocess_image('test.png')
if to_match_img is None:
    raise RuntimeError("Could not read the query spectrogram 'test.png'")
to_match_img=np.expand_dims(to_match_img,axis=0)

to_match_emb=embedding_model.predict(to_match_img)

for key,values in songspecdict.items():
    if not values:
        # A song without embeddings has no distance; min() would raise.
        continue
    # Distance to a song = distance to its closest fragment.
    songsdistdict[key]=min(np.linalg.norm(to_match_emb-embd) for embd in values)

if not songsdistdict:
    raise ValueError('No song embeddings available to match against')

song_titles=list(songsdistdict.keys())
distances=list(songsdistdict.values())

print(f'Recognized Song={min(songsdistdict, key=songsdistdict.get)}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Recognized Song=Alicia Keys - Un-thinkable (I'm Ready) (Official Video).mp3
