In [2]:
import os
import json
import pandas as pd
from glob import glob
import torch
from IPython.display import Audio as player
from panns_inference import AudioTagging
from qdrant_client import QdrantClient
from qdrant_client.http import models
from utils.audio import download_kaggle_dataset, generate_random_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [38]:
download_kaggle_dataset("jorgeruizdev/ludwig-music-dataset-moods-and-subgenres")

Dataset URL: https://www.kaggle.com/datasets/jorgeruizdev/ludwig-music-dataset-moods-and-subgenres
License(s): CC-BY-NC-SA-4.0
Downloading ludwig-music-dataset-moods-and-subgenres.zip to /Users/julian_castro/Documents/projects/audio_app/notebooks


100%|██████████| 11.3G/11.3G [07:55<00:00, 25.5MB/s]  





In [None]:
genres = ["latin", "pop", "reggae", "rock", "electronic"]
data_set = generate_random_dataset(genres, 130)
print(type(data_set))
item = data_set[0]
print(item)

Loading latin...
Loading pop...
Loading reggae...
Loading rock...
Loading electronic...
<class 'datasets.arrow_dataset.Dataset'>
{'audio': {'path': '/Users/julian_castro/Documents/projects/audio_app/data/ludwig-music-dataset-moods-and-subgenres/mp3/mp3/electronic/0vsOxIBK36keLH8Bezie9A.mp3', 'array': array([ 0.00000000e+00, -8.69972094e-09, -4.86083040e-09, ...,
       -1.59396112e-01, -1.23613618e-01, -1.67727619e-01]), 'sampling_rate': 44100}}


In [6]:
ids = [
    (data_set[i]["audio"]["path"].split("/")[-1].replace(".mp3", ""))
    for i in range(len(data_set))
]

index = [num for num in range(len(data_set))]
ids[:4]

['0vsOxIBK36keLH8Bezie9A',
 '0jmfiZP6MBtrORE1vpvFWU',
 '3ddxZzCKZpTObDygewlwXB',
 '3GCucY8KSMS80NEU0ey0by']

In [7]:
data_set = data_set.add_column("index", index)
data_set = data_set.add_column("id", ids)
data_set[-1]

{'audio': {'path': '/Users/julian_castro/Documents/projects/audio_app/data/ludwig-music-dataset-moods-and-subgenres/mp3/mp3/latin/6K5UtSAXChS6Rpm2NZJnrF.mp3',
  'array': array([ 0.00000000e+00, -4.89616880e-09,  3.03772474e-09, ...,
         -3.03649858e-01, -3.14786166e-01, -2.29230180e-01]),
  'sampling_rate': 44100},
 'index': 649,
 'id': '6K5UtSAXChS6Rpm2NZJnrF'}

In [13]:
label_path = "./../data/ludwig-music-dataset-moods-and-subgenres/labels.json"
labels = pd.read_json(label_path)
labels.head()

Unnamed: 0,tracks
35ecMLCJ1x2giJuvHLrI1t,{'otherSubgenres': {'L': [{'S': 'electronic---...
3p0EUhkUeCNrBIZwkjmeYe,"{'otherSubgenres': {'L': []}, 'artist': {'S': ..."
0rb6HvdvWJRkyhxsfFf1ep,"{'otherSubgenres': {'L': [{'S': 'rock'}, {'S':..."
4ssD5IkaicvM3L2Ff8FPWQ,"{'otherSubgenres': {'L': []}, 'artist': {'S': ..."
586ncAs8cYRTBlrxMDfmSP,{'otherSubgenres': {'L': [{'S': 'electronic---...


In [14]:
def get_metadata(item: dict) -> pd.Series:
    cols = ["artist", "genre", "name", "subgenres", "sad", "happy", "party", "popularity"]
    list_of_cols = []
    for col in cols:
        try:
            mdata = list(item[col].values())[0]
        except:
            mdata = "Unknown"
        list_of_cols.append(mdata)
    
    return pd.Series(list_of_cols, index=cols)

In [15]:
labels = labels["tracks"].apply(get_metadata).reset_index()
labels.head()

Unnamed: 0,index,artist,genre,name,subgenres,sad,happy,party,popularity
0,35ecMLCJ1x2giJuvHLrI1t,Riovolt,electronic,It Ain't Over 'till It's Over,"[{'S': 'electronic---ambient'}, {'S': 'electro...",0.808553755283,0.117319412529,0.0187958143651,31
1,3p0EUhkUeCNrBIZwkjmeYe,R.L. Burnside,blues,Fireman Ring the Bell,[{'S': 'blues---country blues'}],0.192250967026,0.589263141155,0.000199172980501,30
2,0rb6HvdvWJRkyhxsfFf1ep,Chapterhouse,rock,Falling Down,[{'S': 'rock---shoegaze'}],0.0304505825043,0.447863191366,0.909360527992,36
3,4ssD5IkaicvM3L2Ff8FPWQ,Lowell Fulsom,funk / soul,Tramp,[{'S': 'funk / soul---rhythm & blues'}],0.584066450596,0.448383301497,0.19365106523,30
4,586ncAs8cYRTBlrxMDfmSP,Paul Ellis,electronic,Dissolve,[{'S': 'electronic---ambient'}],0.515594601631,0.371642351151,0.0393997617066,0


In [17]:
def get_vals_from_subgenres(genres: list) -> list:
    genre_list = []
    for d in genres:
        if type(d) != str:
            for _, val in d.items():
                genre_list.append(val)
    
    return genre_list

In [18]:
labels["subgenres"] = labels["subgenres"].apply(get_vals_from_subgenres)
labels["subgenres"].head()

0    [electronic---ambient, electronic---downtempo,...
1                              [blues---country blues]
2                                    [rock---shoegaze]
3                       [funk / soul---rhythm & blues]
4                               [electronic---ambient]
Name: subgenres, dtype: object

In [20]:
files = []
for genre in genres:
    files_path = os.path.join("./../data/ludwig-music-dataset-moods-and-subgenres", "mp3", "mp3", genre, "*.mp3")
    genre_files = glob(files_path)
    files.extend(genre_files)

ids = [file.split("/")[-1].replace(".mp3", "") for file in files]
music_paths = pd.DataFrame(zip(ids, files), columns=["id", "url"])
music_paths.head()

Unnamed: 0,id,url
0,5f1SjUy6ySgaEUIIy2m9l4,./../data/ludwig-music-dataset-moods-and-subge...
1,03tbpnBQ9kiAL8GX0ouZUG,./../data/ludwig-music-dataset-moods-and-subge...
2,67wqhzuPtGbZNYG1eVoLsd,./../data/ludwig-music-dataset-moods-and-subge...
3,0YfDtPub9AsTu4278mDWJE,./../data/ludwig-music-dataset-moods-and-subge...
4,7vH4D94WWhAdjll6b62wiw,./../data/ludwig-music-dataset-moods-and-subge...


In [21]:
metadata = data_set.select_columns(["index", "id"]) \
    .to_pandas() \
    .merge(right=labels, how="left", left_on="id", right_on="index") \
    .merge(right=music_paths, how="left", left_on="id", right_on="id") \
    .drop("index_y", axis=1) \
    .rename({"index_x": "index"}, axis=1)

metadata.head()

Unnamed: 0,index,id,artist,genre,name,subgenres,sad,happy,party,popularity,url
0,0,0vsOxIBK36keLH8Bezie9A,Pendulum,electronic,Different,[electronic---drum n bass],0.159620672464,0.257579416037,0.977688491344,41,./../data/ludwig-music-dataset-moods-and-subge...
1,1,0jmfiZP6MBtrORE1vpvFWU,Nightmares On Wax,electronic,"Be, I Do","[electronic---downtempo, electronic---trip hop...",0.313051044941,0.187118664384,0.399286955595,44,./../data/ludwig-music-dataset-moods-and-subge...
2,2,3ddxZzCKZpTObDygewlwXB,Slow Magic,electronic,Waited 4 U,[electronic---electro],0.608639240265,0.105242662132,0.0857971906662,55,./../data/ludwig-music-dataset-moods-and-subge...
3,3,3GCucY8KSMS80NEU0ey0by,Pendulum,electronic,The Other Side,[electronic---drum n bass],0.209101587534,0.158047914505,0.991388976574,46,./../data/ludwig-music-dataset-moods-and-subge...
4,4,12BJTGESsHRonZSBGw9XXi,P-Model,electronic,Goes on Ghost,"[electronic---electro, electronic---new wave, ...",0.680889248848,0.449430823326,0.170126751065,7,./../data/ludwig-music-dataset-moods-and-subge...


## Create the payloads for each vector

In [22]:
payload = metadata.drop(["index", "id"], axis=1).to_dict(orient="records")
payload[:3]

[{'artist': 'Pendulum',
  'genre': 'electronic',
  'name': 'Different',
  'subgenres': ['electronic---drum n bass'],
  'sad': '0.159620672464',
  'happy': '0.257579416037',
  'party': '0.977688491344',
  'popularity': '41',
  'url': './../data/ludwig-music-dataset-moods-and-subgenres/mp3/mp3/electronic/0vsOxIBK36keLH8Bezie9A.mp3'},
 {'artist': 'Nightmares On Wax',
  'genre': 'electronic',
  'name': 'Be, I Do',
  'subgenres': ['electronic---downtempo',
   'electronic---trip hop',
   'hip hop---trip hop'],
  'sad': '0.313051044941',
  'happy': '0.187118664384',
  'party': '0.399286955595',
  'popularity': '44',
  'url': './../data/ludwig-music-dataset-moods-and-subgenres/mp3/mp3/electronic/0jmfiZP6MBtrORE1vpvFWU.mp3'},
 {'artist': 'Slow Magic',
  'genre': 'electronic',
  'name': 'Waited 4 U',
  'subgenres': ['electronic---electro'],
  'sad': '0.608639240265',
  'happy': '0.105242662132',
  'party': '0.0857971906662',
  'popularity': '55',
  'url': './../data/ludwig-music-dataset-moods-an

### Embeddings creation

In [23]:
audio_tagging = AudioTagging(checkpoint_path=None, device="cpu")

Checkpoint path: /Users/julian_castro/panns_data/Cnn14_mAP=0.431.pth


  checkpoint = torch.load(checkpoint_path, map_location=self.device)


Using CPU.


In [24]:
def get_embeddings(batch: dict):
    arrays = [torch.tensor(v['array'], dtype=torch.float64) for v in batch['audio']]
    inputs = torch.nn.utils.rnn.pad_sequence(arrays, batch_first=True, padding_value=0).type(torch.FloatTensor)
    with torch.no_grad():
        _, embedding = audio_tagging.inference(inputs)
    
    batch["panns_embeddings"] = embedding
    return batch

In [25]:
with torch.inference_mode():
    data_set = data_set.map(get_embeddings, batched=True, batch_size=8)

Map: 100%|██████████| 650/650 [12:41<00:00,  1.17s/ examples]


In [26]:
client = QdrantClient("http://localhost:6333")

COLLECTION_NAME = "music_collection"
client.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=models.VectorParams(size=2048, distance=models.Distance.COSINE)
)

  client.recreate_collection(


True

In [27]:
ds_pandas = data_set.to_pandas()

client.upsert(
    collection_name=COLLECTION_NAME,
    points=models.Batch(
        ids=ds_pandas["index"],
        vectors=ds_pandas["panns_embeddings"],
        payloads=payload
    )
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [43]:
result = client.retrieve(
    collection_name=COLLECTION_NAME,
    ids=[420]
)

result

[Record(id=420, payload={'artist': 'Elvis Presley', 'genre': 'pop', 'name': 'Burning Love', 'subgenres': ['pop---ballad', 'rock---pop rock'], 'sad': '0.197843462229', 'happy': '0.797862350941', 'party': '0.57128059864', 'popularity': '73', 'url': './../data/ludwig-music-dataset-moods-and-subgenres/mp3/mp3/pop/7zMUCLm1TN9o9JlLISztxO.mp3'}, vector=None, shard_key=None, order_value=None)]

In [45]:
print(ds_pandas.iloc[420])

client.search(
    collection_name=COLLECTION_NAME,
    query_vector=ds_pandas.iloc[420]["panns_embeddings"],
    limit=5
)

audio               {'bytes': b'RIFF$\\(\x00WAVEfmt \x10\x00\x00\x...
index                                                             420
id                                             7zMUCLm1TN9o9JlLISztxO
panns_embeddings    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
Name: 420, dtype: object


[ScoredPoint(id=420, version=0, score=1.0, payload={'artist': 'Elvis Presley', 'genre': 'pop', 'name': 'Burning Love', 'subgenres': ['pop---ballad', 'rock---pop rock'], 'sad': '0.197843462229', 'happy': '0.797862350941', 'party': '0.57128059864', 'popularity': '73', 'url': './../data/ludwig-music-dataset-moods-and-subgenres/mp3/mp3/pop/7zMUCLm1TN9o9JlLISztxO.mp3'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=213, version=0, score=0.96168196, payload={'artist': 'Chicago', 'genre': 'rock', 'name': 'The American Dream', 'subgenres': ['rock---pop rock'], 'sad': '0.0994533225894', 'happy': '0.861504614353', 'party': '0.0000157784197654', 'popularity': '9', 'url': './../data/ludwig-music-dataset-moods-and-subgenres/mp3/mp3/rock/5u9vhCMHqFfVW1QrOFX0ka.mp3'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=638, version=0, score=0.9250487, payload={'artist': 'Sin Bandera', 'genre': 'latin', 'name': 'Lo Ves', 'subgenres': ['latin---salsa', 'pop---ballad'], 'sa