## Step 1: Define the data reader

In [1]:
import ast
import os
import random

import chromadb
import pandas as pd
import soundfile
import torch
import torchaudio
from dotenv import load_dotenv
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import Wav2Vec2FeatureExtractor
#import palimpzest as pz

load_dotenv()

True

In [2]:
import pdb

In [3]:
import palimpzest as pz

  return torch._C._cuda_getDeviceCount() > 0


In [4]:
from palimpzest.core.lib.fields import (
    BooleanField,
    BytesField,
    Field,
    FloatField,
    IntField,
    ListField,
    NumericField,
    StringField,
    AudioField
)

In [5]:
# Schema of the records emitted by the data reader: one identifier column plus
# one audio column per crop setting (10, 30, 60), matching the `audio_{crop}`
# keys that MusicDataReader2 fills in.
audio2text_cols=[
    {"name":"song_id", 'type':str, 'desc': 'Unique track ID of the song'},
    {"name": "audio_10", 'type': AudioField, 'desc':'path to audio file'},
    {"name": "audio_30", 'type': AudioField, 'desc':'path to audio file'},
    {"name": "audio_60", 'type': AudioField, 'desc':'path to audio file'},

]

In [6]:
def load_audio(
    file_path,
    target_sr,
    is_mono=True,
    is_normalize=False,
    crop=True,
    crop_to_length_in_sec=None,
    crop_to_length_in_sample_points=None,
    crop_randomly=False,
    pad=False,
    return_start=False,
    device=torch.device('cpu')
):
    """Load audio file and convert to target sample rate.
    Supports cropping and padding.

    Args:
        file_path (str): path to audio file
        target_sr (int): target sample rate, if not equal to sample rate of audio file, resample to target_sr
        is_mono (bool, optional): convert to mono. Defaults to True.
        is_normalize (bool, optional): normalize to [-1, 1]. Silent or empty waveforms are left
            unchanged instead of being divided by zero. Defaults to False.
        crop (bool, optional): whether to run the cropping/padding step at all. Defaults to True.
        crop_to_length_in_sec (float, optional): crop to specified length in seconds. Defaults to None.
        crop_to_length_in_sample_points (int, optional): crop to specified length in sample points. Defaults to None. Note that the crop length in sample points is calculated before resampling.
        crop_randomly (bool, optional): crop randomly. Defaults to False.
        pad (bool, optional): pad to specified length if waveform is shorter than specified length. Defaults to False.
        return_start (bool, optional): also return the start index of the crop in the original
            (pre-resampling) waveform; 0 when no cropping happened. Defaults to False.
        device (torch.device, optional): device to use for resampling. Defaults to torch.device('cpu').
            The waveform is only moved to this device when resampling is actually needed.

    Returns:
        torch.Tensor: waveform of shape (n_channel, n_sample); a single channel when is_mono is True.
        If return_start is True, a tuple (waveform, start) is returned instead.
    """
    # TODO: deal with target_depth
    try:
        waveform, sample_rate = torchaudio.load(file_path)
    except Exception:
        # Best-effort fallback: retry with the soundfile backend when the default loader fails.
        waveform, sample_rate = torchaudio.backend.soundfile_backend.load(file_path)

    if is_mono and waveform.shape[0] > 1:
        # Down-mix by averaging channels; keepdim preserves the leading channel axis.
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    if is_normalize and waveform.numel() > 0:
        peak = waveform.abs().max()
        # A silent clip has peak 0; dividing would fill the waveform with NaNs.
        if peak > 0:
            waveform = waveform / peak

    # Defined up front so `return_start=True` also works when `crop=False`.
    start = 0
    if crop:
        waveform, start = crop_audio(
            waveform,
            sample_rate,
            crop_to_length_in_sec=crop_to_length_in_sec,
            crop_to_length_in_sample_points=crop_to_length_in_sample_points,
            crop_randomly=crop_randomly,
            pad=pad,
        )

    if sample_rate != target_sr:
        resampler = torchaudio.transforms.Resample(sample_rate, target_sr)
        waveform = waveform.to(device)
        resampler = resampler.to(device)
        waveform = resampler(waveform)

    if return_start:
        return waveform, start
    return waveform


def crop_audio(
    waveform,
    sample_rate,
    crop_to_length_in_sec=None,
    crop_to_length_in_sample_points=None,
    crop_randomly=False,
    pad=False,
):
    """Trim (or optionally zero-pad) a waveform to a requested length.

    The length may be given in seconds or in sample points, but not both.
    When neither is given (or the resulting length is 0) the waveform is
    returned untouched.

    Args:
        waveform (torch.Tensor): audio tensor whose last axis is time.
        sample_rate (int): sample rate of waveform, used to convert seconds to samples.
        crop_to_length_in_sec (float, optional): requested length in seconds. Defaults to None.
        crop_to_length_in_sample_points (int, optional): requested length in sample points. Defaults to None.
        crop_randomly (bool, optional): pick the crop start uniformly at random instead of 0. Defaults to False.
        pad (bool, optional): right-pad with zeros when the waveform is shorter than requested. Defaults to False.

    Returns:
        torch.Tensor: cropped (or padded, or unchanged) waveform
        int: start index of the crop in the original waveform (0 unless cropped randomly)
    """
    assert crop_to_length_in_sec is None or crop_to_length_in_sample_points is None, \
    "Only one of crop_to_length_in_sec and crop_to_length_in_sample_points can be specified"

    # Resolve the requested length to a number of sample points.
    if crop_to_length_in_sec:
        target_len = int(sample_rate * crop_to_length_in_sec)
    elif crop_to_length_in_sample_points:
        target_len = crop_to_length_in_sample_points
    else:
        target_len = None

    start = 0
    if not target_len:
        # Nothing requested: hand the input back as-is.
        return waveform, start

    n_samples = waveform.shape[-1]
    if n_samples > target_len:
        if crop_randomly:
            start = random.randint(0, n_samples - target_len)
        return waveform[..., start:start + target_len], start

    if n_samples < target_len and pad:
        missing = target_len - n_samples
        waveform = torch.nn.functional.pad(waveform, (0, missing))

    return waveform, start




In [7]:
class MusicDataReader2(pz.DataReader):
    """Palimpzest data reader serving pre-processed audio crops of songs.

    All songs of the selected split are loaded and run through the feature
    extractor once in ``__init__``; ``__getitem__`` then only looks the
    results up. For each song three crops are stored under the keys
    ``audio_10``, ``audio_30`` and ``audio_60``.
    """

    def __init__(self, songs_dir,labels_dir=None,device=None,processor=None,split: str = "test"):
        """Index the songs directory and pre-process every song of the split.

        Args:
            songs_dir (str): directory containing the audio files.
            labels_dir (str, optional): path to the labels CSV; only read for
                non-test splits.
            device (optional): device the tensors are moved to. Defaults to 'cpu'.
            processor (optional): feature extractor; the MERT-v1-330M extractor
                is loaded when omitted.
            split (str): "test" keeps the last 10 files (sorted by name), any
                other value keeps the first 15 and loads the labels CSV.
        """
        super().__init__(audio2text_cols)
        self.split=split
        self.songs_dir =songs_dir
        self.songs = sorted(os.listdir(self.songs_dir))
        # Always define the attribute so compute_label can give a clear error on the test split.
        self.labels=None
        if self.split=='test':
            self.songs=self.songs[-10:]
        else:
            self.songs=self.songs[:15]
            self.labels=pd.read_csv(labels_dir,index_col=1)

        # Previously self.device was only assigned when no device was passed,
        # so an explicit device led to an AttributeError below.
        self.device='cpu' if device is None else device

        if processor is None:
            processor=Wav2Vec2FeatureExtractor.from_pretrained("m-a-p/MERT-v1-330M",trust_remote_code=True)
        self.processor=processor

        #process all audio in here first
        target_sr=24000
        self.processed_songs={}
        self.crops=[10,30,60]
        for idx,song_filename in enumerate(self.songs):

            audio_path=os.path.join(self.songs_dir,song_filename)
            song_id=song_filename[:-8] #ex. song_filename=7400.low.mp3, song_id=7400
            song_dict={'song_id':song_id}
            for crop in self.crops:
                # NOTE(review): the crop length is computed with 16000 samples per unit of
                # `crop`, and load_audio crops before resampling to 24000 -- this presumably
                # assumes 16 kHz source files; confirm against the dataset.
                waveform = load_audio(audio_path, target_sr=target_sr,
                            is_mono=True,
                            is_normalize=False,
                            crop_to_length_in_sample_points=int(crop*16000)+1,
                            crop_randomly=True,
                            pad=False).to(self.device)

                processed_audio = self.processor(waveform,
                            sampling_rate=target_sr,
                            return_tensors="pt")['input_values'][0].to(self.device)

                song_dict[f'audio_{crop}']=processed_audio
            self.processed_songs[idx]=song_dict

    def compute_label(self, song_id) -> dict:
        """Return the genre, instrument and mood/theme tags of a song.

        Args:
            song_id: track id; converted with ``int`` before the lookup.

        Returns:
            dict: keys 'genre', 'instruments' and 'moods'.

        Raises:
            ValueError: if no labels were loaded (test split).
        """
        if self.labels is None:
            raise ValueError("labels are only loaded for non-test splits")

        row_key=int(song_id)
        # literal_eval only parses Python literals, unlike eval it cannot execute code from the CSV.
        genres=ast.literal_eval(self.labels.at[row_key,'genre'])
        instruments=ast.literal_eval(self.labels.at[row_key,'instrument'])
        moods=ast.literal_eval(self.labels.at[row_key,'mood/theme'])
        label_dict = {
            'genre': genres,
            'instruments': instruments,
            'moods': moods
        }
        return label_dict

    @staticmethod
    def precision(preds: list | None, target: list, labels_dir=r'/home/opalinav/musi_long120_parsed_results.csv'):
        """Average tag-overlap precision of retrieved songs against the query song.

        preds is list of song ids returned in query, target is query song id.
        For each of the categories 'genre', 'instrument' and 'mood/theme', a
        prediction counts as correct when it shares at least one tag with the
        target; the per-category fractions are averaged.

        Returns:
            float: mean precision over the three categories; 0.0 when preds is
            None or empty.
        """
        if preds is None or len(preds)==0:
            # Nothing was retrieved: avoid dividing by zero and score the query as 0.
            print('avg_score:0.0')
            return 0.0

        labels=pd.read_csv(labels_dir, index_col=1)
        target_id=int(target)

        scores=[]
        for category in ('genre', 'instrument', 'mood/theme'):
            targ=set(ast.literal_eval(labels.at[target_id,category]))
            correct=0
            for pred_id in preds:
                pred_labels=set(ast.literal_eval(labels.at[int(pred_id),category]))
                #if query song has at least one shared  tag as target song, then count as correct
                if targ&pred_labels:
                    correct+=1
            scores.append(correct/len(preds))
        avg=sum(scores)/len(scores)
        print(f'avg_score:{avg}')
        return avg

    def __len__(self):
        """Number of songs in the selected split."""
        return len(self.songs)

    def __getitem__(self, idx: int):
        """Build the item dict for the song at position ``idx``.

        Returns:
            dict: 'fields' holds song_id and the three audio crops; for the
            "train" split 'labels' and 'score_fn' carry the track_id target
            and its scoring function.
        """
        entry=self.processed_songs[idx]

        item = {"fields": {}, "labels": {},'score_fn':{}}
        item["fields"]["song_id"] = entry['song_id']
        for crop in self.crops:
            item["fields"][f"audio_{crop}"] =entry[f'audio_{crop}']

        if self.split == "train":
            # The label is the query song's own id; precision() looks up its tags itself.
            item["labels"]['track_id'] = entry['song_id']
            # add scoring functions for list fields
            item['score_fn']['track_id']=MusicDataReader2.precision

        return item

       

In [8]:
# Locations of the audio files and of the tag CSV.
songs_dir=r'/home/opalinav/intersect120'
labels_dir=r'/home/opalinav/musi_long120_parsed_results.csv'
# Reader for the default split, plus a 'train' split reader used for validation.
datareader=MusicDataReader2(songs_dir=songs_dir, labels_dir=labels_dir)
datareader_val=MusicDataReader2(songs_dir=songs_dir, labels_dir=labels_dir, split='train')

In [9]:
def run_plan(
    optimized=False,
    songs_dir=r'/home/opalinav/intersect120',
    labels_dir=r'/home/opalinav/musi_long120_parsed_results.csv',
    chroma_path=r'/home/opalinav/chroma120_v3',
    collection_name="Songs120_v3",
):
    """Build and execute the describe-then-retrieve plan over the song dataset.

    The plan adds a `song_description` column to every song and then looks up
    the most similar tracks in a Chroma collection using that description.

    Args:
        optimized (bool): when True, run with a MaxQuality policy, parallel
            execution and a 'train' split reader as validation source;
            otherwise run the plan with default settings.
        songs_dir (str): directory containing the audio files.
        labels_dir (str): path to the labels CSV.
        chroma_path (str): path of the persistent Chroma store.
        collection_name (str): name of the Chroma collection to query.

    Returns:
        The object returned by `plan.run(...)`.

    Raises:
        KeyError: if the OPENAI_API_KEY environment variable is not set.
    """
    import chromadb.utils.embedding_functions as embedding_functions

    datareader=MusicDataReader2(songs_dir,labels_dir)
    song_desc_cols=[
        {"name":"song_description", 'type':str, 'desc': 'text description of the song'}
    ]

    def search_func(index: chromadb.Collection, query: list[list[float]], k: int) -> dict[str, list[str]]:
        """Query the collection and return the ids of the first query's top-k hits."""
        results = index.query(query, n_results=k)
        return {'track_id': results['ids'][0]}

    openai_ef = embedding_functions.OpenAIEmbeddingFunction(
                    api_key=os.environ["OPENAI_API_KEY"],
                    model_name="text-embedding-3-small"
                )

    client = chromadb.PersistentClient(path=chroma_path)
    index = client.get_collection(name=collection_name,embedding_function=openai_ef)
    track_labels = [
        {"name": "track_id", "type": list[str], "desc": "track id of songs most similar to query"},
    ]

    plan=pz.Dataset(datareader)
    plan=plan.sem_add_columns(song_desc_cols)
    plan=plan.retrieve(index=index,search_attr='song_description',output_attrs=track_labels,search_func=search_func)

    if not optimized:
        return plan.run()

    # The validation reader pre-processes every song on construction, so it is
    # only built when the optimizer actually needs it.
    datareader_val=MusicDataReader2(songs_dir,labels_dir,split='train')
    config = pz.QueryProcessorConfig(
        policy=pz.MaxQuality(),
        val_datasource=datareader_val,
        execution_strategy="parallel",
        max_workers=20,
        allow_audio_crop=True
    )
    return plan.run(config=config, k=2, j=3, sample_budget=20)
        

    

In [10]:
def precision_outside(preds: list | None, target: list, labels_dir=r'/home/opalinav/musi_long120_parsed_results.csv'):
        'preds is list of song ids returned in query, target is query song id'
        print(f'precision called!')
        print(f'target: {target}')
        print(f'preds:{preds}')
        labels=pd.read_csv(labels_dir, index_col=1)
        #compute labels
        genres=eval(labels.at[int(target),'genre'])
        instruments=eval(labels.at[int(target),'instrument'])
        moods=eval(labels.at[int(target),'mood/theme'])        
        label_dict = {
            'genre': genres,
            'instrument': instruments,
            'mood/theme': moods
        }
        
        scores=[]
        for category in label_dict:
            targ=set(label_dict[category])
            correct=0
            for pred_id in preds:
                pred_labels=set(eval(labels.at[int(pred_id),category]))
                #if query song has at least one shared  tag as target song, then count as correct
                if len(targ&pred_labels)>0:
                    correct+=1
            scores.append(correct/len(preds))
        avg=sum(scores)/len(scores)
        print(f'avg_score:{avg}')
        return avg

In [11]:
import json
def save_things(output,df_path,stats_path):
    """Persist a run: the results table with a precision column, and the execution stats.

    Args:
        output: run result exposing `to_df()` and `execution_stats.to_json()`.
        df_path: destination of the CSV with the per-row precision added.
        stats_path: destination of the JSON dump of the execution stats.
    """
    def _score_row(row):
        # Score one query: retrieved ids in 'track_id' against the query's own 'song_id'.
        return precision_outside(row['track_id'], row['song_id'])

    results_df = output.to_df()
    results_df['precision'] = results_df.apply(_score_row, axis=1)
    results_df.to_csv(df_path)

    stats = output.execution_stats.to_json()
    with open(stats_path, "w") as stats_file:
        json.dump(stats, stats_file)
    

In [13]:
# NOTE(review): this cell is labelled In [13] but sits before the In [12] cell that
# assigns `out8`; run top-to-bottom on a fresh kernel it would raise NameError.
out8.to_df()

Unnamed: 0,audio_10,audio_30,audio_60,song_id,song_description,track_id
0,"[[tensor(-0.0220, dtype=torch.float32), tensor...","[[tensor(0.1609, dtype=torch.float32), tensor(...","[[tensor(0.0223, dtype=torch.float32), tensor(...",86200,The song is a blues song. It is played on a gu...,[1330500]
1,"[[tensor(-0.0459, dtype=torch.float32), tensor...","[[tensor(-0.0544, dtype=torch.float32), tensor...","[[tensor(0.0003, dtype=torch.float32), tensor(...",872000,"The song is a slow, melancholic, and emotional...",[1366700]
2,"[[tensor(-0.1358, dtype=torch.float32), tensor...","[[tensor(0.1044, dtype=torch.float32), tensor(...","[[tensor(-0.0877, dtype=torch.float32), tensor...",847200,"​The song is a jazz song, with a medium tempo ...",[1357400]
3,"[[tensor(0.0021, dtype=torch.float32), tensor(...","[[tensor(-0.0109, dtype=torch.float32), tensor...","[[tensor(0.0188, dtype=torch.float32), tensor(...",920000,"​The song is a slow, mellow, bluesy jazz song....",[1357400]
4,"[[tensor(-0.0483, dtype=torch.float32), tensor...","[[tensor(-0.0161, dtype=torch.float32), tensor...","[[tensor(0.0763, dtype=torch.float32), tensor(...",903600,"​The song is a classical piece, likely written...",[1105300]
5,"[[tensor(0.0059, dtype=torch.float32), tensor(...","[[tensor(0.0095, dtype=torch.float32), tensor(...","[[tensor(0.0340, dtype=torch.float32), tensor(...",893400,"The song is a slow, mellow, and emotional inst...",[661300]
6,"[[tensor(-0.0765, dtype=torch.float32), tensor...","[[tensor(-0.4930, dtype=torch.float32), tensor...","[[tensor(-0.1569, dtype=torch.float32), tensor...",913700,The song is a funky house song with a medium t...,[945200]
7,"[[tensor(0.0479, dtype=torch.float32), tensor(...","[[tensor(-0.1044, dtype=torch.float32), tensor...","[[tensor(-0.0305, dtype=torch.float32), tensor...",95400,The song is a jazz song with a swinging rhythm...,[1357400]
8,"[[tensor(0.0978, dtype=torch.float32), tensor(...","[[tensor(-0.2834, dtype=torch.float32), tensor...","[[tensor(-0.1872, dtype=torch.float32), tensor...",950100,The song is a funky house track with a fast te...,[945200]
9,"[[tensor(-0.0382, dtype=torch.float32), tensor...","[[tensor(-0.0946, dtype=torch.float32), tensor...","[[tensor(-0.0533, dtype=torch.float32), tensor...",945200,The song is a dance track with a fast tempo of...,[945200]


In [12]:
# Run the plan with optimization enabled (run_plan then uses a QueryProcessorConfig).
out8=run_plan(optimized=True)

Output()

FIELDS[("Schema[['audio_10', 'audio_30', 'audio_60', 'song_id']].23b79bde5a.audio_10", True), ("Schema[['audio_10', 'audio_30', 'audio_60', 'song_id']].23b79bde5a.audio_30", True), ("Schema[['audio_10', 'audio_30', 'audio_60', 'song_id']].23b79bde5a.audio_60", True), ("Schema[['audio_10', 'audio_30', 'audio_60', 'song_id']].23b79bde5a.song_id", False)]
FIELDS2 
[True, True, True, False]
available_models:[GPT_4o, GPT_4o_MINI, GPT_4o, GPT_4o_MINI, MUSILINGO_LONG, MUSILINGO_SHORT]
did not pass:Model.GPT_4o
did not pass:Model.GPT_4o_MINI
did not pass:Model.GPT_4o
did not pass:Model.GPT_4o_MINI
passed:Model.MUSILINGO_LONG
passed:Model.MUSILINGO_SHORT



You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


LlamaForCausalLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Output()

Total opt. time: 960.94s
Total opt. cost: $0.0000
FIELDS[("Schema[['audio_10', 'audio_30', 'audio_60', 'song_id']].23b79bde5a.audio_10", True), ("Schema[['audio_10', 'audio_30', 'audio_60', 'song_id']].23b79bde5a.audio_30", True), ("Schema[['audio_10', 'audio_30', 'audio_60', 'song_id']].23b79bde5a.audio_60", True), ("Schema[['audio_10', 'audio_30', 'audio_60', 'song_id']].23b79bde5a.song_id", False)]
FIELDS2 
[True, True, True, False]
available_models:[GPT_4o, GPT_4o_MINI, GPT_4o, GPT_4o_MINI, MUSILINGO_LONG, MUSILINGO_SHORT]
did not pass:Model.GPT_4o
did not pass:Model.GPT_4o_MINI
did not pass:Model.GPT_4o
did not pass:Model.GPT_4o_MINI
passed:Model.MUSILINGO_LONG
passed:Model.MUSILINGO_SHORT



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Total time: 359.94s
Total cost: $0.0000


In [13]:
# NOTE(review): `out7` is not assigned anywhere in the visible notebook -- this cell
# relies on leftover kernel state and will fail after a kernel restart.
out7.to_df()

Unnamed: 0,audio_10,audio_30,audio_60,song_id,song_description,track_id
0,"[[tensor(0.1118, dtype=torch.float32), tensor(...","[[tensor(0.2059, dtype=torch.float32), tensor(...","[[tensor(-0.2693, dtype=torch.float32), tensor...",847200,"The song is a jazz song, with a medium tempo o...","[1357400, 1087100, 1041500, 718300, 903600]"
1,"[[tensor(0.0016, dtype=torch.float32), tensor(...","[[tensor(-0.0279, dtype=torch.float32), tensor...","[[tensor(0.0489, dtype=torch.float32), tensor(...",893400,The song is a traditional Irish folk song. It ...,"[1170400, 86200, 1385300, 1105300, 1037900]"
2,"[[tensor(-0.0182, dtype=torch.float32), tensor...","[[tensor(0.0146, dtype=torch.float32), tensor(...","[[tensor(-0.0282, dtype=torch.float32), tensor...",903600,The song is a classical piano piece. The instr...,"[1009600, 633200, 757500, 1339700, 1266500]"
3,"[[tensor(0.1009, dtype=torch.float32), tensor(...","[[tensor(-0.0965, dtype=torch.float32), tensor...","[[tensor(0.0019, dtype=torch.float32), tensor(...",872000,The song is a pop song with a fast tempo of 12...,"[950100, 1155000, 1383900, 945200, 1420700]"
4,"[[tensor(0.1241, dtype=torch.float32), tensor(...","[[tensor(-0.2311, dtype=torch.float32), tensor...","[[tensor(-0.0009, dtype=torch.float32), tensor...",86200,The song is a blues song. It is played on a gu...,"[86200, 1330500, 816200, 1086300, 1395500]"
5,"[[tensor(0.0708, dtype=torch.float32), tensor(...","[[tensor(-0.0653, dtype=torch.float32), tensor...","[[tensor(0.0345, dtype=torch.float32), tensor(...",945200,The song is a dance track with a fast tempo of...,"[945200, 950100, 1420700, 1121400, 1383900]"
6,"[[tensor(-0.1115, dtype=torch.float32), tensor...","[[tensor(0.0581, dtype=torch.float32), tensor(...","[[tensor(0.0236, dtype=torch.float32), tensor(...",913700,The song is a funky house song with a medium t...,"[945200, 950100, 1121400, 1420500, 1420700]"
7,"[[tensor(0.0064, dtype=torch.float32), tensor(...","[[tensor(0.1023, dtype=torch.float32), tensor(...","[[tensor(-0.0591, dtype=torch.float32), tensor...",950100,The song is a funky house track with a fast te...,"[945200, 1420500, 1121400, 501200, 1420700]"
8,"[[tensor(-0.0569, dtype=torch.float32), tensor...","[[tensor(0.0292, dtype=torch.float32), tensor(...","[[tensor(-0.0126, dtype=torch.float32), tensor...",95400,The song is a jazz song with a swinging rhythm...,"[1357400, 1087100, 1041500, 718300, 903600]"
9,"[[tensor(-0.0079, dtype=torch.float32), tensor...","[[tensor(0.0018, dtype=torch.float32), tensor(...","[[tensor(0.0024, dtype=torch.float32), tensor(...",920000,The song is a jazz song. It is played on a pia...,"[1357400, 847200, 1041500, 903600, 1087100]"


In [14]:
# Write the results table (with a per-row precision column) to crop_opt3.csv and
# the execution stats to crop_opt3.json.
save_things(out7,'crop_opt3.csv', 'crop_opt3.json')

precision called!
target: 847200
preds:['1357400', '1087100', '1041500', '718300', '903600']
avg_score:0.46666666666666673
precision called!
target: 893400
preds:['1170400', '86200', '1385300', '1105300', '1037900']
avg_score:0.3333333333333333
precision called!
target: 903600
preds:['1009600', '633200', '757500', '1339700', '1266500']
avg_score:0.39999999999999997
precision called!
target: 872000
preds:['950100', '1155000', '1383900', '945200', '1420700']
avg_score:0.39999999999999997
precision called!
target: 86200
preds:['86200', '1330500', '816200', '1086300', '1395500']
avg_score:0.3333333333333333
precision called!
target: 945200
preds:['945200', '950100', '1420700', '1121400', '1383900']
avg_score:0.7333333333333334
precision called!
target: 913700
preds:['945200', '950100', '1121400', '1420500', '1420700']
avg_score:0.6666666666666666
precision called!
target: 950100
preds:['945200', '1420500', '1121400', '501200', '1420700']
avg_score:0.6666666666666666
precision called!
targe

### Past runs

In [None]:
# NOTE(review): `MusicDataReader` is not defined in the visible notebook (only
# `MusicDataReader2` is); presumably this cell predates a rename -- confirm before re-running.
songs_dir=r'/home/opalinav/intersect120'
labels_dir=r'/home/opalinav/musi_long120_parsed_results.csv'
datareader=MusicDataReader(songs_dir,labels_dir)
datareader_val=MusicDataReader(songs_dir,labels_dir,split='train')

In [None]:
# Column added by sem_add_columns: a single text field holding a description of the song.
song_desc_cols=[
    {"name":"song_description", 'type':str, 'desc': 'text description of the song'}
]

In [None]:
def search_func(index: "chromadb.Collection", query: list[list[float]], k: int) -> dict[str, list[str]]:
    """Query the collection and return the ids of the top-k hits.

    Args:
        index: collection to search; only its `query` method is used.
        query: passed through unchanged as the first argument of `index.query`.
        k: number of results requested per query.

    Returns:
        dict: {'track_id': ids returned for the first query}.
    """
    results = index.query(query, n_results=k)
    # Double-quoted f-string: reusing the same quote inside the braces is a
    # SyntaxError on Python versions before 3.12.
    print(f"resultsssss: {results['ids']}")
    return {'track_id': results['ids'][0]}

In [None]:
import chromadb.utils.embedding_functions as embedding_functions
# Embedding function handed to the Chroma collection; the API key is read from the
# environment (a missing OPENAI_API_KEY raises KeyError here).
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
                api_key=os.environ["OPENAI_API_KEY"],
                model_name="text-embedding-3-small"
            )


In [None]:
#client = chromadb.PersistentClient(path=r'C:\Users\Opalina\Desktop\Meng\jamendo\chroma120_v1')
#client = chromadb.PersistentClient(path=r'C:\Users\Opalina\Desktop\Meng\jamendo\chroma120_v1')
# Open the on-disk Chroma store and fetch the existing "Songs120_v3" collection.
client = chromadb.PersistentClient(path=r'/home/opalinav/chroma120_v3')
index = client.get_collection(name="Songs120_v3",embedding_function=openai_ef)

In [None]:
index._embedding_function

<chromadb.utils.embedding_functions.openai_embedding_function.OpenAIEmbeddingFunction at 0x79aa740cfb30>

In [None]:
# Output schema of the retrieve step: the list of track ids returned by the search.
track_labels = [
    {"name": "track_id", "type": list[str], "desc": "track id of songs most similar to query"},
]

In [None]:
# Plan: read songs -> add a text description per song -> retrieve similar track ids
# from the Chroma index using that description.
plan=pz.Dataset(datareader)
plan=plan.sem_add_columns(song_desc_cols)
plan=plan.retrieve(index=index,search_attr='song_description',output_attrs=track_labels,search_func=search_func)#, output_attr_desc='')

In [None]:
# Processor configuration: MaxQuality policy, parallel execution with 20 workers,
# and the 'train' split reader as validation source.
config = pz.QueryProcessorConfig(
        policy=pz.MaxQuality(),
        val_datasource=datareader_val,
        execution_strategy="parallel",
        max_workers=20,
)


In [None]:
#output=plan.run()

In [None]:
#df=output.to_df()
#df

In [None]:
df=output.to_df()
df

Unnamed: 0,audio_content,song_id,song_description,track_id
0,"[[tensor(0.0119, dtype=torch.float32), tensor(...",1028900,"​​The song is a slow, mellow, and emotional ba...",[1116400]
1,"[[tensor(0.0778, dtype=torch.float32), tensor(...",1009600,The song is a traditional Indian classical son...,[920000]
2,"[[tensor(-0.0571, dtype=torch.float32), tensor...",1014400,The song is a fast-paced rock song with a heav...,[1303000]
3,"[[tensor(0.3735, dtype=torch.float32), tensor(...",1012000,"The song is a funky, groovy, upbeat, and energ...",[1087100]
4,"[[tensor(-0.1565, dtype=torch.float32), tensor...",1037900,The song is a traditional Indian classical rag...,[920000]


In [None]:
def precision2(preds: list | None, target_id: list, labels_dir=r'/home/opalinav/musi_long120_parsed_results.csv'):
    'preds is list of song ids returned in query, targets is list of true-value genre tags'
    labels=pd.read_csv(labels_dir, index_col=1)
    correct=0
    targets=set(eval(labels.at[int(target_id),'genre']))
    print(f'targets: {targets}')
    for pred_id in preds:
        pred_genres=set(eval(labels.at[int(pred_id),'genre']))
        print(f'preds: {pred_genres}')
        #if query song has at least onde shared genre tag as target song, then count as correct
        if len(targets&pred_genres)>0:
            correct+=1
    
    return correct/len(preds)

In [None]:
# Score every row: retrieved ids in 'track_id' against the row's own 'song_id'.
df['precision'] = df.apply(
    lambda row: precision2(row['track_id'], row['song_id']),
    axis=1
)


targets: {'classical'}
preds: {'classical'}
targets: {'soundtrack', 'classical'}
preds: {'newage', 'soundtrack'}
targets: {'metal', 'rock', 'alternative'}
preds: {'rock', 'punkrock'}
targets: {'ambient', 'soundtrack', 'classical'}
preds: {'instrumentalpop', 'easylistening'}
targets: {'newage', 'ambient', 'lounge'}
preds: {'newage', 'soundtrack'}


In [None]:
df

Unnamed: 0,audio_content,song_id,song_description,track_id,precision
0,"[[tensor(0.0119, dtype=torch.float32), tensor(...",1028900,"​​The song is a slow, mellow, and emotional ba...",[1116400],1.0
1,"[[tensor(0.0778, dtype=torch.float32), tensor(...",1009600,The song is a traditional Indian classical son...,[920000],1.0
2,"[[tensor(-0.0571, dtype=torch.float32), tensor...",1014400,The song is a fast-paced rock song with a heav...,[1303000],1.0
3,"[[tensor(0.3735, dtype=torch.float32), tensor(...",1012000,"The song is a funky, groovy, upbeat, and energ...",[1087100],0.0
4,"[[tensor(-0.1565, dtype=torch.float32), tensor...",1037900,The song is a traditional Indian classical rag...,[920000],1.0


In [None]:
df['precision']

0    1.0
1    1.0
2    1.0
3    0.0
4    1.0
Name: precision, dtype: float64

In [None]:
# NOTE(review): `df.apply()` is called without a function, and the recorded NameError
# refers to a name `precision` that does not appear in this line -- the cell looks like
# a leftover of an edited experiment; fix or delete it.
df['precision']=df.apply()

NameError: name 'precision' is not defined

In [None]:
output.execution_stats.to_json()

{'execution_id': '43f75a37bd',
 'sentinel_plan_stats': {'f20198bc0d': {'plan_id': 'f20198bc0d',
   'plan_str': " 0. MarshalAndScanDataOp -> Schema[['audio_content', 'song_id']] \n\n 1.1. Schema[['audio_content', 'song_id']] -> LLMConvertBonded -> Schema[['audio_content', 'song_description', 'song_id']]\n    (audio_content, song_id) -> (audio_content, song_descriptio)\n    Model: Model.MUSILINGO_SHORT\n    Prompt Strategy: PromptStrategy.COT_QA\n\n 1.2. Schema[['audio_content', 'song_id']] -> LLMConvertBonded -> Schema[['audio_content', 'song_description', 'song_id']]\n    (audio_content, song_id) -> (audio_content, song_descriptio)\n    Model: Model.MUSILINGO_LONG\n    Prompt Strategy: PromptStrategy.COT_QA\n\n 2.1. Schema[['audio_content', 'song_description', 'song_id']] -> RetrieveOp -> Schema[['audio_content', 'song_description', 'song_id', 'track_id']]\n    (audio_content, song_descriptio) -> (audio_content, song_descriptio)\n    Retrieve: Collection with top 10\n\n 2.2. Schema[['a

In [None]:
df=output.to_df()
df

Unnamed: 0,audio_content,song_id,song_description,track_id
0,"[[tensor(0.0141, dtype=torch.float32), tensor(...",1009600,"The song is a slow, mellow, and romantic instr...",[]
1,"[[tensor(-0.0986, dtype=torch.float32), tensor...",1012000,The song is a funky instrumental with a groovy...,[]
2,"[[tensor(-0.0020, dtype=torch.float32), tensor...",1037900,The song is a traditional Irish folk song. It ...,[]
3,"[[tensor(-0.0616, dtype=torch.float32), tensor...",1014400,​​The song is a slow and mellow instrumental p...,[]
4,"[[tensor(-0.0192, dtype=torch.float32), tensor...",1028900,"​​The song is a slow, romantic ballad with a b...",[]


In [None]:
df=output.to_df()
df

Unnamed: 0,audio_content,song_id,song_description,track_id
0,"[[tensor(0.0967), tensor(0.1461), tensor(0.138...",1012000,The song is a melodic composition that can be ...,[1398100]
1,"[[tensor(0.0620), tensor(0.0968), tensor(0.095...",1028900,The song is a melodic piece that can be classi...,[1082100]
2,"[[tensor(0.0039), tensor(-0.0004), tensor(-0.0...",1037900,The song in question is a melodic piece that c...,[1082100]
3,"[[tensor(0.0039), tensor(0.0049), tensor(-0.04...",1014400,"The song in question is a melodic rock piece, ...",[1155900]
4,"[[tensor(-0.1304), tensor(-0.1827), tensor(-0....",1009600,The song is a melodic piece that is characteri...,[1082100]


In [None]:
output=plan.run()

FIELDS[("Schema[['audio_content', 'song_id']].959fe542c9.audio_content", True), ("Schema[['audio_content', 'song_id']].959fe542c9.song_id", False)]
FIELDS2 
[True, False]
convert.py: m-a-p/MusiLingo-long-v1
Loading Audio Encoder


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


Loading Audio Encoder Done
Loading LLAMA


LlamaForCausalLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading LLAMA Done


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

convert.py: m-a-p/MusiLingo-short-v1
Loading Audio Encoder
Loading Audio Encoder Done
Loading LLAMA


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading LLAMA Done


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

convert.py: m-a-p/MusiLingo-long-v1
Loading Audio Encoder
Loading Audio Encoder Done
Loading LLAMA


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading LLAMA Done


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

convert.py: m-a-p/MusiLingo-long-v1
Loading Audio Encoder
Loading Audio Encoder Done
Loading LLAMA


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading LLAMA Done


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Output()




Total time: 228.13s
Total cost: $0.0000


In [None]:
output.data_records

[DataRecord(audio_content=tensor([[-0.0423, -0.0612, -0.0575,  ..., -0.1701, -0.2124, -0.0549]]), song_description='The song is a melodic piece that is characterized by a slow tempo of 60 beats per minute. It is a song that is primarily instrumental, with the main focus being on the piano. The melody is a simple one, with a repetitive pattern that is played on the piano. The song is a ballad, which is a genre that is known for its emotional and sentimental nature. The mood of the song is one of sadness, with the melody evoking feelings of melancholy and longing. The theme of the song is one of love, with the melody expressing the pain of a broken heart. The song is best suited for a romantic setting, such as a candlelit dinner or a quiet night out. It is a song that can evoke a range of emotions, from nostalgia to heartbreak, making it a perfect choice for a romantic setting.', song_id='1009600', track_id=['1037900']),
 DataRecord(audio_content=tensor([[ 0.2288,  0.3258,  0.2984,  ...,

In [None]:
df=output.to_df()

In [None]:
df

Unnamed: 0,audio_content,song_id,song_description,track_id
0,"[[tensor(4.6705e-05), tensor(-0.0018), tensor(...",1009600,The song is a melodic piece that is characteri...,[1037900]
1,"[[tensor(-0.1634), tensor(-0.2427), tensor(-0....",1012000,The song is a melodic piece that can be classi...,[1366700]
2,"[[tensor(0.0118), tensor(0.0190), tensor(0.028...",1014400,The song you're referring to is a melodic piec...,[702500]


In [None]:
# Execute the plan and keep its result in `output` for the inspection cells.
output=plan.run()
# Optional CSV export of the results (currently disabled):
#output.to_df().to_csv('results.csv')

FIELDS[("Schema[['audio_content', 'song_id']].959fe542c9.audio_content", True), ("Schema[['audio_content', 'song_id']].959fe542c9.song_id", False)]
FIELDS2 
[True, False]
convert.py: m-a-p/MusiLingo-long-v1




Loading Audio Encoder


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


Loading Audio Encoder Done
Loading LLAMA


LlamaForCausalLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading LLAMA Done


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

convert.py: m-a-p/MusiLingo-short-v1
Loading Audio Encoder
Loading Audio Encoder Done
Loading LLAMA


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading LLAMA Done


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

convert.py: m-a-p/MusiLingo-long-v1
Loading Audio Encoder
Loading Audio Encoder Done
Loading LLAMA


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading LLAMA Done


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

convert.py: m-a-p/MusiLingo-long-v1
Loading Audio Encoder
Loading Audio Encoder Done
Loading LLAMA


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading LLAMA Done


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Output()




In [None]:
from IPython.display import Audio

In [None]:
# Render an inline audio player for the first item of the data reader.
# NOTE(review): rate=24000 is the playback sample rate in Hz; presumably it must match
# the sample rate the reader loaded the audio at — confirm against the reader config.
Audio(data=song_datareader[0]['audio_content'],rate=24000)

## Persistent Chroma Collection

In [None]:
import chromadb

In [None]:
# Open the on-disk Chroma store and look up the already-existing song collection.
# Windows location used previously:
#   C:\Users\Opalina\Desktop\Meng\jamendo\chroma120_v1
client = chromadb.PersistentClient(
    path=r'/home/opalinav/chroma120_v1',
)
collection = client.get_collection(
    name="Songs120_v2",
)

In [None]:
# Fetch the stored entries for every id listed in `ids`.
# NOTE(review): in the visible part of this notebook `ids` is only assigned in a later
# cell — confirm it is defined before this cell when running on a fresh kernel.
collection.get(ids)

{'ids': ['1088000',
  '759300',
  '1066500',
  '1416200',
  '1189900',
  '1317900',
  '1300500',
  '1125400',
  '604700',
  '702500',
  '1073700',
  '1372200',
  '1337900',
  '945200',
  '501300',
  '1115700',
  '1388600',
  '1158200',
  '872000',
  '1320000',
  '1080900',
  '816200',
  '1037900',
  '1327700',
  '1353300',
  '1357400',
  '1041500',
  '1374300',
  '1096800',
  '1062500',
  '682600',
  '80500',
  '1121400',
  '1087100',
  '1014400',
  '1052800',
  '399500',
  '1090700',
  '1056400',
  '1371800',
  '95400',
  '1081000',
  '1132200',
  '1170600',
  '752300',
  '1366700',
  '206000',
  '86200',
  '1062600',
  '1284300',
  '1211600',
  '729300',
  '1420500',
  '1066200',
  '1398500',
  '7400',
  '1227500',
  '1116400',
  '1082100',
  '1163000',
  '1393400',
  '757500',
  '1282300',
  '1277700',
  '1166100',
  '1396500',
  '1012000',
  '913700',
  '1028900',
  '1084200',
  '1395500',
  '1173500',
  '661300',
  '1339700',
  '1420600',
  '1155900',
  '1416000',
  '1266500',
  '

In [None]:
import pandas as pd

In [None]:
# Spot-check the 'genre' value of track 1088000 (label-based scalar lookup).
# NOTE(review): in the visible part of this notebook `song_csv` is only loaded in the
# following cell — confirm it is defined before this cell when running on a fresh kernel.
song_csv.at[1088000,'genre']

"['alternative', 'soundtrack', 'pop']"

In [None]:
# Load the parsed results table, indexed by its second CSV column.
# Windows location used previously:
#   C:\Users\Opalina\Desktop\Meng\jamendo\musi_long120_parsed_results.csv
song_csv = pd.read_csv(
    r'/home/opalinav/musi_long120_parsed_results.csv',
    index_col=1,
)

# Build one free-text description per track by joining the predicted-text
# columns with single spaces, in this fixed order.
text_columns = [
    'Predicted_genre',
    'Predicted_instrument',
    'Predicted_mood/theme',
    'Predicted_bpm',
]
combined = song_csv[text_columns[0]]
for column in text_columns[1:]:
    combined = combined + ' ' + song_csv[column]
song_csv['description'] = combined

song_csv

Unnamed: 0_level_0,Unnamed: 0,instrument,gpt_instrument,Predicted_instrument,genre,gpt_genre,Predicted_genre,mood/theme,gpt_mood/theme,Predicted_mood/theme,bpm,gpt_bpm,Predicted_bpm,key,gpt_key,Predicted_key,description
Track_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1088000,0,['synthesizer'],"['synthesizer', 'bass', 'drummachine', 'keyboa...",The song is a melodic composition that feature...,"['alternative', 'soundtrack', 'pop']","['electronic', 'techno']","This song is a melodic techno piece, which is ...",['sport'],"['melancholic', 'sad', 'slow', 'emotional', 'd...","The song is a melancholic one, evoking a sense...",119.910202,120,"The song is a high-tempo, energetic piece of m...",D minor,B minor,This song is in the key of B minor. The song i...,"This song is a melodic techno piece, which is ..."
759300,1,"['strings', 'bass', 'drums']","['synthesizer', 'bass', 'drummachine', 'voice']",The song is a melodic composition that feature...,['soundtrack'],['hiphop'],"This song is a hip-hop track, characterized by...",['game'],"['melodic', 'mellow', 'slow', 'relaxing', 'nos...",The song is a melodic piece that evokes a sens...,99.987396,100,"The song in question is a mid-tempo piece, wit...",A minor,B flat minor,The song is in the key of B flat. This is a mi...,"This song is a hip-hop track, characterized by..."
1066500,2,"['accordion', 'ukulele', 'pipeorgan', 'drums']","['electricguitar', 'bass', 'synthesizer', 'dru...",The song is a melodic composition that feature...,['pop'],"['pop', 'synthpop']",This song is a melodic piece that belongs to t...,"['positive', 'uplifting', 'advertising', 'happy']","['melancholic', 'sad', 'slow', 'emotional']","The song in question is a melancholic one, wit...",95.076775,100,The song in question has a tempo of 100 beats ...,G major,C minor,The song is in the key of C minor. This is a m...,This song is a melodic piece that belongs to t...
1416200,3,"['piano', 'synthesizer']",['piano'],The song is a melodic composition that feature...,"['ambient', 'electronica', 'soundtrack']",['classical'],"This song is categorized as a classical piece,...","['sad', 'romantic']","['melancholic', 'sad', 'emotional', 'slow', 'd...","The song is a melancholic one, evoking a sense...",101.623245,60,"The song in question is a slow-paced, melodic ...",D# major,C minor,This song is in the key of C minor. The reason...,"This song is categorized as a classical piece,..."
1189900,4,"['guitar', 'drums']","['bass', 'guitar', 'drums', 'synthesizer']",The song is a melodic composition that feature...,"['indie', 'rock', 'punkrock']","['rock', 'alternative']","This song is a blend of two genres, namely the...","['retro', 'cool']","['melancholic', 'sad', 'slow', 'emotional', 'r...","The song is a melancholic one, evoking a sense...",154.786545,60,"The song is a slow-paced one, with a tempo of ...",F major,C minor,It's a song in the key of C minor. The song is...,"This song is a blend of two genres, namely the..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1110500,115,"['viola', 'violin', 'drummachine', 'cello', 's...",['guitar'],The song is a melodic piece that features a gu...,"['ambient', 'electronic']","['electronic', 'techno', 'minimal']",This song is categorized as a genre of electro...,['uplifting'],"['melancholic', 'slow', 'emotional']",The song in question is a melodic piece that e...,98.976578,60,"The song in question is a slow-paced, mellow p...",C major,C minor,The song is in the key of C minor. This is a m...,This song is categorized as a genre of electro...
794600,116,"['trumpet', 'violin', 'piano', 'trombone', 'ce...","['piano', 'strings']",The song is a melodic composition that feature...,"['orchestral', 'soundtrack']","['electronic', 'techno', 'trance', 'house']",This song is a melodic piece that belongs to t...,['film'],"['melancholic', 'slow', 'emotional', 'drama', ...","This song is a melancholic piece, evoking a se...",105.142189,120,"120. The song is a slow-paced, melodic piece t...",A# major,C minor,This song is in the key of C minor. The melody...,This song is a melodic piece that belongs to t...
1101300,117,"['piano', 'classicalguitar']","['synthesizer', 'bass', 'drums', 'keyboard']",The song is a melodic composition that feature...,"['lounge', 'newage', 'ethno']","['pop', 'electronic', 'synthpop', '80s']",This song is categorized as a pop song. Pop mu...,['meditative'],"['melodic', 'slow', 'mellow', 'soft', 'romanti...",The song is a melodic piece that evokes a sens...,99.990570,100,"The song is a medium-tempo piece, with a tempo...",D minor,C minor,The song is in the key of C minor. This is a m...,This song is categorized as a pop song. Pop mu...
798200,118,"['doublebass', 'horn', 'flute', 'violin', 'pia...","['piano', 'cello']",The song is a melodic composition that feature...,"['classical', 'medieval', 'soundtrack', 'orche...",['pop'],This song is a melodic piece that can be class...,"['epic', 'trailer', 'film']","['melancholic', 'sad', 'slow', 'emotional', 'd...","This song is a melancholic piece, evoking a se...",120.123466,100,"The song in question is a slow-paced, melodic ...",C major,C minor,The song is in the key of C minor. This is a m...,This song is a melodic piece that can be class...


In [None]:
# Keep only the un-prefixed tag columns (not the gpt_* / Predicted_* variants)
# as the per-track metadata table.
metadata = song_csv.loc[
    :,
    ['instrument', 'genre', 'mood/theme', 'bpm', 'key'],
]
metadata

Unnamed: 0_level_0,instrument,genre,mood/theme,bpm,key
Track_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1088000,['synthesizer'],"['alternative', 'soundtrack', 'pop']",['sport'],119.910202,D minor
759300,"['strings', 'bass', 'drums']",['soundtrack'],['game'],99.987396,A minor
1066500,"['accordion', 'ukulele', 'pipeorgan', 'drums']",['pop'],"['positive', 'uplifting', 'advertising', 'happy']",95.076775,G major
1416200,"['piano', 'synthesizer']","['ambient', 'electronica', 'soundtrack']","['sad', 'romantic']",101.623245,D# major
1189900,"['guitar', 'drums']","['indie', 'rock', 'punkrock']","['retro', 'cool']",154.786545,F major
...,...,...,...,...,...
1110500,"['viola', 'violin', 'drummachine', 'cello', 's...","['ambient', 'electronic']",['uplifting'],98.976578,C major
794600,"['trumpet', 'violin', 'piano', 'trombone', 'ce...","['orchestral', 'soundtrack']",['film'],105.142189,A# major
1101300,"['piano', 'classicalguitar']","['lounge', 'newage', 'ethno']",['meditative'],99.990570,D minor
798200,"['doublebass', 'horn', 'flute', 'violin', 'pia...","['classical', 'medieval', 'soundtrack', 'orche...","['epic', 'trailer', 'film']",120.123466,C major


In [None]:
# Build the {index label: {column: value}} mapping straight from `song_csv`
# instead of from the previous value of `metadata`. The original
# `metadata = metadata.to_dict('index')` turned `metadata` from a DataFrame into
# a dict, so running the cell a second time failed (a dict has no `.to_dict`).
# Deriving it from `song_csv` yields the same mapping and is safe to re-run.
metadata = song_csv[['instrument', 'genre', 'mood/theme', 'bpm', 'key']].to_dict('index')
metadata

{1088000: {'instrument': "['synthesizer']",
  'genre': "['alternative', 'soundtrack', 'pop']",
  'mood/theme': "['sport']",
  'bpm': 119.910202026,
  'key': 'D minor'},
 759300: {'instrument': "['strings', 'bass', 'drums']",
  'genre': "['soundtrack']",
  'mood/theme': "['game']",
  'bpm': 99.9873962402,
  'key': 'A minor'},
 1066500: {'instrument': "['accordion', 'ukulele', 'pipeorgan', 'drums']",
  'genre': "['pop']",
  'mood/theme': "['positive', 'uplifting', 'advertising', 'happy']",
  'bpm': 95.0767745972,
  'key': 'G major'},
 1416200: {'instrument': "['piano', 'synthesizer']",
  'genre': "['ambient', 'electronica', 'soundtrack']",
  'mood/theme': "['sad', 'romantic']",
  'bpm': 101.623245239,
  'key': 'D# major'},
 1189900: {'instrument': "['guitar', 'drums']",
  'genre': "['indie', 'rock', 'punkrock']",
  'mood/theme': "['retro', 'cool']",
  'bpm': 154.7865448,
  'key': 'F major'},
 1317900: {'instrument': "['guitar']",
  'genre': "['rock', 'alternative', 'punkrock']",
  'mood/

In [None]:
# Parallel lists for the collection: string ids, description documents, and the
# metadata dict of each track (looked up by the integer form of its id).
ids = [str(track_id) for track_id in song_csv.index]
descriptions = [*song_csv['description']]
metadatas = [metadata[int(track_id)] for track_id in ids]

In [None]:
# Open the v3 on-disk Chroma store and fetch the "Songs120_v3" collection,
# creating it with the `openai_ef` embedding function if it does not exist yet.
client = chromadb.PersistentClient(
    path=r'/home/opalinav/chroma120_v3',
)
collection = client.get_or_create_collection(
    name="Songs120_v3",
    embedding_function=openai_ef,
)

In [None]:
# Insert every track into the collection: id, description document and metadata.
collection.add(
    ids=ids,
    documents=descriptions,
    metadatas=metadatas,
)
    