In [72]:
import json
import requests
import numpy as np
import tempfile
import os
from tqdm import tqdm

In [73]:
# Load the track records (a mapping of track id -> track record) from disk.
# A context manager is used so the file handle is closed deterministically
# instead of being left for the garbage collector.
with open('/workspace/fairouz/fairouz_conf/fairouz/tracks_contextualized.json') as tracks_file:
    tracks = json.load(tracks_file)

In [74]:
def embed_lyrics(lyric, emb_dim = 1024):
    """Embed one track's lyric annotations with the remote embedding service.

    Parameters
    ----------
    lyric : dict
        Lyric record. ``lyric["lyrics"]`` is always read; when it is not the
        empty string, ``lyric["summary"]``, ``lyric["context"]`` and
        ``lyric["emotional"]`` are sent to the service.
    emb_dim : int, default 1024
        Length of the all-zero vector returned for a track with empty lyrics.

    Returns
    -------
    numpy.ndarray
        ``np.zeros(emb_dim)`` when the lyrics are empty, otherwise the
        ``"embedding"`` field of the service response as an array.

    Raises
    ------
    RuntimeError
        If the ``LYRICS_EMBED_API_TOKEN`` environment variable is not set.
    requests.HTTPError
        If the service answers with a non-2xx status code.
    """
    if lyric["lyrics"] == "":
        # Nothing to embed: return a placeholder so every track still gets a row.
        return np.zeros(emb_dim)

    # The bearer token must not be committed with the notebook; read it from
    # the environment and fail loudly (before any network I/O) if it is absent.
    token = os.environ.get("LYRICS_EMBED_API_TOKEN")
    if not token:
        raise RuntimeError(
            "Set the LYRICS_EMBED_API_TOKEN environment variable to the "
            "embedding service bearer token before calling embed_lyrics()."
        )

    url = "https://w7852kszrbkrz2-4000.proxy.runpod.net/embed/"
    lyrics = {
        "lyrics_summary": lyric["summary"],
        "lexical_keywords": lyric["context"],
        "sentiment_keywords": lyric["emotional"]
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {token}'
    }
    # A timeout keeps one stalled request from hanging the whole batch.
    response = requests.post(url, json=lyrics, headers=headers, timeout=60)
    # Surface HTTP failures directly instead of as a confusing decode/KeyError.
    response.raise_for_status()

    body = response.json()
    # The original code decoded the body twice, i.e. the service returns a JSON
    # document wrapped in a JSON string. Accept that form and a plain object.
    if isinstance(body, str):
        body = json.loads(body)
    return np.array(body["embedding"])

In [75]:
# Accumulator of (track id, embedding vector) pairs, one per track.
embeddings = []

In [76]:
# Start from an empty list every time this cell runs; otherwise re-executing
# it would append a second copy of every embedding and break the count check.
embeddings = []
failed_track_ids = []

# `track_id` is used rather than `id` so the builtin is not shadowed.
for track_id, track in tqdm(tracks.items(), total=len(tracks)):
    try:
        embeddings.append((track_id, embed_lyrics(track["lyrics"])))
    except Exception as e:
        # Best effort: keep going, but record which track failed and why.
        failed_track_ids.append(track_id)
        print(f"{track_id}: {e}")

# Every track must have produced an embedding before anything is saved.
assert len(embeddings) == len(tracks), (
    f"{len(failed_track_ids)} track(s) failed to embed: {failed_track_ids[:10]}"
)

100%|██████████| 822/822 [03:20<00:00,  4.09it/s]


In [77]:
# JSON-serialisable view of the embeddings: arrays become plain lists.
embeddings_json = [
    {"id": track_id, "embedding": vector.tolist()}
    for track_id, vector in embeddings
]

In [78]:
# Persist the embeddings. The context manager guarantees the file is flushed
# and closed, so the JSON on disk is complete once this cell finishes.
with open("/workspace/fairouz/fairouz_conf/fairouz/embeddings/lyrics/song_lyrics_e5_embeddings.json", "w") as embeddings_file:
    json.dump(embeddings_json, embeddings_file)

In [79]:
# Accumulator of per-track metadata records, one per embedding.
metadatas = []

In [80]:
# Rebuild the list from scratch on every run so re-executing this cell cannot
# append duplicates and leave the metadata misaligned with the embedding rows.
metadatas = []
for emb in embeddings_json:
    md = tracks[emb["id"]]
    lyrics_md = md["lyrics"]
    # Flatten list-valued fields into comma-separated strings so each record
    # only holds scalar string values.
    metadatas.append({
        "track_name": md["track_title"],
        "artist_name": md["artist_name"],
        "album_name": md["album_name"],
        "genres": ", ".join(md["genres"]),
        "keywords": ", ".join(lyrics_md["context"]),
        "emotional": ", ".join(lyrics_md["emotional"]),
        "synopsis": lyrics_md["summary"]
    })

In [81]:
# Shape of the stacked embedding matrix: (number of tracks, embedding size).
np.array([vector for _, vector in embeddings]).shape

(822, 1024)

In [82]:
# Number of metadata records; should equal the number of embedding rows they
# are uploaded alongside.
len(metadatas)

822

In [83]:
# Preview only the first records; displaying the whole list bloats the saved
# notebook without adding information.
metadatas[:2]

[{'track_name': 'True Colors',
  'artist_name': 'The Weeknd',
  'album_name': 'Starboy (Deluxe)',
  'genres': 'R&B',
  'keywords': 'new lover, trust, true colors, past relationships, painting a picture',
  'emotional': 'desire for transparency, confusion, trust issues, longing, hopefulness',
  'synopsis': "The song is about a new relationship and the importance of trust and honesty in it. The singer asks the lover to reveal their true colors and past experiences, and expresses confusion about inconsistencies in their stories. The chorus repeats the phrase 'paint me a picture with your true colors' and 'these are confessions of a new lover', indicating the desire for transparency and understanding in the relationship."},
 {'track_name': 'Nothing Without You',
  'artist_name': 'The Weeknd',
  'album_name': 'Starboy (Deluxe)',
  'genres': 'R&B',
  'keywords': 'belong, darkness, love, pain, nothing',
  'emotional': 'love, dependence, emptiness, pain, fear',
  'synopsis': "The lyrics expres

In [85]:
from nomic import atlas

# One embedding row per track; numpy is already imported at the top of the
# notebook, so it is not re-imported here.
embedding_matrix = np.array([vector for _, vector in embeddings])

# Atlas receives metadata and embeddings as parallel sequences, so a length
# mismatch would be a silent data error; check it before uploading.
assert len(metadatas) == embedding_matrix.shape[0], (
    f"{len(metadatas)} metadata records vs {embedding_matrix.shape[0]} embedding rows"
)

dataset = atlas.map_data(
    data=metadatas,
    embeddings=embedding_matrix,
    identifier="grad_playlist_lyrics_metadata_e5",
)

[32m2024-03-23 15:31:38.150[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36m_create_project[0m:[36m868[0m - [1mCreating dataset `grad-playlist-lyrics-metadata-e5`[0m
[32m2024-03-23 15:31:38.482[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_data[0m:[36m108[0m - [1mUploading data to Atlas.[0m
1it [00:01,  1.63s/it]
[32m2024-03-23 15:31:40.139[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36m_add_data[0m:[36m1537[0m - [1mUpload succeeded.[0m
[32m2024-03-23 15:31:40.145[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_data[0m:[36m123[0m - [1m`tyqnology/grad-playlist-lyrics-metadata-e5`: Data upload succeeded to dataset`[0m
[32m2024-03-23 15:31:41.867[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36mcreate_index[0m:[36m1246[0m - [1mCreated map `grad_playlist_lyrics_metadata_e5` in dataset `tyqnology/grad-playlist-lyrics-metadata-e5`: https://atlas.nomic.ai/data/tyqnology/grad-playlist-lyrics-metadata-e5/map[0m
