In [15]:
import json
import requests
import numpy as np
import tempfile
import os

In [16]:
# Load the track records. The context manager closes the file handle as soon
# as parsing finishes instead of leaving it open until garbage collection, and
# the explicit encoding keeps the read independent of the machine's locale.
with open('/workspace/fairouz/fairouz_conf/fairouz/tracks_contextualized.json', encoding='utf-8') as tracks_file:
    tracks = json.load(tracks_file)

In [18]:
# Peek at the record stored under the first key to inspect its fields.
first_track_id = list(tracks)[0]
tracks[first_track_id]

{'track_title': 'True Colors',
 'artist_name': 'The Weeknd',
 'album_name': 'Starboy (Deluxe)',
 'deezer_id': '2189328287',
 'discogs_id': '',
 'lyrics': {'lyrics': "[Intro]\r\nYeah, uh\r\nYeah\r\nYeah\r\n\r\n[Verse 1]\r\nTell me the truth, baby girl, who else been with you?\r\nIt's gon' come to my attention either way, yeah (Uh, yeah)\r\nAnd I understand, baby girl, we all have a past\r\nI'd much rather hear the truth come straight from you (Yeah)\r\n\r\n[Pre-Chorus]\r\nSo if I love you (If I love you)\r\nIt'd be just for you (It'd be just for you)\r\nSo when I'm touchin' you (Touchin' you)\r\nCan I trust in you? (Trust in you)\r\nCan I trust in you? Oh, babe (Trust in you)\r\n\r\n[Chorus]\r\nGirl, come show me your true colors\r\nPaint me a picture with your true colors\r\nThese are the questions of a new lover\r\nTrue colors, true colors\r\nGirl, come show me your true colors\r\nPaint me a picture with your true colors\r\nThese are confessions of a new lover (Hey, hey)\r\nTrue color

In [5]:
def embed_lyrics(lyric):
    """Embed a track's lyric annotations with the remote embedding service.

    Args:
        lyric: Mapping holding a "lyrics" entry plus the "summary",
            "context" and "emotional" entries that are sent to the service.

    Returns:
        np.ndarray: The value under the reply's "embedding" key, or a
        1024-element zero vector when there are no lyrics (no request is
        made in that case).

    Raises:
        RuntimeError: If the EMBED_API_TOKEN environment variable is not set.
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If the service does not answer in time.
    """
    # Missing, None and empty lyrics all mean "nothing to embed".
    if not lyric.get("lyrics"):
        return np.zeros(1024)

    # The bearer token is a credential, so it is read from the environment
    # rather than being committed with the notebook.
    token = os.environ.get("EMBED_API_TOKEN")
    if not token:
        raise RuntimeError(
            "Set the EMBED_API_TOKEN environment variable to the embedding "
            "service's bearer token before calling embed_lyrics()."
        )

    url = "https://w7852kszrbkrz2-4000.proxy.runpod.net/embed/"
    payload = {
        "lyrics_summary": lyric["summary"],
        "lexical_keywords": lyric["context"],
        "sentiment_keywords": lyric["emotional"]
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {token}'
    }
    # Without a timeout a stalled connection would hang the whole run.
    response = requests.post(url, json=payload, headers=headers, timeout=60)
    # Surface HTTP errors directly instead of failing later while parsing.
    response.raise_for_status()
    # NOTE(review): the body is decoded twice, which implies the service
    # returns a JSON-encoded string rather than a JSON object - confirm.
    return np.array(json.loads(response.json())["embedding"])

In [None]:
def embed_images(images_url):
    """Embed the image located at ``images_url`` with the remote image service.

    The image is downloaded and then uploaded to the embedding endpoint as a
    multipart file; it is sent from memory, so no temporary file is needed.

    NOTE(review): the upload contract (multipart field named "file") and the
    reply format (same double-encoded JSON as the lyrics endpoint) are
    assumptions - confirm against the service before relying on this.
    NOTE(review): assumes ``images_url`` is a single URL string - TODO confirm.
    NOTE(review): embed_lyrics sends a bearer token to its endpoint; this
    endpoint may require one as well - confirm.

    Args:
        images_url: URL of the image to embed.

    Returns:
        np.ndarray: The value under the reply's "embedding" key.

    Raises:
        requests.HTTPError: If the download or the embedding request fails.
        requests.Timeout: If either request does not complete in time.
    """
    url = "https://w7852kszrbkrz2-4002.proxy.runpod.net/embed/"

    # Fetch the image bytes first; fail early if the image is unavailable.
    download = requests.get(images_url, timeout=60)
    download.raise_for_status()

    # Use the last path segment of the URL (without any query string) as the
    # upload filename, falling back to a generic name when there is none.
    filename = images_url.split("?", 1)[0].rstrip("/").rsplit("/", 1)[-1] or "image"

    response = requests.post(
        url,
        files={"file": (filename, download.content)},
        timeout=60,
    )
    response.raise_for_status()
    return np.array(json.loads(response.json())["embedding"])

In [7]:
from tqdm import tqdm

In [11]:
# Accumulator for the (track id, embedding vector) pairs produced by the embedding loop.
embeddings = list()

In [12]:
# Embed the lyrics of every track. The result list is rebuilt from scratch so
# that re-running this cell cannot append duplicate rows.
embeddings = []
failed_track_ids = []
for track_id, track in tqdm(tracks.items(), total=len(tracks)):
    try:
        embeddings.append((track_id, embed_lyrics(track["lyrics"])))
    except Exception as e:
        # Keep going so one bad track does not abort the whole run, but record
        # which track failed so the problem can be traced afterwards.
        failed_track_ids.append(track_id)
        print(f"{track_id}: {e}")

# Every track must have produced an embedding; name the failures otherwise.
assert len(embeddings) == len(tracks), (
    f"embedding failed for {len(failed_track_ids)} track(s): {failed_track_ids}"
)

100%|██████████| 822/822 [03:05<00:00,  4.44it/s]


In [13]:
# Turn each (track id, vector) pair into a JSON-serializable record.
embeddings_json = [
    {"id": track_id, "embedding": vector.tolist()}
    for track_id, vector in embeddings
]

In [14]:
# Persist the embedding records. The context manager guarantees the file is
# flushed and closed; a bare open() handle may keep buffered data unwritten
# until it happens to be garbage collected.
with open("/workspace/fairouz/fairouz_conf/fairouz/lyrics_mxbai_embeddings.json", "w", encoding="utf-8") as embeddings_file:
    json.dump(embeddings_json, embeddings_file)

In [110]:
# Accumulator for the per-track metadata rows built in the next cell.
metadatas = list()

In [111]:
def _metadata_row(md):
    """Flatten one track record into a metadata row of plain strings."""
    return dict(
        track_name=md["track_name"],
        artist_name=md["artist_name"],
        album_name=md["album_name"],
        genres=", ".join(md["genres"]),
        keywords=", ".join(md["lyrics"]["context"]),
        emotional=", ".join(md["lyrics"]["emotional"]),
        synopsis=md["lyrics"]["summary"],
    )


# Append one row per embedding record, in the same order as embeddings_json.
metadatas.extend(
    _metadata_row(tracks["all"]["tracks"][record["id"]])
    for record in embeddings_json
)

In [96]:
# Shape of the stacked embedding vectors (one row per embedded track).
np.array([vector for _track_id, vector in embeddings]).shape

(821, 1024)

In [107]:
# Number of metadata rows collected so far.
len(metadatas)

821

In [112]:
# Show only the first few rows; displaying the whole list floods the cell
# output and bloats the saved notebook.
metadatas[:3]

[{'track_name': 'Are You What You Want to Be?',
  'artist_name': 'Foster The People',
  'album_name': 'Supermodel',
  'genres': 'Alternative',
  'keywords': 'Champs-Élysées, Djembe of Ghana, Fine lady from Belize, Dissidents, Revolution',
  'emotional': 'fear, cowardice, reflection, struggle, self-doubt',
  'synopsis': "The lyrics express the speaker's internal struggle with living up to their desires and dealing with the consequences of their actions. Themes of fear, cowardice, and self-reflection are present."},
 {'track_name': 'Out In The Fields',
  'artist_name': 'Gary Moore',
  'album_name': 'All The Best',
  'genres': 'Rock',
  'keywords': 'equality, war, death, fighting, inequality',
  'emotional': 'inequality, war, death, fighting, hopelessness',
  'synopsis': 'The lyrics express the idea that in a war or conflict, all men are equal and no factor such as race or religion matters. The theme is one of equality and the inevitability of death.'},
 {'track_name': 'Incredible Sauce (

In [113]:
from nomic import atlas
import numpy as np
# Upload the metadata rows together with their embedding vectors to Atlas.
embedding_matrix = np.array([vector for _track_id, vector in embeddings])
dataset = atlas.map_data(
    data=metadatas,
    embeddings=embedding_matrix,
    identifier="grad_playlist_lyrics_metadata",
)

[32m2024-03-19 23:21:29.252[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36m_create_project[0m:[36m868[0m - [1mCreating dataset `grad-playlist-lyrics-metadata`[0m
[32m2024-03-19 23:21:29.799[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_data[0m:[36m108[0m - [1mUploading data to Atlas.[0m
1it [00:02,  2.05s/it]
[32m2024-03-19 23:21:31.877[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36m_add_data[0m:[36m1537[0m - [1mUpload succeeded.[0m
[32m2024-03-19 23:21:31.881[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_data[0m:[36m123[0m - [1m`tyqnology/grad-playlist-lyrics-metadata`: Data upload succeeded to dataset`[0m
[32m2024-03-19 23:21:34.180[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36mcreate_index[0m:[36m1246[0m - [1mCreated map `grad_playlist_lyrics_metadata` in dataset `tyqnology/grad-playlist-lyrics-metadata`: https://atlas.nomic.ai/data/tyqnology/grad-playlist-lyrics-metadata/map[0m
