In [1]:
from usearch.index import Index
import numpy as np
import json
from tqdm import tqdm
import faiss

In [3]:
# Load the track catalogue. The context manager closes the file as soon as
# parsing finishes, and the explicit encoding keeps decoding independent of
# the platform's default locale.
with open('../data/tracks.json', encoding='utf-8') as tracks_file:
    tracks = json.load(tracks_file)

In [4]:
# Track ids in the catalogue's key order; a track's position in this list is
# what the mapping cells below turn into its integer id.
tracks_ids = [*tracks.keys()]

In [5]:
# Forward lookup: track id -> its position in `tracks_ids`.
track_ids_mapping = dict(zip(tracks_ids, range(len(tracks_ids))))

In [6]:
# Inverse lookup: position in `tracks_ids` -> track id.
ids_to_tracks = dict(enumerate(tracks_ids))

In [7]:
# Persist the integer-id -> track-id lookup. JSON object keys are always
# strings, so the integer keys are written as "0", "1", ...
# The context manager guarantees the file is flushed and closed before any
# later cell (or another process) reads it.
with open('../data/id_to_track_mapping.json', 'w', encoding='utf-8') as mapping_file:
    json.dump(ids_to_tracks, mapping_file)

In [8]:
def _load_json(path):
    """Parse the JSON document stored at `path`.

    The file is opened as UTF-8 and closed as soon as parsing finishes, so no
    file handle is left open for the lifetime of the kernel.
    """
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)


# One embeddings file per modality. The index-building cells below iterate over
# each of these and read an 'id' and an 'embedding' entry from every record.
graph_embeddings = _load_json('../data/graph_embeddings.json')
text_embeddings = _load_json('../data/text_embeddings.json')
image_embeddings = _load_json('../data/image_embeddings.json')
audio_embeddings = _load_json('../data/audio_embeddings.json')
fairouz_embeddings = _load_json('../data/fairouz_embeddings.json')

In [9]:
# Persist the track-id -> integer-id lookup. The context manager guarantees the
# file is flushed and closed before any later cell (or another process) reads it.
with open(r'../data/track_ids_mapping.json', 'w', encoding='utf-8') as mapping_file:
    json.dump(track_ids_mapping, mapping_file)

In [10]:
# Build and persist the exact-L2 FAISS index over the 128-d "fairouz" embeddings.
#
# Every vector is registered under its integer id from `track_ids_mapping` (the
# same ids that id_to_track_mapping.json decodes). A bare IndexFlatL2 numbers
# vectors by insertion order, which only matches those ids when the embeddings
# file lists every track in exactly the order of tracks.json; with explicit ids
# the search results decode to the right track regardless of file order or
# missing tracks, and are unchanged when the files are already aligned.

# Raises KeyError if an embedding refers to a track id absent from the mapping.
fairouz_keys = np.array(
    [track_ids_mapping[record['id']] for record in fairouz_embeddings],
    dtype=np.int64,
)
fairouz_np = np.array(
    [record['embedding'] for record in fairouz_embeddings],
    dtype=np.float32,
)
# Rows of the saved matrix stay in embeddings-file order.
# NOTE(review): consumers that index this array by mapped id should confirm
# that the file order matches the mapping.
np.save('../data/fairouz_np.npy', fairouz_np)

# IndexFlatL2 cannot store custom ids itself, so it is wrapped in an id map.
# IndexIDMap2 (rather than IndexIDMap) also keeps reconstruct() usable by id.
fairouz_flat = faiss.IndexFlatL2(128)
fairouz_index = faiss.IndexIDMap2(fairouz_flat)
fairouz_index.add_with_ids(fairouz_np, fairouz_keys)
faiss.write_index(fairouz_index, "../data/fairouz_index.faiss")

In [11]:
# Build and persist the exact-L2 FAISS index over the 128-d graph embeddings.
#
# Every vector is registered under its integer id from `track_ids_mapping` (the
# same ids that id_to_track_mapping.json decodes). A bare IndexFlatL2 numbers
# vectors by insertion order, which only matches those ids when the embeddings
# file lists every track in exactly the order of tracks.json; with explicit ids
# the search results decode to the right track regardless of file order or
# missing tracks, and are unchanged when the files are already aligned.

# Raises KeyError if an embedding refers to a track id absent from the mapping.
graph_keys = np.array(
    [track_ids_mapping[record['id']] for record in graph_embeddings],
    dtype=np.int64,
)
graph_np = np.array(
    [record['embedding'] for record in graph_embeddings],
    dtype=np.float32,
)
# Rows of the saved matrix stay in embeddings-file order.
# NOTE(review): consumers that index this array by mapped id should confirm
# that the file order matches the mapping.
np.save('../data/graph_np.npy', graph_np)

# IndexFlatL2 cannot store custom ids itself, so it is wrapped in an id map.
# IndexIDMap2 (rather than IndexIDMap) also keeps reconstruct() usable by id.
graph_flat = faiss.IndexFlatL2(128)
graph_index = faiss.IndexIDMap2(graph_flat)
graph_index.add_with_ids(graph_np, graph_keys)
faiss.write_index(graph_index, "../data/graph_index.faiss")

In [13]:
# Build and persist the exact-L2 FAISS index over the 128-d audio embeddings.
#
# Every vector is registered under its integer id from `track_ids_mapping` (the
# same ids that id_to_track_mapping.json decodes). A bare IndexFlatL2 numbers
# vectors by insertion order, which only matches those ids when the embeddings
# file lists every track in exactly the order of tracks.json; with explicit ids
# the search results decode to the right track regardless of file order or
# missing tracks, and are unchanged when the files are already aligned.

# Raises KeyError if an embedding refers to a track id absent from the mapping.
audio_keys = np.array(
    [track_ids_mapping[record['id']] for record in audio_embeddings],
    dtype=np.int64,
)
audio_np = np.array(
    [record['embedding'] for record in audio_embeddings],
    dtype=np.float32,
)
# Rows of the saved matrix stay in embeddings-file order.
# NOTE(review): consumers that index this array by mapped id should confirm
# that the file order matches the mapping.
np.save('../data/audio_np.npy', audio_np)

# IndexFlatL2 cannot store custom ids itself, so it is wrapped in an id map.
# IndexIDMap2 (rather than IndexIDMap) also keeps reconstruct() usable by id.
audio_flat = faiss.IndexFlatL2(128)
audio_index = faiss.IndexIDMap2(audio_flat)
audio_index.add_with_ids(audio_np, audio_keys)
faiss.write_index(audio_index, "../data/audio_index.faiss")

In [15]:
# Build and persist the exact-L2 FAISS index over the 512-d image embeddings.
#
# Every vector is registered under its integer id from `track_ids_mapping` (the
# same ids that id_to_track_mapping.json decodes). A bare IndexFlatL2 numbers
# vectors by insertion order, which only matches those ids when the embeddings
# file lists every track in exactly the order of tracks.json; with explicit ids
# the search results decode to the right track regardless of file order or
# missing tracks, and are unchanged when the files are already aligned.

# Raises KeyError if an embedding refers to a track id absent from the mapping.
image_keys = np.array(
    [track_ids_mapping[record['id']] for record in image_embeddings],
    dtype=np.int64,
)
image_np = np.array(
    [record['embedding'] for record in image_embeddings],
    dtype=np.float32,
)
# Rows of the saved matrix stay in embeddings-file order.
# NOTE(review): consumers that index this array by mapped id should confirm
# that the file order matches the mapping.
np.save('../data/image_np.npy', image_np)

# IndexFlatL2 cannot store custom ids itself, so it is wrapped in an id map.
# IndexIDMap2 (rather than IndexIDMap) also keeps reconstruct() usable by id.
image_flat = faiss.IndexFlatL2(512)
image_index = faiss.IndexIDMap2(image_flat)
image_index.add_with_ids(image_np, image_keys)
faiss.write_index(image_index, "../data/image_index.faiss")

In [16]:
# Build and persist the exact-L2 FAISS index over the 1024-d text embeddings.
#
# Every vector is registered under its integer id from `track_ids_mapping` (the
# same ids that id_to_track_mapping.json decodes). A bare IndexFlatL2 numbers
# vectors by insertion order, which only matches those ids when the embeddings
# file lists every track in exactly the order of tracks.json; with explicit ids
# the search results decode to the right track regardless of file order or
# missing tracks, and are unchanged when the files are already aligned.

# Raises KeyError if an embedding refers to a track id absent from the mapping.
text_keys = np.array(
    [track_ids_mapping[record['id']] for record in text_embeddings],
    dtype=np.int64,
)
text_np = np.array(
    [record['embedding'] for record in text_embeddings],
    dtype=np.float32,
)
# Rows of the saved matrix stay in embeddings-file order.
# NOTE(review): consumers that index this array by mapped id should confirm
# that the file order matches the mapping.
np.save(r'../data/text_np.npy', text_np)

# IndexFlatL2 cannot store custom ids itself, so it is wrapped in an id map.
# IndexIDMap2 (rather than IndexIDMap) also keeps reconstruct() usable by id.
text_flat = faiss.IndexFlatL2(1024)
text_index = faiss.IndexIDMap2(text_flat)
text_index.add_with_ids(text_np, text_keys)
faiss.write_index(text_index, "../data/text_index.faiss")