Relevant imports and initialization code.

In [19]:
import pandas as pd
import numpy as np
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

seed = 25
np.random.seed(seed)
auth_manager = SpotifyClientCredentials(client_id='5e87783324eb47cba39f43f39e374c71',client_secret='68427108059946e9abfa226df5780371')
sp = spotipy.Spotify(auth_manager=auth_manager)

In [22]:
def encode_data(data, padding_length, should_lowercase = True):
    t = Tokenizer(lower = should_lowercase)
    t.fit_on_texts(data)
    vocab_size = len(t.word_index) + 1
    # integer encode the reviews.
    encoded_playlists = t.texts_to_sequences(data)
    # pad reviews to a max length of 150 words
    padded_data = pad_sequences(encoded_playlists, maxlen=padding_length, padding='post')
    return t, padded_data, vocab_size

def generator(data_points):
    for data in data_points:
        yield keras.utils.to_categorical(data - 1, num_classes=y_vocab_size)

df = pd.read_json('./datasets/challenge_set.json')
playlist_series = df['playlists']

X = []
Y = []

for item in playlist_series:
    if 'name' in item:
        playlist_name = item['name'].strip()

        if (len(item['tracks']) > 0):
            for track in item['tracks']:
                X.append(playlist_name)
                Y.append(track['track_uri'].split(':')[2])

x_word_length = 10
x_tokenizer, X, x_vocab_size = encode_data(X, x_word_length)
y_tokenizer, Y, y_vocab_size = encode_data(Y, 1, should_lowercase = False)

Y = Y.flatten()
print(y_vocab_size)
# Y = generator(Y)



63997


In [5]:
def build_and_compile_model():
    model = keras.Sequential([
      keras.layers.Embedding(x_vocab_size, 16, input_length=x_word_length),
      keras.layers.Bidirectional(keras.layers.LSTM(16, return_sequences=True)),
      keras.layers.Bidirectional(keras.layers.LSTM(16)),
      #keras.layers.Dense(16,activation='relu'),
      keras.layers.Dense(y_vocab_size, activation='softmax')
    ])
    optimizer = keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [6]:

model = build_and_compile_model()
model.summary()

model.fit(X, Y, epochs=10, batch_size=5000, validation_split=0.2)


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 10, 16)            35680     
                                                                 
 bidirectional_2 (Bidirecti  (None, 10, 32)            4224      
 onal)                                                           
                                                                 
 bidirectional_3 (Bidirecti  (None, 32)                6272      
 onal)                                                           
                                                                 
 dense_1 (Dense)             (None, 63997)             2111901   
                                                                 
Total params: 2158077 (8.23 MB)
Trainable params: 2158077 (8.23 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch

<keras.src.callbacks.History at 0x1c404bd6520>

In [100]:
def flatten(l):
    return [item for sublist in l for item in sublist]

def print_track(track_id):
    #print(track)
    track = sp.track(track_id)
    print('Track: ' + track['name'] + ' [' + track['id'] + ']')
    artists = sp.artists([artist['id'] for artist in track['artists']])['artists']
    artist_names = [artist['name'] for artist in artists]
    artist_genres = np.unique(flatten([artist['genres'] for artist in artists]))
    print('Artists: ' + str.join(', ', artist_names))
    print('Genres: ' + str.join(', ', artist_genres))

# chill rap [6, 3, 0, 0, 0, 0, 0, 0, 0, 0]
# summer party [10, 9, 0, 0, 0, 0, 0, 0, 0, 0]
# classical [1476, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# country rock [1, 2, 0, 0, 0, 0, 0, 0, 0, 0]

print(x_tokenizer.word_index)

input = 'winter chill songs'

input_words = input.split(' ')

tokenized_words = []
for word in input_words:
    if word in x_tokenizer.word_index:
        tokenized_words.append(x_tokenizer.word_index[word])
    else:
        print('Word: ' + word + ' not supported.')

tokenized_words = np.array(tokenized_words)
tokenized_words.resize(10)
print(tokenized_words)

prediction = model.predict(np.array([tokenized_words]))
prediction_indices = np.argsort(prediction[0])[::-1][:5]

for id in prediction_indices:
    track_id = y_tokenizer.index_word[id]
    print_track(track_id)
    print('')

{'country': 1, 'rock': 2, 'rap': 3, 'music': 4, 'good': 5, 'chill': 6, 'jams': 7, 'songs': 8, 'party': 9, 'summer': 10, 'my': 11, 'playlist': 12, 'throwback': 13, 'new': 14, 'workout': 15, 'throwbacks': 16, 'oldies': 17, 'the': 18, 'vibes': 19, 'old': 20, 'classic': 21, 'road': 22, 'car': 23, 'work': 24, 'hop': 25, 'hip': 26, 'trip': 27, 'alternative': 28, 'pop': 29, 'mix': 30, 'lit': 31, 'jamz': 32, '2017': 33, 'classics': 34, 'school': 35, 'tbt': 36, 'christmas': 37, '2016': 38, 'it': 39, '80s': 40, '90s': 41, 'best': 42, 'worship': 43, 'out': 44, 'all': 45, 'r': 46, 'love': 47, 'up': 48, 'stuff': 49, 'tunes': 50, 'everything': 51, 'random': 52, 'edm': 53, 'dance': 54, '2015': 55, 'b': 56, 'jam': 57, 'disney': 58, 'slow': 59, 'gym': 60, 'happy': 61, 'but': 62, 'back': 63, 'feels': 64, '17': 65, 'drive': 66, 'hits': 67, 'bangers': 68, 'goodies': 69, 'time': 70, 'get': 71, 'christian': 72, 'i': 73, 'indie': 74, 'wedding': 75, 'chillin': 76, 'hype': 77, 'alt': 78, '3': 79, 'roadtrip': 8