In [2]:
import torch
# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Optionally allocate a fraction of GPU memory
torch.cuda.set_per_process_memory_fraction(0.9, 0)

# Check memory stats
print(f'Allocated GPU memory: {torch.cuda.memory_allocated(device) / (1024 ** 3):.2f} GB')
print(f'Reserved GPU memory: {torch.cuda.memory_reserved(device) / (1024 ** 3):.2f} GB')

Using device: cuda
Allocated GPU memory: 0.00 GB
Reserved GPU memory: 0.00 GB


In [4]:
from data.cleaning import download_and_clean

artists, tracks = download_and_clean()

In [5]:
print(artists.head())

                         id  ...  popularity
45   0VLMVnVbJyJ4oyZs2L3Yl2  ...           6
46   0dt23bs4w8zx154C5xdVyl  ...           5
47   0pGhoB99qpEJEsBQxgaskQ  ...           7
48   3HDrX2OtSuXLW5dLR85uN3  ...           6
136  22mLrN5fkppmuUPsHx6i2G  ...           3

[5 rows x 5 columns]


In [6]:
print(tracks.head())

                       id  ...                                             genres
2  07A5yehtSnoedViJAZkNnc  ...                             [tango, vintage tango]
3  08FmqUhxtyLTn6pAh6bk45  ...                             [tango, vintage tango]
4  08y9GfoqCWfOGsKdwojr5e  ...  [adult standards, big band, swing, easy listen...
5  0BRXJHRNGQ3W4v9frnSfhu  ...  [adult standards, big band, swing, easy listen...
7  0IA0Hju8CAgYfV1hwhidBH  ...                                  [vintage chanson]

[5 rows x 21 columns]


In [7]:
from itertools import chain
all_genres = set(chain.from_iterable(tracks["genres"]))
len(all_genres)
# all_genres

4706

In [9]:
genre_to_index = {genre: idx for idx, genre in enumerate(sorted(all_genres))}
# genre_to_index

print(all_genres)
print(genre_to_index)



{'finnish folk', 'lithuanian pop', 'singaporean singer-songwriter', 'swedish metalcore', 'danish jazz', 'lgbtq+ hip hop', 'alternative pop', 'full on', 'chill beats', 'german jazz', 'deep space rock', 'dreamo', 'sad rap', 'suomirap', 'shakuhachi', 'slovenian rock', 'bard', 'mollywood', 'nashville indie', 'french romanticism', 'filthstep', 'svensk indie', 'atmospheric post-rock', 'drone rock', 'outlaw country', 'zespol dzieciecy', 'pibroch', 'early romantic era', 'classic portuguese pop', 'classical trumpet', 'kurdish rock', 'vintage french electronic', 'spacewave', 'british comedy', 'kayokyoku', 'microhouse', 'australian comedy', 'epicore', 'vapor pop', 'indonesian death metal', 'slc indie', 'classic norwegian pop', 'jig and reel', 'german metal', 'polish black metal', 'trap baiano', 'dutch punk', 'shojo', 'south african country', 'trap carioca', 'string folk', 'dutch tech house', 'neo classical metal', 'rock keyboard', 'atlanta indie', 'turkish pop', 'electronica', 'indian edm', 'rumb

In [18]:
from sklearn import preprocessing
import numpy as np

labelEncoded = preprocessing.LabelEncoder()

arr = np.asarray(tracks["genres"])
unique = {}

i = 0

nested_lst_of_tuples = [tuple(l) for l in arr]

for elem in nested_lst_of_tuples:
    if elem not in unique:
        unique[elem] = i
        i = i + 1

coded = []
for elem in nested_lst_of_tuples:
    coded.append(unique[elem])


Accuracy:  0.03843148255755706
Recall:  0.2115322976804458
F1 Score:  0.008495640418688255


In [None]:
from sklearn.model_selection import train_test_split

inputs = tracks[["popularity", "duration_ms", "explicit", "danceability", "energy", "key", "loudness", "mode",
                "speechiness", "acousticness", "instrumentalness", "liveness", "valence", "tempo", "time_signature"]]
outputs = coded
X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.1, random_state=42)

In [None]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()

classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

In [None]:
from sklearn import metrics

print("Accuracy: ", metrics.accuracy_score(y_test, y_pred))
print("Recall: ", metrics.recall_score(y_test, y_pred, average='macro', zero_division=1))
print("F1 Score: ", metrics.f1_score(y_test, y_pred, average='macro'))