In [1]:
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import numpy as np
from numpy import loadtxt
import csv
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [2]:
load_dotenv()
scope = 'playlist-modify-public'
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))
playlists = sp.current_user_playlists()
user_id = sp.me()['id']

def accuracy(true, pred):
    correct = 0
    for i in range(len(true)):
        if true[i] == pred[i]:
            correct += 1
    return correct/len(true)

In [3]:
all_track_data = loadtxt('data.csv', delimiter=',')
all_track_labels = []
with open('labels.csv', newline='') as csvfile:
    label_reader = csv.reader(csvfile, delimiter=',')
    for row in label_reader:
        for label in row:
            all_track_labels.append(label[2:-2])
all_track_labels = np.array([all_track_labels]).T

#### Prepoccessing 

In [4]:
X_train, X_val, y_train, y_val = train_test_split(all_track_data, all_track_labels, test_size=0.2)
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(1931, 11) (483, 11) (1931, 1) (483, 1)


### Begin Classification Below on all_track_data and all_track_labels

#### Decision Tree Classifier

##### Create and Train Random Forrest Classifier

In [5]:
depth = 12
estimators = 100
model = RandomForestClassifier(max_depth=depth, n_estimators=estimators, criterion='entropy', max_features='sqrt')
model.fit(all_track_data, np.ravel(all_track_labels))



RandomForestClassifier(criterion='entropy', max_depth=12, max_features='sqrt')

##### Test on training and validation datasets

In [6]:
y_train_pred = model.predict(X_train)
y_val_pred = model.predict(X_val)
train_acc = accuracy(y_train, y_train_pred)
val_acc = accuracy(y_val, y_val_pred)

print("Training Accuracy:", train_acc)
print("Validation Accuracy:", val_acc)

Training Accuracy: 0.8508544795442776
Validation Accuracy: 0.8571428571428571


##### Hyper-param tuning

In [7]:
depths = [1, 3, 5, 10, 12, 15, 20]
estimators = [1, 10, 30, 50, 80, 100, 150]

best_params = (0, 0)
best_accuracy = 0
for depth in depths:
    for estimator in estimators:
        model = RandomForestClassifier(max_depth=depth, n_estimators=estimator, criterion='entropy', max_features='sqrt')
        model.fit(all_track_data, np.ravel(all_track_labels))
        y_pred = model.predict(X_val)
        val_acc = accuracy(y_val, y_pred)
        print("Depth:", depth, "| Estimators:", estimator, "| Accuracy", val_acc)
        if val_acc > best_accuracy:
            best_accuracy = val_acc
            best_params = (depth, estimator)

Depth: 1 | Estimators: 1 | Accuracy 0.07867494824016563
Depth: 1 | Estimators: 10 | Accuracy 0.11387163561076605
Depth: 1 | Estimators: 30 | Accuracy 0.15320910973084886
Depth: 1 | Estimators: 50 | Accuracy 0.16356107660455488
Depth: 1 | Estimators: 80 | Accuracy 0.16356107660455488
Depth: 1 | Estimators: 100 | Accuracy 0.16977225672877846
Depth: 1 | Estimators: 150 | Accuracy 0.16563146997929606
Depth: 3 | Estimators: 1 | Accuracy 0.13871635610766045
Depth: 3 | Estimators: 10 | Accuracy 0.23395445134575568
Depth: 3 | Estimators: 30 | Accuracy 0.23809523809523808
Depth: 3 | Estimators: 50 | Accuracy 0.22981366459627328
Depth: 3 | Estimators: 80 | Accuracy 0.2484472049689441
Depth: 3 | Estimators: 100 | Accuracy 0.2525879917184265
Depth: 3 | Estimators: 150 | Accuracy 0.2463768115942029
Depth: 5 | Estimators: 1 | Accuracy 0.2712215320910973
Depth: 5 | Estimators: 10 | Accuracy 0.30434782608695654
Depth: 5 | Estimators: 30 | Accuracy 0.33747412008281574
Depth: 5 | Estimators: 50 | Accura

In [8]:
print("Best Parameters and Accuracy:")
print("Depth:", best_params[0], "| Estimators:", best_params[1], "| Accuracy", best_accuracy)

Best Parameters and Accuracy:
Depth: 15 | Estimators: 80 | Accuracy 0.8674948240165632


#### Multiclass SVM


In [9]:
all_track_data = loadtxt('data.csv', delimiter=',')
all_track_labels = []
with open('labels.csv', newline='') as csvfile:
    label_reader = csv.reader(csvfile, delimiter=',')
    for row in label_reader:
        for label in row:
            all_track_labels.append(label[2:-2])
all_track_labels = np.array([all_track_labels]).T