In [2]:
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import numpy as np
from numpy import loadtxt
import csv
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [3]:
load_dotenv()
scope = 'playlist-modify-public'
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))
playlists = sp.current_user_playlists()
user_id = sp.me()['id']

def accuracy(true, pred):
    correct = 0
    for i in range(len(true)):
        if true[i] == pred[i]:
            correct += 1
    return correct/len(true)

In [4]:
all_track_data = loadtxt('data.csv', delimiter=',')
all_track_labels = []
with open('labels.csv', newline='') as csvfile:
    label_reader = csv.reader(csvfile, delimiter=',')
    for row in label_reader:
        for label in row:
            all_track_labels.append(label[2:-2])
all_track_labels = np.array([all_track_labels]).T

#### Prepoccessing 

In [5]:
X_train, X_val, y_train, y_val = train_test_split(all_track_data, all_track_labels, test_size=0.2)
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(1924, 10) (481, 10) (1924, 1) (481, 1)


### Begin Classification Below on all_track_data and all_track_labels

#### Decision Tree Classifier

##### Create and Train Random Forrest Classifier

In [6]:
depth = 12
estimators = 100
model = RandomForestClassifier(max_depth=depth, n_estimators=estimators, criterion='entropy', max_features='sqrt')
model.fit(all_track_data, np.ravel(all_track_labels))



RandomForestClassifier(criterion='entropy', max_depth=12, max_features='sqrt')

##### Test on training and validation datasets

In [7]:
y_train_pred = model.predict(X_train)
y_val_pred = model.predict(X_val)
train_acc = accuracy(y_train, y_train_pred)
val_acc = accuracy(y_val, y_val_pred)

print("Training Accuracy:", train_acc)
print("Validation Accuracy:", val_acc)

Training Accuracy: 0.8435550935550935
Validation Accuracy: 0.8835758835758836


##### Hyper-param tuning

In [9]:
depths = [1, 3, 5, 10, 12, 15, 20]
estimators = [1, 10, 30, 50, 80, 100, 150]

best_params = (0, 0)
best_accuracy = 0
for depth in depths:
    for estimator in estimators:
        model = RandomForestClassifier(max_depth=depth, n_estimators=estimator, criterion='entropy', max_features='sqrt')
        model.fit(all_track_data, np.ravel(all_track_labels))
        y_pred = model.predict(X_val)
        val_acc = accuracy(y_val, y_pred)
        print("Depth:", depth, "| Estimators:", estimator, "| Accuracy", val_acc)
        if val_acc > best_accuracy:
            best_accuracy = val_acc
            best_params = (depth, estimator)

Depth: 1 | Estimators: 1 | Accuracy 0.12266112266112267
Depth: 1 | Estimators: 10 | Accuracy 0.15176715176715178
Depth: 1 | Estimators: 30 | Accuracy 0.16424116424116425
Depth: 1 | Estimators: 50 | Accuracy 0.1683991683991684
Depth: 1 | Estimators: 80 | Accuracy 0.1704781704781705
Depth: 1 | Estimators: 100 | Accuracy 0.1683991683991684
Depth: 1 | Estimators: 150 | Accuracy 0.16216216216216217
Depth: 3 | Estimators: 1 | Accuracy 0.1891891891891892
Depth: 3 | Estimators: 10 | Accuracy 0.23492723492723494
Depth: 3 | Estimators: 30 | Accuracy 0.22661122661122662
Depth: 3 | Estimators: 50 | Accuracy 0.22661122661122662
Depth: 3 | Estimators: 80 | Accuracy 0.24740124740124741
Depth: 3 | Estimators: 100 | Accuracy 0.24324324324324326
Depth: 3 | Estimators: 150 | Accuracy 0.2390852390852391
Depth: 5 | Estimators: 1 | Accuracy 0.21205821205821207
Depth: 5 | Estimators: 10 | Accuracy 0.3097713097713098
Depth: 5 | Estimators: 30 | Accuracy 0.31392931392931395
Depth: 5 | Estimators: 50 | Accuracy

In [10]:
print("Best Parameters and Accuracy:")
print("Depth:", best_params[0], "| Estimators:", best_params[1], "| Accuracy", best_accuracy)

Best Parameters and Accuracy:
Depth: 15 | Estimators: 80 | Accuracy 0.8877338877338877


#### Something else