In [2]:
import numpy as np
from sklearn import model_selection, preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


# Single seed shared by the train/test split and both classifiers below,
# so that a fresh Restart & Run All reproduces the same numbers.
random_state = 42

In [3]:
# Use the same method of loading the data as in previous sub problems

input_file = "Data/features_3_sec.csv"

with open(input_file) as f:
    f.readline()  # skip the CSV header row so loadtxt only sees data rows
    data = np.loadtxt(f, delimiter=',', dtype=np.object_)

# Position in this list is the integer class id used for y.
labels = [
    'blues', 'classical', 'country', 'disco', 'hiphop', 
    'jazz', 'metal', 'pop', 'reggae', 'rock'
]

# Column 0 is skipped (presumably a non-numeric identifier -- confirm against
# the CSV) and the last column holds the genre name; everything in between is
# treated as a numeric feature.
X_raw = data[:, 1:-1].astype(np.float32)
y = [labels.index(genre) for genre in data[:, -1]]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows' min/max influence the preprocessing (data leakage) and make the
# reported test accuracy optimistic. The shuffle depends only on the number of
# samples and random_state, so the partition is the same as when splitting
# already-scaled features.
train_X, test_X, train_y, test_y = model_selection.train_test_split(
    X_raw, y, test_size=0.2, random_state=random_state
)

# Learn the per-feature range from the training rows only, then apply that
# same mapping to the held-out rows. Test values may fall slightly outside
# [-1, 1]; that is expected and harmless.
min_max_scaler = preprocessing.MinMaxScaler((-1, 1))
train_X = min_max_scaler.fit_transform(train_X)
test_X = min_max_scaler.transform(test_X)

# Keep X defined as the scaled full feature matrix for any later cell that
# relies on it; it is scaled with the training-set statistics.
X = min_max_scaler.transform(X_raw)

In [None]:
# Random Forest: cross-validated grid search, then evaluation on the held-out split

# Each hyperparameter lists a single candidate, so the "search" is effectively
# a 5-fold cross-validation of this one configuration.
parameter_grid = {'n_estimators': [512], 'max_depth': [None]}

forest = RandomForestClassifier(random_state=random_state)
grid_search = model_selection.GridSearchCV(
    estimator=forest,
    param_grid=parameter_grid,
    cv=5,
    verbose=5,
)
grid_search.fit(train_X, train_y)

# Mean cross-validated score of the winning configuration and its settings.
print(f"Best score: {grid_search.best_score_:.4f}")
print(f"Best parameters: {grid_search.best_params_}")

# Score the selected model on rows that took no part in the search.
test_y_pred = grid_search.predict(test_X)
print(f"Accuracy: {accuracy_score(test_y, test_y_pred):.4f}")

### Random Forest

Throwing together an untuned random forest classifier against the 3-second features performed very well even to begin with, resulting in 88% accuracy. A grid search over its hyperparameters essentially told us that none of them yielded a significant improvement except `n_estimators`, and even then we were only able to reach 89% accuracy by raising it to 512.

In [None]:
# SVM: cross-validated grid search, then evaluation on the held-out split

# One candidate per hyperparameter: an RBF kernel with C = 1.0 and gamma = 1.0.
parameter_grid_svm = {'C': [1.0], 'gamma': [1.0], 'kernel': ['rbf']}

svm_classifier = SVC(random_state=random_state)
grid_search_svm = model_selection.GridSearchCV(
    estimator=svm_classifier,
    param_grid=parameter_grid_svm,
    cv=5,
    verbose=5,
)
grid_search_svm.fit(train_X, train_y)

# Mean cross-validated score of the winning configuration and its settings.
print(f"Best score: {grid_search_svm.best_score_:.4f}")
print(f"Best parameters: {grid_search_svm.best_params_}")

# Score the selected model on rows that took no part in the search.
test_y_pred = grid_search_svm.predict(test_X)
print(f"Accuracy: {accuracy_score(test_y, test_y_pred):.4f}")

### SVM

While the first attempt at SVM didn't go quite as well as the first attempt at Random Forest, it still gave us a very respectable 73% accuracy. A grid search over the `C` and `gamma` hyperparameters got that up to 92%.