# [FMA: A Dataset For Music Analysis](https://github.com/mdeff/fma)

Michaël Defferrard, Kirell Benzi, Pierre Vandergheynst, Xavier Bresson, EPFL LTS2.

## Setup: Import Packages and FMA Data

In [1]:
%matplotlib inline

import os
from pathlib import Path

import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.impute import SimpleImputer

import utils.fma_extraction as fma_utils
import utils.ml_training as ml_utils

# Default size (in inches) for every figure created in this notebook.
plt.rcParams.update({"figure.figsize": (17, 5)})

In [None]:
# Directories are read from the environment: one holds the metadata CSV files
# loaded below, the other is the audio (mp3) location.
FMA_METADATA_DIRECTORY = os.getenv("FMA_METADATA_DIRECTORY")
FMA_AUDIO_DIRECTORY = os.getenv("FMA_AUDIO_DIRECTORY")

# Fail early with an explicit message; otherwise Path(None) raises an opaque
# TypeError on the first load below.
if FMA_METADATA_DIRECTORY is None:
    raise RuntimeError(
        "The FMA_METADATA_DIRECTORY environment variable is not set; "
        "point it at the directory containing tracks.csv, genres.csv, "
        "features.csv and echonest.csv."
    )

# Load metadata and features.
metadata_dir = Path(FMA_METADATA_DIRECTORY)
tracks = fma_utils.load(metadata_dir / "tracks.csv")
genres = fma_utils.load(metadata_dir / "genres.csv")
features = fma_utils.load(metadata_dir / "features.csv")
echonest = fma_utils.load(metadata_dir / "echonest.csv")

# Sanity checks: features and tracks must share the same index, and every
# echonest row must belong to a known track.
np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()  # noqa: S101

# Left-merge echonest onto features by index, so every row of `features` is
# kept whether or not it has a matching echonest row.
features = features.merge(echonest, left_index=True, right_index=True, how="left")
print("Features columns:", features.columns.to_list())

# Last expression of the cell: shows the four table shapes as the cell output.
tracks.shape, genres.shape, features.shape, echonest.shape

## Echonest features

In [None]:
# Collect every column whose first two levels are ("echonest", "audio_features")
# or ("echonest", "temporal_features").
echonest_features = [
    col
    for col in features.columns
    if col[:2] in (("echonest", "audio_features"), ("echonest", "temporal_features"))
]
print(echonest_features)

In [None]:
# echonest.shape is (rows, columns); the message reports columns first.
n_tracks, n_features = echonest.shape
print("{1} features for {0} tracks".format(n_tracks, n_features))

# Preview the audio-feature columns and list their names.
audio_features = echonest["echonest", "audio_features"]
ipd.display(audio_features.head())
print("Audio features include", audio_features.columns)

In [None]:
# Preview the temporal-feature columns.
temporal_features = echonest["echonest", "temporal_features"]
ipd.display(temporal_features.head())

# Plot the temporal features of the row labelled 2 in the echonest index.
x = echonest.loc[2, ("echonest", "temporal_features")]
plt.plot(x)

## Classification by Features

In [None]:
# Mask of tracks whose subset compares <= "medium".
# NOTE(review): this presumably relies on ("set", "subset") being an ordered
# categorical, in which case the mask keeps "medium" AND every subset ranked
# below it (not only rows labelled "medium"). If the column is plain strings
# the comparison is lexicographic -- confirm against fma_utils.load.
medium = tracks["set", "subset"] <= "medium"

# Boolean masks for the predefined training / validation / test splits.
split = tracks["set", "split"]
train = split == "training"
val = split == "validation"
test = split == "test"

# Row selectors shared by the labels and both feature sets.
train_rows = medium & train
val_rows = medium & val
test_rows = medium & test

# Genre labels (targets) for each split.
genre_top = ("track", "genre_top")
y_train = tracks.loc[train_rows, genre_top]
y_val = tracks.loc[val_rows, genre_top]
y_test = tracks.loc[test_rows, genre_top]

# Feature values from the columns listed in ml_utils.feature_columns.
X_train_librosa = features.loc[train_rows, ml_utils.feature_columns].values
X_val_librosa = features.loc[val_rows, ml_utils.feature_columns].values
X_test_librosa = features.loc[test_rows, ml_utils.feature_columns].values

# Feature values from the ("echonest", "audio_features") columns.
echonest_audio = ("echonest", ["audio_features"])
X_train_echonest = features.loc[train_rows, echonest_audio].values
X_val_echonest = features.loc[val_rows, echonest_audio].values
X_test_echonest = features.loc[test_rows, echonest_audio].values

# Concatenate both feature sets column-wise for each split.
X_train_combined = np.hstack([X_train_librosa, X_train_echonest])
X_val_combined = np.hstack([X_val_librosa, X_val_echonest])
X_test_combined = np.hstack([X_test_librosa, X_test_echonest])

# Mean-impute missing values. The imputer is fitted on the training split only
# and then reused unchanged for validation and test, so no statistics leak
# from the held-out data.
imputer = SimpleImputer(strategy="mean")
X_train = imputer.fit_transform(X_train_combined)
X_val = imputer.transform(X_val_combined)
X_test = imputer.transform(X_test_combined)

# Check the shapes of the feature arrays.
print(X_train.shape)
print(X_test.shape)
print(X_val.shape)

# No flattening step is needed: np.hstack of the 2-D `.values` arrays and
# SimpleImputer both produce 2-D (n_samples, n_features) arrays, so reshaping
# to (n_samples, -1) would be a no-op.

print(f"{y_train.size} training examples, {y_test.size} testing examples")
print(f"{X_train.shape[1]} features, {np.unique(y_train).size} classes")

In [None]:
# Count missing values per column and print only the columns that have any.
nan_columns = features.isna().sum()
has_missing = nan_columns > 0
print(nan_columns[has_missing])


In [None]:
# Pipeline switches: `reduce_features` is forwarded to ml_utils.preprocess_data
# (presumably toggling feature reduction there -- verify in ml_utils), and
# `model_classifer` names the classifier that ml_utils.train_and_evaluate uses.
reduce_features = False
model_classifer = "SVM"

# Preprocess the training and test splits.
# NOTE(review): X_train and X_test are rebound to the preprocessed arrays here,
# while X_val is not passed through preprocess_data. Later cells that combine
# X_train with X_val see differently processed inputs -- confirm this is intended.
X_train, X_test, y_train_encoded, y_test_encoded = ml_utils.preprocess_data(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    reduce_features=reduce_features,
)

# Train the selected model and evaluate it on the test split.
clf, score = ml_utils.train_and_evaluate(
    model_classifier=model_classifer,
    X_train=X_train,
    y_train_encoded=y_train_encoded,
    X_test=X_test,
    y_test_encoded=y_test_encoded,
)



## Validation on SVM Model

In [None]:
import sklearn.svm

# Fit a default-parameter SVC on the training data; fit() returns the fitted
# estimator itself, so construction and fitting can be chained.
clf = sklearn.svm.SVC().fit(X_train, y_train)

# Mean accuracy on the validation split.
validation_score = clf.score(X_val, y_val)
print(f"Validation score: {validation_score:.2f}")


## Validation on KNN Model

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils.multiclass import unique_labels

# X_train, y_train, X_val and y_val are expected to exist from earlier cells.

# Initialize the KNN model (n_neighbors is a tunable hyperparameter).
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model.
knn.fit(X_train, y_train)

# Predict on the validation data.
y_val_pred = knn.predict(X_val)

# Evaluate the model.
accuracy = accuracy_score(y_val, y_val_pred)
print(f"KNN Validation Accuracy: {accuracy:.2%}")

# Fix the label order once and use it for both the matrix and the tick labels.
# confusion_matrix orders rows/columns by the sorted labels that occur in
# y_val / y_val_pred. Labelling the axes with tracks[...].unique() (appearance
# order over all tracks) would attach the wrong genre names to the cells.
cm_labels = unique_labels(y_val, y_val_pred)
cm = confusion_matrix(y_val, y_val_pred, labels=cm_labels)

# Plot the confusion matrix (rows: actual genre, columns: predicted genre).
plt.figure(figsize=(10, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=cm_labels,
    yticklabels=cm_labels,
)
plt.title("KNN Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
