<a href="https://colab.research.google.com/github/datascience-uniandes/classification_tutorial/blob/master/music/music_multiclass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Estimate the genre of a song based on some intrinsic attributes (multi-class)

MINE-4101: Applied Data Science  
Universidad de los Andes  
  
Last update: October, 2023

In [None]:
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.neural_network import MLPClassifier

from sklearn.metrics import ConfusionMatrixDisplay, precision_score, recall_score, f1_score

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Let pandas render up to 100 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 100)

### Reading the dataset

In [None]:
# Load the dataset from the notebook-relative data folder (parsed as CSV).
dataset_path = './data/msd_genre_dataset.txt'
music_df = pd.read_csv(dataset_path)

In [None]:
# Inspect the dtype pandas inferred for each column.
music_df.dtypes

In [None]:
# Preview the first rows of the dataset.
music_df.head()

In [None]:
# Relative frequency of each genre (normalize = True returns proportions, not counts).
music_df['genre'].value_counts(normalize = True)

In [None]:
# Integer code assigned to each genre name. The codes follow the same order as
# the display labels passed to the confusion matrix at the end of the notebook.
genre_codes = {
    'classic pop and rock': 0,
    'folk': 1,
    'dance and electronica': 2,
    'jazz and blues': 3,
    'soul and reggae': 4,
    'punk': 5,
    'metal': 6,
    'classical': 7,
    'pop': 8,
    'hip-hop': 9,
}

# Encode the whole column in one vectorised pass instead of ten separate
# .loc assignments. Values that are not a key of genre_codes (including codes
# produced by a previous run of this cell) are returned unchanged, so the cell
# can be re-run safely.
# NOTE(review): writing ints cell-by-cell into a text column may be rejected by
# newer pandas string dtypes; building a new column avoids that - confirm.
music_df['genre'] = music_df['genre'].map(lambda genre: genre_codes.get(genre, genre))

In [None]:
# Cast the encoded genre column to an integer dtype. This raises if any value
# cannot be converted to int (e.g. a genre name that was never replaced by a code).
music_df['genre'] = music_df['genre'].astype(int)

### Splitting train and test datasets

In [None]:
# Feature matrix: every column from the fifth one onwards.
# NOTE(review): the first four columns are presumably the label and song
# metadata - verify against the dataset header.
X = music_df.iloc[:, 4:].copy()

In [None]:
# Target vector: the 'genre' column of the dataset.
Y = music_df['genre']

In [None]:
# Hold out 30% of the rows for testing. stratify = Y keeps the genre proportions
# equal in both splits; random_state fixes the shuffle so that a
# Restart & Run All reproduces the same split (and therefore the same metrics).
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size = 0.3,
    stratify = Y,
    random_state = 42,
)

In [None]:
# Genre proportions in the training split (the split above was stratified on Y).
pd.Series(Y_train).value_counts(normalize = True)

In [None]:
# Genre proportions in the test split (the split above was stratified on Y).
pd.Series(Y_test).value_counts(normalize = True)

### Preprocessing feature matrix

In [None]:
# Learn the scaling statistics from the training split only, then use them to
# standardise that same split. X_train is rebound to the scaled result.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

In [None]:
# Scale the test split with the scaler already fitted on the training split
# (the scaler is not re-fitted here).
X_test = scaler.transform(X_test)

### Training a multi-class neural network model

In [None]:
# Multi-layer perceptron with four hidden layers of 300 units each.
# random_state seeds the weight initialisation and the sample shuffling, so
# retraining after a kernel restart yields the same model.
neural_model = MLPClassifier(
    hidden_layer_sizes = (300, 300, 300, 300),
    random_state = 42,
)

In [None]:
# Train the network on the scaled training features and the integer genre codes.
neural_model.fit(X_train, Y_train)

In [None]:
# Predict a genre code for every row of the scaled test split.
predictions = neural_model.predict(X_test)

In [None]:
# Genre names listed in the order of the integer codes 0-9 used in the target.
genre_labels = [
    'classic pop and rock',
    'folk',
    'dance and electronica',
    'jazz and blues',
    'soul and reggae',
    'punk',
    'metal',
    'classical',
    'pop',
    'hip-hop',
]

fig, ax = plt.subplots(figsize = (20, 20))

# from_predictions draws the matrix itself, so the target axes is passed to it
# directly. Chaining .plot(ax = ax) afterwards drew the matrix a second time and
# left an extra default-sized figure in the output.
# labels pins the row/column order to codes 0-9 so it always matches genre_labels.
# normalize = 'true' scales each row, so every cell is a fraction of its true class.
ConfusionMatrixDisplay.from_predictions(
    Y_test,
    predictions,
    labels = list(range(len(genre_labels))),
    display_labels = genre_labels,
    normalize = 'true',
    ax = ax,
)
ax.set_title('Confusion matrix normalized by true genre');
