<a href="https://colab.research.google.com/github/allispaul/audiobot/blob/main/AG_fmasmall_XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# mount google drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [83]:
import os
import multiprocessing
import ast
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
from pathlib import Path

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score

from xgboost import XGBClassifier

In [96]:
# Model assessment function
def model_assess(model, title="Default"):
    model.fit(X_train,y_train)
    preds = model.predict(X_val)
    print('Accuracy', title, ":", round(accuracy_score(y_val,preds), 5), "\n")
    print('F-Score', title, ":", round(f1_score(y_val,preds, average="macro"),5), "\n")

In [16]:
# get track metadata
METADATA_DIR = Path("/content/drive/MyDrive/Audiobots/Data/fma_metadata")

tracks_info = pd.read_csv(METADATA_DIR / 'tracks.csv', index_col=0, header=[0, 1])
tracks_info_subset = tracks_info.copy()
tracks_info_subset.columns = ['_'.join(col).strip() for col in tracks_info_subset.columns.values]
tracks_info_subset = tracks_info_subset[["set_split","set_subset","track_duration","track_genre_top"]]

## FMA-Small

The section below gets track information from fma metadata, imports fma_small, merges and cleans the data to get it ready for XGBoost.

In [19]:
# Track info for fma_small
tracks_small = tracks_info_subset[tracks_info_subset["set_subset"]=="small"]
tracks_small.shape, tracks_small["track_duration"].describe()

In [36]:
# Track info for fma_medium
tracks_med = tracks_info_subset[tracks_info_subset["set_subset"]=="medium"]
tracks_med.shape, tracks_med["track_duration"].describe()

((17000, 4),
 count    17000.000000
 mean       230.409824
 std        103.986197
 min         60.000000
 25%        156.000000
 50%        213.000000
 75%        285.000000
 max        600.000000
 Name: track_duration, dtype: float64)

In [106]:
# Import fma_small
data_path = "/content/drive/MyDrive/Audiobots/Data/fmasmall_features_3sec.csv"

#fma_small = pd.read_csv(data_path,header=None)
fma_small = pd.read_csv(data_path,index_col=0,header=[0,1,2,3])
fma_small.columns = ['_'.join(col).strip() for col in fma_small.columns.values]

In [107]:
# Merge track info with features
# 6400 training, 800 validation, 800 test set
# FMA_small is balanced - each genre has 1000 observations with 8 genres
fma_small_full = tracks_small.merge(fma_small,left_index=True,right_index=True)
fma_small_full.head()

Unnamed: 0_level_0,set_split,set_subset,track_duration,track_genre_top,chroma_cens_max_01_pos_01,chroma_cens_max_01_pos_02,chroma_cens_max_01_pos_03,chroma_cens_max_01_pos_04,chroma_cens_max_01_pos_05,chroma_cens_max_01_pos_06,...,zcr_std_01_pos_01,zcr_std_01_pos_02,zcr_std_01_pos_03,zcr_std_01_pos_04,zcr_std_01_pos_05,zcr_std_01_pos_06,zcr_std_01_pos_07,zcr_std_01_pos_08,zcr_std_01_pos_09,zcr_std_01_pos_10
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,training,small,168,Hip-Hop,0.704124,0.670529,0.731929,0.699341,0.558846,0.546863,...,0.058834,0.086403,0.061894,0.07197,0.069562,0.050927,0.044658,0.055987,0.06627,0.089225
5,training,small,206,Hip-Hop,0.386654,0.396511,0.430391,0.420343,0.459678,0.469663,...,0.039076,0.045397,0.051211,0.040011,0.036974,0.029119,0.041461,0.033762,0.072484,0.060742
10,training,small,161,Pop,0.41363,0.393576,0.306028,0.380294,0.286272,0.330367,...,0.021721,0.034129,0.020003,0.018932,0.016606,0.0156,0.018541,0.018028,0.018082,0.014767
140,training,small,253,Folk,0.474108,0.244633,0.398619,0.297416,0.225072,0.398411,...,0.013485,0.026601,0.033925,0.023193,0.039791,0.038043,0.039282,0.007803,0.014162,0.064145
141,training,small,182,Folk,0.519412,0.436285,0.505933,0.355401,0.042728,0.354588,...,0.059952,0.015979,0.045776,0.044014,0.021849,0.030574,0.014621,0.011454,0.022677,0.026572


## Chunks as rows

In [188]:
# Reshape fma_small into long format
data = fma_small_full.reset_index()
id_vars = ['track_id', 'set_split', 'set_subset', 'track_duration', 'track_genre_top']
id_vars2 = ['set_split', 'set_subset', 'track_duration']
melted_data = pd.melt(data, id_vars=id_vars, var_name='feature', value_name='value')
melted_data['feature_base'] = melted_data['feature'].apply(lambda x: '_'.join(x.split('_')[:-2]))
melted_data['position'] = melted_data['feature'].apply(lambda x: x.split('_')[-1])
melted_data['track_pos'] = melted_data['track_id'].astype(str) + "_" + melted_data['position'].astype(str)


In [194]:
# Put df back in the correct long format
index_vars = ['track_pos', 'set_split', 'set_subset', 'track_duration', 'track_genre_top']
fma_small_long = melted_data.pivot_table(index=index_vars, columns='feature_base', values='value', aggfunc='first').reset_index()

In [195]:
fma_small_long.head()

feature_base,track_pos,set_split,set_subset,track_duration,track_genre_top,chroma_cens_max_01,chroma_cens_max_02,chroma_cens_max_03,chroma_cens_max_04,chroma_cens_max_05,...,tonnetz_std_02,tonnetz_std_03,tonnetz_std_04,tonnetz_std_05,tonnetz_std_06,zcr_max_01,zcr_mean_01,zcr_median_01,zcr_min_01,zcr_std_01
0,100478_01,training,small,146,Hip-Hop,0.465381,0.368193,0.214262,0.413002,0.466494,...,0.059723,0.095144,0.077511,0.042451,0.044458,0.148438,0.056754,0.054932,0.008789,0.025644
1,100478_02,training,small,146,Hip-Hop,0.479353,0.534042,0.456519,0.415041,0.417984,...,0.055869,0.087288,0.108697,0.037816,0.035881,0.208496,0.05729,0.052246,0.007324,0.030525
2,100478_03,training,small,146,Hip-Hop,0.417255,0.417274,0.449833,0.408672,0.470762,...,0.05511,0.081682,0.084335,0.025959,0.028613,0.152832,0.050274,0.048828,0.009277,0.023234
3,100478_04,training,small,146,Hip-Hop,0.425173,0.423116,0.29048,0.365537,0.537459,...,0.065253,0.093127,0.097447,0.0283,0.03995,0.11377,0.048196,0.043457,0.006348,0.021784
4,100478_05,training,small,146,Hip-Hop,0.456591,0.562723,0.451581,0.355133,0.471522,...,0.068087,0.089075,0.080989,0.037026,0.030162,0.17627,0.06137,0.057129,0.006348,0.031257


## FMA_small Long Format XGBoost



In [210]:
X_train = fma_small_long[fma_small_long["set_split"]=="training"]
X_train = X_train.drop(["track_pos","set_split","set_subset","track_duration","track_genre_top"], axis=1)
y_train = fma_small_long[fma_small_long["set_split"]=="training"]["track_genre_top"]
X_val = fma_small_long[fma_small_long["set_split"]=="validation"]
X_val = X_val.drop(["track_pos","set_split","set_subset","track_duration","track_genre_top"], axis=1)
y_val = fma_small_long[fma_small_long["set_split"]=="validation"]["track_genre_top"]

In [203]:
# Encode DV
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_val = label_encoder.transform(y_val)

In [211]:
# Normalize X features
cols = X_train.columns

scaler = MinMaxScaler()

# Training IV features
X_train_scaled = scaler.fit_transform(X_train)
X_train = pd.DataFrame(X_train_scaled, columns = cols)

# Test IV features
X_val_scaled = scaler.transform(X_val)
X_test = pd.DataFrame(X_val_scaled, columns=cols)

In [None]:
# XGBoost with GPUs

xgb_gpu = XGBClassifier(n_estimators=1000, learning_rate=0.05, tree_method="gpu_hist")
model_assess(xgb_gpu, "Cross Gradient Booster with GPUs_Long Format")

## FMA_small wide format XGBoost

In [88]:
# drop index
fma_small_full = fma_small_full.reset_index(drop=True)
fma_small_full.head()

Unnamed: 0,set_split,set_subset,track_duration,track_genre_top,chroma_cens_max_01_pos_01,chroma_cens_max_01_pos_02,chroma_cens_max_01_pos_03,chroma_cens_max_01_pos_04,chroma_cens_max_01_pos_05,chroma_cens_max_01_pos_06,...,zcr_std_01_pos_01,zcr_std_01_pos_02,zcr_std_01_pos_03,zcr_std_01_pos_04,zcr_std_01_pos_05,zcr_std_01_pos_06,zcr_std_01_pos_07,zcr_std_01_pos_08,zcr_std_01_pos_09,zcr_std_01_pos_10
0,training,small,168,Hip-Hop,0.704124,0.670529,0.731929,0.699341,0.558846,0.546863,...,0.058834,0.086403,0.061894,0.07197,0.069562,0.050927,0.044658,0.055987,0.06627,0.089225
1,training,small,206,Hip-Hop,0.386654,0.396511,0.430391,0.420343,0.459678,0.469663,...,0.039076,0.045397,0.051211,0.040011,0.036974,0.029119,0.041461,0.033762,0.072484,0.060742
2,training,small,161,Pop,0.41363,0.393576,0.306028,0.380294,0.286272,0.330367,...,0.021721,0.034129,0.020003,0.018932,0.016606,0.0156,0.018541,0.018028,0.018082,0.014767
3,training,small,253,Folk,0.474108,0.244633,0.398619,0.297416,0.225072,0.398411,...,0.013485,0.026601,0.033925,0.023193,0.039791,0.038043,0.039282,0.007803,0.014162,0.064145
4,training,small,182,Folk,0.519412,0.436285,0.505933,0.355401,0.042728,0.354588,...,0.059952,0.015979,0.045776,0.044014,0.021849,0.030574,0.014621,0.011454,0.022677,0.026572


In [93]:
# Prepare data for XGBoost
X_train = fma_small_full[fma_small_full["set_split"]=="training"]
X_train = X_train.drop(["set_split","set_subset","track_duration","track_genre_top"], axis=1)
y_train = fma_small_full[fma_small_full["set_split"]=="training"]["track_genre_top"]
X_val = fma_small_full[fma_small_full["set_split"]=="validation"]
X_val = X_val.drop(["set_split","set_subset","track_duration","track_genre_top"], axis=1)
y_val = fma_small_full[fma_small_full["set_split"]=="validation"]["track_genre_top"]

In [94]:
# Encode DV
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_val = label_encoder.transform(y_val)

In [95]:
# Normalize X features
cols = X_train.columns

scaler = MinMaxScaler()

# Training IV features
X_train_scaled = scaler.fit_transform(X_train)
X_train = pd.DataFrame(X_train_scaled, columns = cols)

# Test IV features
X_val_scaled = scaler.transform(X_val)
X_val = pd.DataFrame(X_val_scaled, columns=cols)

In [98]:
# XGBoost with GPUs

xgb_gpu = XGBClassifier(n_estimators=1000, learning_rate=0.05, tree_method="gpu_hist")
model_assess(xgb_gpu, "Cross Gradient Booster with GPUs")


    E.g. tree_method = "hist", device = "cuda"



Accuracy Cross Gradient Booster with GPUs : 0.14 

F-Score Cross Gradient Booster with GPUs : 0.08754 




    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [100]:
id_vars = ["set_split","set_subset","track_duration","track_genre_top"]
fma_small_long = fma_small_full.melt(id_vars=id_vars, var_name="feature", value_name="value")
fma_small_long.head()

Unnamed: 0,set_split,set_subset,track_duration,track_genre_top,feature,value
0,training,small,168,Hip-Hop,chroma_cens_max_01_pos_01,0.704124
1,training,small,206,Hip-Hop,chroma_cens_max_01_pos_01,0.386654
2,training,small,161,Pop,chroma_cens_max_01_pos_01,0.41363
3,training,small,253,Folk,chroma_cens_max_01_pos_01,0.474108
4,training,small,182,Folk,chroma_cens_max_01_pos_01,0.519412


## FMA-Medium

The section below gets track information from fma metadata, imports fma_medium, merges and cleans the data to get it ready for XGBoost.

In [None]:
# Import fma_medium
data_path_med = "/content/drive/MyDrive/Audiobots/Data/fma_features_3sec.csv"

#fma_small = pd.read_csv(data_path,header=None)
fma_med = pd.read_csv(data_path_med,index_col=0,header=[0,1,2,3])
fma_med.columns = ['_'.join(col).strip() for col in fma_med.columns.values]

In [None]:
# Merge track info with features
# X training, X validation, X test set
# FMA_medium is highly imbalanced - 16 genres,
fma_med_full = tracks_med.merge(fma_med,left_index=True,right_index=True)
fma_med_full.head()

In [None]:
# drop index
fma_med_full = fma_med_full.reset_index(drop=True)
fma_med_full.head()

In [None]:
# Prepare data for XGBoost
X_train = fma_med_full[fma_med_full["set_split"]=="training"]
X_train = X_train.drop(["set_split","set_subset","track_duration","track_genre_top"], axis=1)
y_train = fma_med_full[fma_med_full["set_split"]=="training"]["track_genre_top"]
X_val = fma_med_full[fma_med_full["set_split"]=="validation"]
X_val = X_val.drop(["set_split","set_subset","track_duration","track_genre_top"], axis=1)
y_val = fma_med_full[fma_med_full["set_split"]=="validation"]["track_genre_top"]

In [None]:
# Encode DV
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_val = label_encoder.transform(y_val)

In [None]:
# Normalize X features
cols = X_train.columns

scaler = MinMaxScaler()

# Training IV features
X_train_scaled = scaler.fit_transform(X_train)
X_train = pd.DataFrame(X_train_scaled, columns = cols)

# Test IV features
X_test_scaled = scaler.transform(X_val)
X_test = pd.DataFrame(X_test_scaled, columns=cols)

In [None]:
# XGBoost with GPUs

xgb_gpu = XGBClassifier(n_estimators=1000, learning_rate=0.05, tree_method="gpu_hist")
model_assess(xgb_gpu, "Cross Gradient Booster with GPUs")