## Imports

In [1]:
import warnings

import numpy as np
# import seaborn as sns
# import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import axes3d
# from joblib import dump, load

warnings.filterwarnings('ignore')

import librosa
from librosa import display

# from sklearn.metrics import confusion_matrix, classification_report
# from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
# from sklearn.utils.multiclass import unique_labels
# from sklearn.decomposition import PCA
# from sklearn.cross_decomposition import CCA
# from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler

# from sklearn.svm import SVC
# from sklearn.linear_model import LogisticRegression
# from sklearn.ensemble import RandomForestClassifier

## Load Data

In [3]:
# Dataset layout and audio settings shared by the cells below.
all_genres = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]
num_songs = 100     # default number of clips read per genre
sr = 22050          # sampling rate in Hz (presumably; librosa.load's default)
Y_LIMIT = 660000    # every clip is truncated to this many samples

In [4]:
class DataLoader():
    '''
    Extracts audio features from the genre dataset, caches them as CSV files
    under data/, and reloads them from those files.

    Feature matrices store one song per row. Each row is a flattened
    (n_coefficients, N_FRAMES) array, so coefficient k of a song occupies
    columns [N_FRAMES*k, N_FRAMES*(k+1)).
    '''

    N_FRAMES = 1290    # frames per clip, as implied by the 1290-wide slices used for cropping
    N_MFCC = 13        # MFCC coefficients per frame
    N_CHROMA = 12      # chroma bins per frame
    HOP_LENGTH = 512   # hop length passed to librosa

    def __init__(self):
        self.X = None
        self.X_raw = None
        self.X_mfcc = None
        self.X_mfcc_random_crop = None
        self.X_mfcc_fixed_crop = None
        self.X_chroma = None
        self.X_chroma_random_crop = None
        self.X_chroma_fixed_crop = None
        self.Y = None

        self.SEG = 10
        self.SEG_LENGTH = int(self.N_FRAMES/self.SEG)
        self.RANDOM_SEG_LENGTH = 1200
        # One row of start frames per song (up to 1000 songs), one column per segment.
        # Drawn from the global NumPy RNG, so seed np.random beforehand for reproducible
        # crops. MFCC and chroma crops of the same instance share these starts.
        self.RANDOM_STARTS = np.random.randint(low=0, high=self.N_FRAMES-self.RANDOM_SEG_LENGTH, size=(1000, self.SEG))

    @staticmethod
    def _load_clip(genre, song_idx):
        '''Loads one clip and truncates it to Y_LIMIT samples; returns (samples, sample_rate).'''
        y, sr = librosa.load(f'genres/{genre}/{genre}.000{song_idx:02d}.wav')
        return y[:Y_LIMIT], sr

    def _extract_features(self, genres, songs, feature_fn):
        '''Applies feature_fn(y, sr) to every clip and stacks the flattened results, one row per song.'''
        # Collect rows in a list and stack once; stacking inside the loop is quadratic.
        rows = []
        for genre in genres:
            for song_idx in range(songs):
                y, sr = self._load_clip(genre, song_idx)
                rows.append(feature_fn(y, sr).flatten())
        return np.vstack(rows)

    def _random_crop(self, X_flat, n_coeffs):
        '''Cuts SEG windows of RANDOM_SEG_LENGTH frames per song at the starts in RANDOM_STARTS.'''
        crops = []
        for i, flat in enumerate(X_flat):
            frames = flat.reshape(n_coeffs, self.N_FRAMES)
            for j in range(self.SEG):
                start = self.RANDOM_STARTS[i][j]
                crops.append(frames[:, start:start+self.RANDOM_SEG_LENGTH])
        return np.stack(crops, axis=0)

    def _fixed_crop(self, X_flat, n_coeffs):
        '''Cuts each song into SEG consecutive windows of SEG_LENGTH frames.'''
        crops = []
        for flat in X_flat:
            frames = flat.reshape(n_coeffs, self.N_FRAMES)
            for i in range(self.SEG):
                crops.append(frames[:, self.SEG_LENGTH*i:self.SEG_LENGTH*(i+1)])
        return np.stack(crops, axis=0)

    def save_raw(self, genres=all_genres, songs=num_songs):
        '''
        Loads the truncated waveform of every clip into self.X and the genre index
        of every clip into self.Y, then writes data/X.csv and data/Y.csv.
        Must be called before X and Y have been populated.
        '''
        assert(self.X is None and self.Y is None)
        signals, labels = [], []
        for g_idx, g in enumerate(genres):
            for s_idx in range(songs):
                y, _ = self._load_clip(g, s_idx)
                signals.append(y)
                labels.append(g_idx)
        X = np.vstack(signals)
        Y = np.array(labels)
        self.X = X
        self.Y = Y
        np.savetxt('data/X.csv', X)
        np.savetxt('data/Y.csv', Y)

    def save_mfcc(self, genres=all_genres, songs=num_songs):
        '''
        Computes N_MFCC MFCC coefficients per frame for every clip, stores the
        flattened result in self.X_mfcc and writes data/X_mfcc.csv.
        '''
        assert(self.X_mfcc is None)
        # y is passed by keyword: recent librosa releases no longer accept it positionally.
        X_mfcc = self._extract_features(
            genres, songs,
            lambda y, sr: librosa.feature.mfcc(y=y, sr=sr, hop_length=self.HOP_LENGTH, n_mfcc=self.N_MFCC))
        self.X_mfcc = X_mfcc
        np.savetxt('data/X_mfcc.csv', X_mfcc)

    def save_mfcc_random_crop(self):
        '''
        Cuts SEG random windows of RANDOM_SEG_LENGTH frames out of each song in
        self.X_mfcc, giving an (n_songs*SEG, N_MFCC, RANDOM_SEG_LENGTH) array.
        Flattens the last two axes in order to save as a CSV.
        '''
        X_mfcc_crop = self._random_crop(self.X_mfcc, self.N_MFCC)
        self.X_mfcc_random_crop = X_mfcc_crop
        np.savetxt('data/X_mfcc_random_crop.csv', X_mfcc_crop.reshape(len(X_mfcc_crop), -1))

    def save_mfcc_fixed_crop(self):
        '''
        Evenly divides each song into SEG segments, producing an
        (n_songs*SEG, N_MFCC, SEG_LENGTH) array of MFCC coefficients
        (10000 by 13 by 129 for the full dataset).
        Flattens the last two axes (10000*1677) in order to save as a CSV.
        '''
        X_mfcc_crop = self._fixed_crop(self.X_mfcc, self.N_MFCC)
        self.X_mfcc_fixed_crop = X_mfcc_crop
        np.savetxt('data/X_mfcc_fixed_crop.csv', X_mfcc_crop.reshape(len(X_mfcc_crop), -1))

    def save_chroma(self, genres=all_genres, songs=num_songs):
        '''
        Computes the constant-Q chromagram of every clip, stores the flattened
        result in self.X_chroma and writes data/X_chroma.csv.
        '''
        assert(self.X_chroma is None)
        # y is passed by keyword: recent librosa releases no longer accept it positionally.
        X_chroma = self._extract_features(
            genres, songs,
            lambda y, sr: librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=self.HOP_LENGTH))
        self.X_chroma = X_chroma
        np.savetxt('data/X_chroma.csv', X_chroma)

    def save_chroma_random_crop(self):
        '''
        Chroma counterpart of save_mfcc_random_crop. Each row of self.X_chroma has
        length 15480 = 1290*12. Stores the result in self.X_chroma_random_crop and
        writes data/X_chroma_random_crop.csv, the file load_chroma_random_crop reads.
        '''
        X_chroma_crop = self._random_crop(self.X_chroma, self.N_CHROMA)
        self.X_chroma_random_crop = X_chroma_crop
        np.savetxt('data/X_chroma_random_crop.csv', X_chroma_crop.reshape(len(X_chroma_crop), -1))

    def save_chroma_fixed_crop(self):
        '''
        Chroma counterpart of save_mfcc_fixed_crop. Each row of self.X_chroma has
        length 15480 = 1290*12.
        '''
        X_chroma_crop = self._fixed_crop(self.X_chroma, self.N_CHROMA)
        self.X_chroma_fixed_crop = X_chroma_crop
        np.savetxt('data/X_chroma_fixed_crop.csv', X_chroma_crop.reshape(len(X_chroma_crop), -1))

    def load_raw(self):
        '''Reads data/X.csv into self.X; self.X_raw is kept as an alias of the same array.'''
        self.X = self.X_raw = np.loadtxt('data/X.csv')

    def load_mfcc(self):
        '''Reads data/X_mfcc.csv into self.X_mfcc.'''
        self.X_mfcc = np.loadtxt('data/X_mfcc.csv')

    def load_mfcc_random_crop(self):
        '''Reads the random MFCC crops back as (n_segments, N_MFCC, RANDOM_SEG_LENGTH).'''
        self.X_mfcc_random_crop = np.loadtxt('data/X_mfcc_random_crop.csv').reshape(-1, self.N_MFCC, self.RANDOM_SEG_LENGTH)

    def load_mfcc_fixed_crop(self):
        '''Reads the fixed MFCC crops back as (n_segments, N_MFCC, SEG_LENGTH).'''
        self.X_mfcc_fixed_crop = np.loadtxt('data/X_mfcc_fixed_crop.csv').reshape(-1, self.N_MFCC, self.SEG_LENGTH)

    def load_chroma_random_crop(self):
        '''Reads the random chroma crops back as (n_segments, N_CHROMA, RANDOM_SEG_LENGTH).'''
        self.X_chroma_random_crop = np.loadtxt('data/X_chroma_random_crop.csv').reshape(-1, self.N_CHROMA, self.RANDOM_SEG_LENGTH)

    def load_chroma_fixed_crop(self):
        '''Reads the fixed chroma crops back as (n_segments, N_CHROMA, SEG_LENGTH).'''
        self.X_chroma_fixed_crop = np.loadtxt('data/X_chroma_fixed_crop.csv').reshape(-1, self.N_CHROMA, self.SEG_LENGTH)

    def load_chroma(self):
        '''Reads data/X_chroma.csv into self.X_chroma.'''
        self.X_chroma = np.loadtxt('data/X_chroma.csv')

    def load_Y(self):
        '''Reads data/Y.csv into self.Y (np.loadtxt returns the labels as floats).'''
        self.Y = np.loadtxt('data/Y.csv')

In [6]:
# # Only need to run this ONCE!
# # Saves features and pca objects to data/...
# dl = DataLoader()
# dl.save_raw(genres=all_genres, songs=100)
# dl.save_mfcc()
# dl.save_chroma()
# dl.save_mfcc_fixed_crop()
# dl.save_mfcc_random_crop()
# dl.save_chroma_fixed_crop()
# dl.save_chroma_random_crop()

In [12]:
# Load from CSVs instead of saving
# Load the cached features from the CSVs instead of recomputing them.
dl = DataLoader()
# The preprocessing cells below read dl.X_mfcc, dl.X_chroma and dl.Y, so these
# three loads have to run for the notebook to work on a fresh kernel.
dl.load_mfcc()
dl.load_chroma()
dl.load_Y()
# The cropped variants are not read by the visible preprocessing cells;
# uncomment them when they are needed.
# dl.load_mfcc_fixed_crop()
# dl.load_mfcc_random_crop()
# dl.load_chroma_fixed_crop()
# dl.load_chroma_random_crop()

(1000, 15480)


## Preprocessing

In [None]:
# # Only need to run this ONCE!
# Generates train and test indexes among the 1000 datapoints
# x = np.array([i for i in range(1000)])
# np.random.seed(1)
# train_idxs = np.random.choice(x, size=750, replace=False)
# train_idxs.sort()
# test_idxs = []
# for i in range(1000):
#     if i not in train_idxs:
#         test_idxs.append(i)
# test_idxs = np.array(test_idxs)
# np.savetxt('data/train_idxs.csv', train_idxs)
# np.savetxt('data/test_idxs.csv', test_idxs)

In [10]:
# Load the train/test indexes instead of shuffling
# Reuse the persisted train/test partition rather than drawing a new one.
# np.loadtxt returns floats, so both index arrays are cast back to int.
train_idxs, test_idxs = (
    np.loadtxt(path).astype(int)
    for path in ('data/train_idxs.csv', 'data/test_idxs.csv')
)
print(train_idxs[-1], test_idxs[-1])

999 993


In [11]:
# Standardise the MFCC features. The scaler is fitted on the training rows only,
# so test-set statistics do not leak into the features, and is then applied to
# every row. MX keeps the original row order.
scaler = StandardScaler().fit(np.take(dl.X_mfcc, train_idxs, 0))
MX = scaler.transform(dl.X_mfcc)
MX_train, MX_test = np.take(MX, train_idxs, 0), np.take(MX, test_idxs, 0)
print(MX_train.shape, MX_test.shape)

(750, 16770) (250, 16770)


In [89]:
# Standardise the chroma features. The scaler is fitted on the training rows only,
# so test-set statistics do not leak into the features, and is then applied to
# every row. CX keeps the original row order.
scaler = StandardScaler().fit(np.take(dl.X_chroma, train_idxs, 0))
CX = scaler.transform(dl.X_chroma)
CX_train, CX_test = np.take(CX, train_idxs, 0), np.take(CX, test_idxs, 0)
print(CX_train.shape, CX_test.shape)

(750, 15480) (250, 15480)


In [12]:
# Concatenate MFCC and chroma features column-wise, then standardise them.
# The scaler is fitted on the training rows only, so test-set statistics do not
# leak into the features, and is then applied to every row.
X = np.hstack([dl.X_mfcc, dl.X_chroma])
scaler = StandardScaler().fit(np.take(X, train_idxs, 0))
X = scaler.transform(X)
X_train, X_test = np.take(X, train_idxs, 0), np.take(X, test_idxs, 0)
print(X_train.shape, X_test.shape)

(750, 32250) (250, 32250)


In [13]:
# Concatenate the PCA-projected MFCC and chroma features, then standardise them.
# NOTE(review): the DataLoader defined in this notebook never sets X_mfcc_pca or
# X_chroma_pca, so this cell raises AttributeError unless those attributes are
# attached elsewhere. Confirm where the PCA projections come from.
PX = np.hstack([dl.X_mfcc_pca, dl.X_chroma_pca])
# The scaler is fitted on the training rows only, so test-set statistics do not
# leak into the features, and is then applied to every row.
scaler = StandardScaler().fit(np.take(PX, train_idxs, 0))
PX = scaler.transform(PX)
PX_train, PX_test = np.take(PX, train_idxs, 0), np.take(PX, test_idxs, 0)
print(PX_train.shape, PX_test.shape)

(750, 200) (250, 200)


In [14]:
# Split the labels with the same index sets that partition the feature matrices.
Y = dl.Y
Y_train = Y[train_idxs]
Y_test = Y[test_idxs]
print(Y_train.shape, Y_test.shape)

(750,) (250,)
