## (Just copied Brendan's notebook and will change the data being used)
## Good features to try: MFCC, chroma, spectral flatness, zero crossing rate
## Not good: tempo

In [3]:
import numpy as np
import librosa
from librosa import display
import matplotlib.pyplot as plt

### Load Data

In [4]:
all_genres = ['blues','classical','country','disco','hiphop','jazz','metal','pop','reggae','rock']
num_songs = 100
Y_LIMIT = 660000

In [54]:
y, sr = librosa.load('genres/classical/classical.00034.wav')
feature = np.hstack([librosa.feature.chroma_stft(y=y, sr=sr, hop_length=512).flatten(), librosa.feature.mfcc(y=y, sr=sr, hop_length=512, n_mfcc=13).flatten()])
feature.shape

(32325,)

In [104]:
# Loads n_songs songs from each genre in load_genres.  
def load_dataset(genres=all_genres, n_songs=num_songs):
    X, Y = None, None
    for g_idx, g in enumerate(genres):
        for s_idx in range(n_songs):
            y, sr = librosa.load(f'genres/{g}/{g}.000{s_idx:02d}.wav')
            # Limit length of sample so that all features have the same length.
            y = y[:Y_LIMIT]
            
            feature = np.hstack([
                #librosa.feature.chroma_stft(y=y, sr=sr, hop_length=512).flatten(),
                librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=512).flatten(),
                librosa.feature.mfcc(y=y, sr=sr, hop_length=512, n_mfcc=13).flatten(),
                #librosa.feature.spectral_flatness(y=y, hop_length=512).flatten(),
                librosa.feature.zero_crossing_rate(y=y, hop_length=512).flatten()
            ])
            
            if X is None:
                X = feature.reshape(1, feature.shape[0])
                Y = np.array([[g_idx]])
                
            else:
                X = np.vstack([X, feature])
                Y = np.vstack([Y, np.array([[g_idx]])])
    return X, Y

In [105]:
X, Y = load_dataset(all_genres, 50)
Y = Y.ravel()

MemoryError: Unable to allocate array with shape (387, 33540) and data type float64

In [96]:
print(X.shape, Y.shape)

(500, 32250) (500,)


# Models

In [12]:
from sklearn.metrics import confusion_matrix 
from sklearn.model_selection import train_test_split 
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import CCA

In [97]:
X_train, X_test, y_train, y_test = train_test_split(X, Y) 

In [98]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(375, 32250) (125, 32250) (375,) (125,)


### SVM

In [15]:
from sklearn.svm import SVC

In [99]:
svm_model = SVC(kernel = 'linear', C = 1).fit(X_train, y_train) 

In [100]:
svm_accuracy = svm_model.score(X_test, y_test) 
svm_accuracy

0.568

#Just chroma: 0.38 accuracy
#Just chroma delta: 0.204 accuracy
#Just zero crossing rate: 0.252 accuracy
#MFCC and chroma: 0.592 accuracy (half of data)
#MFCC and chroma_cqt: 0.568 accuracy (half of data)
#Just spectral flatness: 0.224 accuracy
#MFCC+chroma+spectral flatness: 0.528 accuracy (half of data)

In [101]:
svm_predictions = svm_model.predict(X_test)
svm_cm = confusion_matrix(y_test, svm_predictions) 
svm_cm

array([[ 8,  0,  1,  0,  0,  2,  0,  0,  0,  1],
       [ 0,  9,  1,  0,  0,  1,  0,  0,  0,  2],
       [ 0,  0,  7,  1,  0,  0,  0,  0,  0,  1],
       [ 0,  1,  4,  7,  0,  0,  1,  1,  0,  3],
       [ 0,  0,  0,  1,  5,  0,  3,  1,  1,  1],
       [ 1,  1,  0,  0,  0,  7,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0, 10,  0,  0,  1],
       [ 0,  0,  5,  1,  0,  0,  0, 11,  0,  0],
       [ 7,  0,  0,  0,  1,  3,  0,  0,  2,  1],
       [ 1,  0,  1,  1,  0,  1,  0,  2,  0,  5]], dtype=int64)

### LR

In [68]:
from sklearn.linear_model import LogisticRegression

In [88]:
lr_model = LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=1000)

In [89]:
lr_model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [90]:
lr_accuracy = lr_model.score(X_test, y_test)
lr_accuracy

0.512

MFCC + Chroma: 0.536 accuracy (half of dataset)
MFCC + Chroma + spectral flatness: 0.512 accuracy (half of dataset)

In [72]:
lr_predictions = lr_model.predict(X_test)
lr_cm = confusion_matrix(y_test, lr_predictions) 
lr_cm

array([[ 7,  0,  1,  0,  0,  4,  0,  0,  1,  0],
       [ 0,  9,  0,  0,  0,  1,  0,  0,  0,  0],
       [ 0,  1,  6,  0,  1,  1,  1,  1,  0,  1],
       [ 0,  1,  4,  5,  1,  0,  0,  2,  0,  3],
       [ 0,  0,  1,  1,  5,  1,  2,  2,  0,  1],
       [ 0,  2,  1,  1,  1,  4,  0,  0,  1,  1],
       [ 0,  0,  0,  0,  1,  0, 13,  0,  0,  1],
       [ 0,  0,  1,  0,  0,  0,  0, 10,  0,  0],
       [ 1,  1,  1,  0,  1,  4,  0,  0,  4,  1],
       [ 1,  0,  1,  2,  1,  0,  0,  2,  0,  4]], dtype=int64)

### Random Forest