## Imports

In [31]:
import numpy as np
import librosa

## Load Data

In [30]:
# Genre folder names; also the default `genres` argument of the DataLoader
# save_* methods.
all_genres = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]
# Number of tracks read per genre (default `songs` argument of save_*).
num_songs = 100
# Presumably the audio sample rate in Hz -- TODO confirm; the save_* methods
# rebind a local `sr` from librosa.load() and never read this global.
sr = 22_050
# Every waveform is truncated to this many samples before feature extraction.
Y_LIMIT = 660_000

In [27]:
# TODO: Normalize X_mfcc and X_chroma before saving
# TODO: Implement train_test_split for all loaded sets
class DataLoader():
    '''
    Computes, caches and reloads MFCC / chroma features and their labels.

    The save_* methods extract features (or crops of already-extracted
    features) and write them to CSV files under data/. The load_* methods
    read those CSVs back into the matching attributes.

    Constructing a DataLoader reseeds NumPy's global random generator with 1,
    so the random crop offsets and the train/test indexes are the same on
    every run.
    '''

    SONGS_PER_GENRE = 100   # consecutive rows that share one genre label
    N_SONGS = 1000          # rows in the full feature matrices
    N_TRAIN = 750           # indexes drawn for each training split
    N_FRAMES = 1290         # frames per song in the flattened feature rows
    N_MFCC = 13             # MFCC coefficients per frame
    N_CHROMA = 12           # chroma bins per frame

    def __init__(self):
        np.random.seed(1)
        self.X_mfcc = None
        self.X_mfcc_tensor = None
        self.X_mfcc_random_crop = None
        self.X_mfcc_fixed_crop = None

        self.X_chroma = None
        self.X_chroma_tensor = None
        self.X_chroma_random_crop = None
        self.X_chroma_fixed_crop = None

        self.Y = None
        self.Y_crop = None

        self.SEG = 10                       # Must evenly divide 30
        self.SEG_LENGTH = self.N_FRAMES // self.SEG
        self.RANDOM_SEG_LENGTH = 1200
        # One start frame per (song, crop). `high` is exclusive, so every
        # crop of RANDOM_SEG_LENGTH frames stays inside the N_FRAMES frames.
        self.RANDOM_STARTS = np.random.randint(
            low=0,
            high=self.N_FRAMES - self.RANDOM_SEG_LENGTH,
            size=(self.N_SONGS, self.SEG),
        )

        # The order of the random draws is kept as-is so the seeded values
        # do not change.
        song_idxs = np.arange(self.N_SONGS)
        self.train_idxs = np.sort(
            np.random.choice(song_idxs, size=self.N_TRAIN, replace=False))
        self.test_idxs = np.setdiff1d(song_idxs, self.train_idxs)

        # NOTE(review): only N_TRAIN (750) of the N_SONGS * SEG crop rows are
        # drawn for training, and crops of one song can land on both sides of
        # the split -- confirm that this is intended.
        crop_idxs = np.arange(self.N_SONGS * self.SEG)
        self.train_crop_idxs = np.sort(
            np.random.choice(crop_idxs, size=self.N_TRAIN, replace=False))
        # Complement of the *crop* training indexes, so the two crop sets
        # never overlap.
        self.test_crop_idxs = np.setdiff1d(crop_idxs, self.train_crop_idxs)

    @staticmethod
    def _require(features, name):
        '''Returns `features`, or raises ValueError if it is still None.'''
        if features is None:
            raise ValueError(f'{name} is not available; save or load it first')
        return features

    @staticmethod
    def _extract_features(extract, genres, songs):
        '''
        Loads every track and stacks one flattened feature row per track.
        `extract(y, sr)` must return the feature matrix of one waveform.
        '''
        rows = []
        for g in genres:
            for s_idx in range(songs):
                y, sr = librosa.load(f'genres/{g}/{g}.000{s_idx:02d}.wav')
                rows.append(extract(y[:Y_LIMIT], sr).flatten())
        if not rows:
            raise ValueError('no tracks requested: genres or songs is empty')
        # A single stack at the end avoids re-copying the matrix per track.
        return np.vstack(rows)

    @staticmethod
    def _random_crops(X, n_bins, n_frames, starts, seg_length):
        '''
        Cuts len(starts[i]) windows of `seg_length` frames out of song i.
        Returns an array of shape (crops, n_bins, seg_length), song-major.
        '''
        tensor = np.asarray(X).reshape(-1, n_bins, n_frames)
        if len(tensor) > len(starts):
            raise ValueError('more songs than rows of random start offsets')
        crops = []
        for i, song in enumerate(tensor):
            for start in starts[i]:
                crops.append(song[:, start:start + seg_length])
        return np.stack(crops, axis=0)

    @staticmethod
    def _fixed_crops(X, n_bins, n_frames, n_seg, seg_length):
        '''
        Splits every song into `n_seg` consecutive windows of `seg_length`
        frames. Returns shape (songs * n_seg, n_bins, seg_length), song-major.
        '''
        tensor = np.asarray(X).reshape(-1, n_bins, n_frames)
        # Frames past n_seg * seg_length (if any) are dropped.
        used = tensor[:, :, :n_seg * seg_length]
        segments = used.reshape(len(tensor), n_bins, n_seg, seg_length)
        # Move the segment axis next to the song axis before merging them.
        return segments.transpose(0, 2, 1, 3).reshape(
            len(tensor) * n_seg, n_bins, seg_length)

    def save_mfcc(self, genres=None, songs=None):
        '''
        Saves MFCC Coefficients.
        Produces a 1000 x 16770 array with the default arguments.
        `genres` defaults to all_genres and `songs` to num_songs.
        '''
        assert self.X_mfcc is None, 'X_mfcc has already been computed'
        genres = all_genres if genres is None else genres
        songs = num_songs if songs is None else songs
        # y is passed by keyword: newer librosa releases reject it positionally.
        self.X_mfcc = self._extract_features(
            lambda y, sr: librosa.feature.mfcc(
                y=y, sr=sr, hop_length=512, n_mfcc=self.N_MFCC),
            genres, songs)
        np.savetxt('data/X_mfcc.csv', self.X_mfcc)

    def save_mfcc_random_crop(self):
        '''
        Saves 10 random crops of MFCC for every sample in X_mfcc.
        Produces a 10000 x 13 x 1200 array.
        Reshapes into 10000 x 15600 for the CSV.
        '''
        assert self.X_mfcc_random_crop is None, 'random MFCC crops already computed'
        crops = self._random_crops(
            self._require(self.X_mfcc, 'X_mfcc'), self.N_MFCC, self.N_FRAMES,
            self.RANDOM_STARTS, self.RANDOM_SEG_LENGTH)
        self.X_mfcc_random_crop = crops
        np.savetxt('data/X_mfcc_random_crop.csv', crops.reshape(len(crops), -1))

    def save_mfcc_fixed_crop(self):
        '''
        Saves 10 even segments of MFCC for every sample in X_mfcc.
        Produces a 10000 x 13 x 129 array of MFCC coefficients for the segments.
        Reshapes into 10000 x 1677 for the CSV.
        '''
        assert self.X_mfcc_fixed_crop is None, 'fixed MFCC crops already computed'
        crops = self._fixed_crops(
            self._require(self.X_mfcc, 'X_mfcc'), self.N_MFCC, self.N_FRAMES,
            self.SEG, self.SEG_LENGTH)
        self.X_mfcc_fixed_crop = crops
        np.savetxt('data/X_mfcc_fixed_crop.csv', crops.reshape(len(crops), -1))

    def save_chroma(self, genres=None, songs=None):
        '''
        Saves Chromas.
        Produces a 1000 x 15480 array with the default arguments.
        `genres` defaults to all_genres and `songs` to num_songs.
        '''
        assert self.X_chroma is None, 'X_chroma has already been computed'
        genres = all_genres if genres is None else genres
        songs = num_songs if songs is None else songs
        # y is passed by keyword: newer librosa releases reject it positionally.
        self.X_chroma = self._extract_features(
            lambda y, sr: librosa.feature.chroma_cqt(
                y=y, sr=sr, hop_length=512),
            genres, songs)
        np.savetxt('data/X_chroma.csv', self.X_chroma)

    def save_chroma_random_crop(self):
        '''
        Saves 10 random crops of Chromas for every sample in X_chroma.
        Produces a 10000 x 12 x 1200 array.
        Reshapes into 10000 x 14400 for the CSV.
        '''
        assert self.X_chroma_random_crop is None, 'random chroma crops already computed'
        crops = self._random_crops(
            self._require(self.X_chroma, 'X_chroma'), self.N_CHROMA,
            self.N_FRAMES, self.RANDOM_STARTS, self.RANDOM_SEG_LENGTH)
        self.X_chroma_random_crop = crops
        np.savetxt('data/X_chroma_random_crop.csv', crops.reshape(len(crops), -1))

    def save_chroma_fixed_crop(self):
        '''
        Saves 10 even segments of Chromas for every sample in X_chroma.
        Produces a 10000 x 12 x 129 array of chroma values for the segments.
        Reshapes into 10000 x 1548 for the CSV.
        '''
        assert self.X_chroma_fixed_crop is None, 'fixed chroma crops already computed'
        crops = self._fixed_crops(
            self._require(self.X_chroma, 'X_chroma'), self.N_CHROMA,
            self.N_FRAMES, self.SEG, self.SEG_LENGTH)
        self.X_chroma_fixed_crop = crops
        np.savetxt('data/X_chroma_fixed_crop.csv', crops.reshape(len(crops), -1))

    def load_mfcc(self, tensor=False):
        '''
        Loads data/X_mfcc.csv into X_mfcc. With tensor=True, X_mfcc_tensor is
        also set to the same data reshaped to songs x 13 x 1290.
        '''
        self.X_mfcc = np.loadtxt('data/X_mfcc.csv')
        if tensor:
            self.X_mfcc_tensor = self.X_mfcc.reshape(-1, self.N_MFCC, self.N_FRAMES)

    def load_mfcc_random_crop(self):
        '''Loads the random MFCC crops as a crops x 13 x 1200 array.'''
        flat = np.loadtxt('data/X_mfcc_random_crop.csv')
        self.X_mfcc_random_crop = flat.reshape(-1, self.N_MFCC, self.RANDOM_SEG_LENGTH)

    def load_mfcc_fixed_crop(self):
        '''Loads the fixed MFCC segments as a crops x 13 x SEG_LENGTH array.'''
        flat = np.loadtxt('data/X_mfcc_fixed_crop.csv')
        self.X_mfcc_fixed_crop = flat.reshape(-1, self.N_MFCC, self.SEG_LENGTH)

    def load_chroma(self, tensor=False):
        '''
        Loads data/X_chroma.csv into X_chroma. With tensor=True,
        X_chroma_tensor is also set to the same data reshaped to
        songs x 12 x 1290.
        '''
        self.X_chroma = np.loadtxt('data/X_chroma.csv')
        if tensor:
            self.X_chroma_tensor = self.X_chroma.reshape(-1, self.N_CHROMA, self.N_FRAMES)

    def load_chroma_random_crop(self):
        '''Loads the random chroma crops as a crops x 12 x 1200 array.'''
        flat = np.loadtxt('data/X_chroma_random_crop.csv')
        self.X_chroma_random_crop = flat.reshape(-1, self.N_CHROMA, self.RANDOM_SEG_LENGTH)

    def load_chroma_fixed_crop(self):
        '''Loads the fixed chroma segments as a crops x 12 x SEG_LENGTH array.'''
        flat = np.loadtxt('data/X_chroma_fixed_crop.csv')
        self.X_chroma_fixed_crop = flat.reshape(-1, self.N_CHROMA, self.SEG_LENGTH)

    def load_Y(self):
        '''Sets Y to one integer label per song: 100 consecutive songs per label.'''
        self.Y = np.arange(self.N_SONGS) // self.SONGS_PER_GENRE

    def load_Y_crop(self):
        '''Sets Y_crop to one label per crop: every song label repeated SEG times.'''
        self.Y_crop = np.arange(self.N_SONGS * self.SEG) // (self.SONGS_PER_GENRE * self.SEG)

In [32]:
# # Only need to run this ONCE!
# # Saves features to data/...
# dl = DataLoader()
# dl.save_mfcc()
# dl.save_mfcc_fixed_crop()
# dl.save_mfcc_random_crop()
# dl.save_chroma()
# dl.save_chroma_fixed_crop()
# dl.save_chroma_random_crop()

In [12]:
# # Load from CSVs in data/...
# dl = DataLoader()
# dl.load_mfcc()
# dl.load_mfcc_fixed_crop()
# dl.load_mfcc_random_crop()
# dl.load_chroma()
# dl.load_chroma_fixed_crop()
# dl.load_chroma_random_crop()
# dl.load_Y()
# dl.load_Y_crop()

(1000, 15480)


## Preprocessing

In [None]:
# # Only need to run this ONCE!
# Generates train and test indexes among the 1000 datapoints
# x = np.array([i for i in range(1000)])
# np.random.seed(1)
# train_idxs = np.random.choice(x, size=750, replace=False)
# train_idxs.sort()
# test_idxs = []
# for i in range(1000):
#     if i not in train_idxs:
#         test_idxs.append(i)
# test_idxs = np.array(test_idxs)
# np.savetxt('data/train_idxs.csv', train_idxs)
# np.savetxt('data/test_idxs.csv', test_idxs)

In [10]:
# Load the train/test indexes instead of shuffling
# np.loadtxt returns floats; cast both index arrays to int so they can be
# used with np.take.
train_idxs = np.loadtxt('data/train_idxs.csv')
train_idxs = train_idxs.astype(int)
test_idxs = np.loadtxt('data/test_idxs.csv')
test_idxs = test_idxs.astype(int)
# Show the last index of each split.
print(train_idxs[-1], test_idxs[-1])

999 993


In [11]:
# Standardize the MFCC features. The scaler is fit on the training rows only,
# so the mean/variance of the test rows do not leak into the preprocessing;
# the same fitted transform is then applied to every row.
scaler = StandardScaler()
scaler.fit(np.take(dl.X_mfcc, train_idxs, 0))
MX = scaler.transform(dl.X_mfcc)
MX_train, MX_test = np.take(MX, train_idxs, 0), np.take(MX, test_idxs, 0)
print(MX_train.shape, MX_test.shape)

(750, 16770) (250, 16770)


In [89]:
# Standardize the chroma features. The scaler is fit on the training rows
# only, so the mean/variance of the test rows do not leak into the
# preprocessing; the same fitted transform is then applied to every row.
scaler = StandardScaler()
scaler.fit(np.take(dl.X_chroma, train_idxs, 0))
CX = scaler.transform(dl.X_chroma)
CX_train, CX_test = np.take(CX, train_idxs, 0), np.take(CX, test_idxs, 0)
print(CX_train.shape, CX_test.shape)

(750, 15480) (250, 15480)


In [12]:
# Concatenate MFCC and chroma features column-wise, then standardize. The
# scaler is fit on the training rows only, so the mean/variance of the test
# rows do not leak into the preprocessing.
scaler = StandardScaler()
X = np.hstack([dl.X_mfcc, dl.X_chroma])
scaler.fit(np.take(X, train_idxs, 0))
X = scaler.transform(X)
X_train, X_test = np.take(X, train_idxs, 0), np.take(X, test_idxs, 0)
print(X_train.shape, X_test.shape)

(750, 32250) (250, 32250)


In [13]:
# Concatenate the PCA-reduced MFCC and chroma features, then standardize. The
# scaler is fit on the training rows only, so the mean/variance of the test
# rows do not leak into the preprocessing.
# NOTE(review): X_mfcc_pca / X_chroma_pca are not set by the DataLoader class
# shown in this file -- confirm where they are computed.
scaler = StandardScaler()
PX = np.hstack([dl.X_mfcc_pca, dl.X_chroma_pca])
scaler.fit(np.take(PX, train_idxs, 0))
PX = scaler.transform(PX)
PX_train, PX_test = np.take(PX, train_idxs, 0), np.take(PX, test_idxs, 0)
print(PX_train.shape, PX_test.shape)

(750, 200) (250, 200)


In [14]:
# Split the labels with the same row indexes used for the feature matrices.
Y = dl.Y
Y_train = np.take(Y, train_idxs, 0)
Y_test = np.take(Y, test_idxs, 0)
print(Y_train.shape, Y_test.shape)

(750,) (250,)
