In [1]:
import tensorflow.keras
from tensorflow.keras.layers import Activation, Dense, Dropout, Conv2D, \
                         Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential
import librosa
import librosa.display
import numpy as np
import pickle
import random
import glob
import re
from sklearn.preprocessing import LabelEncoder

# Data Preprocessing 
Extract song names and corresponding meters

Encode song meters to integers for classifier

**model and preprocessing code largely based on:**
*   Code Emporium on YouTube: *Sound Play with Convolution Neural Networks*
*   Code Emporium's (Ajay Halthor) code on GitHub: *audio-classifier-convNet*
*   arXiv paper for model specification: https://arxiv.org/pdf/1608.04363.pdf (by *Justin Salamon and Juan Pablo Bello at MARL/CUSP at NYU*)

In [2]:
%cd ../data/audio

# Extract song names from the .wav files in the current working directory
# (the `%cd` above moves into ../data/audio). The extension is removed by
# slicing off the suffix, and the names are sorted so that the order of
# `songs` -- and therefore of `meter` / `meter_enc` -- does not depend on the
# order in which the filesystem happens to list the files.
WAV_EXT = '.wav'
songs = sorted(wav_name[:-len(WAV_EXT)]
               for wav_name in glob.iglob('*' + WAV_EXT))

# Extract song meter from the annotations directory.
# Only the first line of each annotation file is read; it is stripped so that
# a trailing newline cannot turn one meter into two distinct labels
# (e.g. '8/4' vs '8/4\n').
meter = []
for song in songs:
    with open('../annotations/meter/' + song + '.meter', 'r') as file:
        meter.append(file.readline().strip())

# Encode song meter (taala) as an integer class label
le = LabelEncoder()
meter_enc = le.fit_transform(meter)

# Quick sanity check: first few meters, their encodings, and the song names
print(meter[:5], meter_enc[:5])
print("\n", songs[:5])

C:\Users\lexokan\Dropbox\R and Python\generativeMusic\data\audio
['8/4', '3/4', '7/8', '5/8', '8/4'] [3 0 2 1 3]

 ['01_10003_1-04_Shri_Visvanatham', '01_11001_3_Sankari_Neeve', '01_12001_8_Krishna_Nee_Begane', '01_13001_2-02_Hecharikaga', '02_10004_1_Vathapi']


Create dataset as list of tuples -- (song name, melspectrogram array, meter encoding)

Create random mel-spectrogram (melspec) snapshots of ~3 s each, starting between 10 s and 80 s into each song

The number of random melspec snapshots per song (described here as 30; the loop below currently takes 10) determines the total sample size = # snapshots × # songs

Building the dataset takes a long time (and a lot of memory!) --> this is the bottleneck of the script

Improvements:

*   Create a generator to feed in batches of training & validation data in real time
*   Preprocess data in a multicore manner

In [11]:
# Build the dataset: a list of (song name, mel-spectrogram array, meter encoding)
# tuples, made from short random snapshots of every song.
N_SNAPSHOTS_PER_SONG = 10     # snapshots attempted per song (the notes above mention 30)
OFFSET_RANGE_S = (10, 80)     # snapshot start offset is drawn from [10, 80) seconds
SNAPSHOT_DURATION_S = 2.97    # snapshot length in seconds
MELSPEC_SHAPE = (128, 128)    # only spectrograms of exactly this shape are kept
SEED = 42

# A freshly seeded generator makes the sampled offsets -- and hence the dataset --
# identical every time this cell is run.
rng = np.random.default_rng(SEED)

D = [] # Dataset

for i, song in enumerate(songs):
    for snapshot_idx in range(N_SNAPSHOTS_PER_SONG):
        # Load only the selected snapshot rather than the whole song.
        y, sr = librosa.load(song + '.wav',
                             offset=int(rng.integers(*OFFSET_RANGE_S)),
                             duration=SNAPSHOT_DURATION_S)
        ps = librosa.feature.melspectrogram(y=y, sr=sr)
        # Drop snapshots whose spectrogram does not have the expected shape
        # (presumably clips cut short by the end of the file -- TODO confirm),
        # so every sample in D has identical dimensions.
        if ps.shape != MELSPEC_SHAPE:
            continue
        D.append((song, ps, meter_enc[i]))
        


In [12]:
# Persist the dataset as two pickle files: the first 600 samples go to
# dataset1.pkl and everything after them to dataset2.pkl.
for pkl_path, first, last in (('../temp/dataset1.pkl', None, 600),
                              ('../temp/dataset2.pkl', 600, None)):
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(D[first:last], pkl_file)