## This notebook generates the PNG image files that will be used by a CNN.



Source: fma_small mp3 files.

Target: smMELsg_3sec_Pop_Roc_Elect_5fold.zip

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import utils

# Root folder for the sliced MP3 clips; train/<genre> and test/<genre> subfolders are created under it.
content_dir = '/content/sg3s'
# Root of the source MP3 files, resolved relative to the current working directory.
AUDIO_DIR = './fma_small'
# Root folder for the mel-spectrogram PNGs rendered from the clips (same train/test/<genre> layout).
content_mel_dir = '/content/smMEL3s'

In [None]:
import multiprocessing
import os

# Report the CPU resources visible to this runtime; later cells size their
# worker pools by hand, so this is the reference for those numbers.
cores = multiprocessing.cpu_count()
cpu_report = (
    ('Cores', cores),
    ('OS CPU count', os.cpu_count()),
    ('Affinity', os.sched_getaffinity(0)),
)
for label, value in cpu_report:
    print(f'{label}: {value}')

Cores: 8
OS CPU count: 8
Affinity: {0, 1, 2, 3, 4, 5, 6, 7}


In [None]:
# Load the track metadata table through the project-local `utils` helper.
tracks = utils.load('data/fma_metadata/tracks.csv')
# Keep only the rows whose ('set', 'subset') value compares <= 'small'.
# NOTE(review): presumably `utils.load` turns this column into an ordered
# categorical so `<=` ranks subsets by size rather than comparing strings — confirm.
subset = tracks.index[tracks['set', 'subset'] <= 'small']
# Sanity check: every selected id must exist in the table's index.
assert subset.isin(tracks.index).all()
# From here on `tracks` refers to the reduced table only.
tracks = tracks.loc[subset]
# Top-level genre of each remaining track, aligned with `tracks.index`.
labels = tracks['track', 'genre_top']
tracks.shape

(8000, 52)

In [None]:
# Partition the track ids using the predefined ('set', 'split') column and
# pull the matching genre labels for each partition.
# (The `*_lables` spelling is kept as-is so any existing references keep working.)
split_of = tracks['set', 'split']
train = tracks.index[split_of == 'training']
val = tracks.index[split_of == 'validation']
test = tracks.index[split_of == 'test']
train_lables = labels[train]
val_lables = labels[val]
test_lables = labels[test]
print('{} training examples, {} validation examples, {} testing examples'.format(
    len(train), len(val), len(test)))

6400 training examples, 800 validation examples, 800 testing examples


In [None]:
# Spot-check: look up the top-level genre of one known training track.
idx = 124752
training_rows = tracks[tracks['set', 'split'] == 'training']
training_rows.loc[idx]['track', 'genre_top']

'Electronic'

In [None]:
# Top-level genres handled by this notebook; each gets its own sub-folder
# under the train/ and test/ output directories.
genres = [
    'Hip-Hop',
    'Pop',
    'Folk',
    'Experimental',
    'Rock',
    'International',
    'Electronic',
    'Instrumental',
]

In [None]:
import os

# Collect (full_path, filename) pairs for every sliced training clip of one genre.
genre = 'Rock'
clip_dir = f'{content_dir}/train/{genre}'
track_clip_lst = []
# sorted(): os.listdir returns entries in arbitrary order, so sort for a reproducible work list.
for filename in sorted(os.listdir(clip_dir)):
  # Skip anything that is not an exported clip (e.g. a `.ipynb_checkpoints` folder),
  # which would otherwise be handed to the spectrogram worker and fail there.
  if not filename.endswith('.mp3'):
    continue
  track_path = f'{clip_dir}/{filename}'
  track_clip_lst.append((track_path,filename))

In [None]:
# Map each genre name to the index of training track ids whose top-level genre matches.
# The training slice and its genre column do not change per genre, so compute them once
# instead of re-slicing `tracks` twice on every loop iteration.
train_tracks = tracks.loc[train]
train_genre_top = train_tracks['track', 'genre_top']
train_genreMap = {}
for g in genres:
  indxs = train_tracks.index[train_genre_top == g]
  train_genreMap[g] = indxs

In [None]:
# Show how many training tracks were found for each genre, in `genres` order.
for genre_track_ids in train_genreMap.values():
  print(len(genre_track_ids))

800
800
800
800
800
800
800
800


In [None]:
import os

# Build the output tree for the sliced clips: <content_dir>/{train,test}/<genre>.
# exist_ok=True makes the cell safe to re-run.
os.makedirs(content_dir, exist_ok=True)
for split in ('train', 'test'):
  os.makedirs(f'{content_dir}/{split}', exist_ok=True)
for g in genres:
  for split in ('train', 'test'):
    os.makedirs(f'{content_dir}/{split}/{g}', exist_ok=True)

In [None]:
def get_audio_path(audio_dir, track_id):
    """Return the MP3 path for a track id inside `audio_dir`.

    The id is zero-padded to six digits; the first three digits name the
    sub-folder and the full padded id names the file, e.g. id 2 maps to
    `<audio_dir>/000/000002.mp3`.
    """
    padded_id = f'{track_id:06d}'
    folder = padded_id[:3]
    return os.path.join(audio_dir, folder, f'{padded_id}.mp3')

In [None]:
# Peek at the first training track id recorded under 'Pop'.
train_genreMap['Pop'][0]

10

# Create Training Images
## First, slice MP3 files into 3 second clips

In [None]:
%%time

from pydub import AudioSegment as AS
from pydub.utils import make_chunks
from multiprocessing import Pool
import functools
from functools import partial
from tqdm.notebook import tqdm

#genre = 'Pop'
#['Hip-Hop', 'Folk', 'Experimental', 'Rock', 'International', 'Electronic', 'Instrumental']
for genre in tqdm(['Pop']):

  genre_ids = train_genreMap[genre]

  for track_id in genre_ids:
    #track_id = train_genreMap['Pop'][1]
    track_path = get_audio_path(AUDIO_DIR, track_id)
    try:
      track = AS.from_mp3(track_path)
      chunk_length_ms = 3000 # pydub calculates in millisec
      chunks = make_chunks(track, chunk_length_ms)

      def export_audio(chunk):
        try:
          chunk[1].export(content_dir + '/train/' + genre + '/' +
                    genre + '_' + str(track_id) + '_' + str(chunk[0]) + '.mp3',
                    format='mp3')
        except Exception as e:
          print(chunk[1])
          print(e)

      chunk_list = []
      for idx, chunk in enumerate(chunks):
        chunk_list.append((idx, chunk))
      pool = Pool()
      pool.map(export_audio, chunk_list)
      pool.close()
      pool.join()
    except Exception as e:
      print(track_path)
      print(e)
      continue


  0%|          | 0/1 [00:00<?, ?it/s]

CPU times: user 1min 7s, sys: 2min 19s, total: 3min 27s
Wall time: 33min 20s


Check if files are still playable.

In [None]:
import librosa
import IPython.display as ipd
# Bare name in the middle of the cell: it is evaluated but not displayed,
# because only a cell's last expression is rendered.
track_path
#x, sr = librosa.load('./sg3s/train/Pop/Pop_10_0.mp3', sr=None, mono=True)
# Decode whichever file `track_path` currently points to, keeping its native
# sample rate (sr=None) and down-mixing to mono.
x, sr = librosa.load(track_path, sr=None, mono=True)
#x = AS.from_mp3(track_path)
# Render an inline audio player for the decoded samples.
ipd.Audio(data=x, rate=sr)

In [None]:
import librosa
import IPython.display as ipd
# Spot-check one exported 3-second clip. The path is built from `content_dir` so it
# points at the folder the slicing cell wrote to, whatever the working directory is.
x, sr = librosa.load(f'{content_dir}/train/Pop/Pop_154414_9.mp3', sr=None, mono=True)
# Render an inline audio player for the decoded samples.
ipd.Audio(data=x, rate=sr)

## Now create image files.

In [None]:
import os

# Build the output tree for the spectrogram images: <content_mel_dir>/{train,test}/<genre>.
# exist_ok=True makes the cell safe to re-run.
os.makedirs(content_mel_dir, exist_ok=True)
for split in ('train', 'test'):
  os.makedirs(f'{content_mel_dir}/{split}', exist_ok=True)
for g in genres:
  for split in ('train', 'test'):
    os.makedirs(f'{content_mel_dir}/{split}/{g}', exist_ok=True)

In [None]:
# Number of (path, filename) clip entries queued for spectrogram rendering.
len(track_clip_lst)

8319

In [None]:
%%time

import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import librosa
import numpy as np
from multiprocessing import Pool
import functools
from functools import partial
from tqdm.notebook import tqdm
import os

genre = 'Rock'

def create_mel_sg(track_path):
  """Render the mel spectrogram of one training clip to a PNG file.

  track_path -- (clip_path, clip_filename) pair as stored in `track_clip_lst`.

  The image is written to <content_mel_dir>/train/<genre>/<clip name>.png,
  where `genre` is the module-level variable set above. Clips whose PNG
  already exists are skipped, so the cell can be re-run to resume. Errors are
  printed together with the clip path instead of being raised, so one bad clip
  does not stop the pool.
  """
  try:
    clip_stem = os.path.splitext(track_path[1])[0]
    target_img = f'{content_mel_dir}/train/{genre}/{clip_stem}.png'
    if os.path.isfile(target_img): return
    y,sr = librosa.load(track_path[0],duration=3)
    mels = librosa.feature.melspectrogram(y=y,sr=sr)
    # Start from a fresh pyplot figure for every clip (closed in `finally`).
    plt.figure()
    plt.imshow(librosa.power_to_db(mels,ref=np.max))
    plt.grid(False)
    plt.axis('off')
    plt.gca().get_xaxis().set_visible(False)
    plt.gca().get_yaxis().set_visible(False)
    plt.tight_layout()
    plt.savefig(target_img, bbox_inches='tight', pad_inches=0)
  except Exception as e:
    print(e)
    print(track_path[0])
  finally:
    # pyplot keeps figures alive until they are closed. Without this, every call
    # handled by the same worker would stack another image on the same axes,
    # growing memory use and render time clip after clip.
    plt.close('all')

# Render all queued training clips in parallel with interactive plotting switched off.
plt.ioff()
try:
  # The context manager tears the workers down even if map() raises.
  with Pool(processes=3) as pool:
    pool.map(create_mel_sg, track_clip_lst)
finally:
  # Always restore interactive mode; assigning the result keeps it out of the cell output.
  r = plt.ion()

CPU times: user 89.2 ms, sys: 87.4 ms, total: 177 ms
Wall time: 6.5 s


In [None]:
import cv2
content_mel_dir = '/content/smMEL3s'

# Tally how many training images of the current `genre` have each pixel shape,
# to check that the spectrogram images are uniform.
img_shapes = {}
for filename in os.listdir(content_mel_dir + '/train/' + f"{genre}"):
  track_path = f'{content_mel_dir}/train/{genre}/{filename}'
  im = cv2.imread(track_path)
  if im is None:
    # cv2.imread signals an unreadable entry (truncated PNG, stray folder) by
    # returning None rather than raising; report it instead of crashing on `.shape`.
    print(f'Unreadable image skipped: {track_path}')
    continue
  img_shapes[im.shape] = img_shapes.get(im.shape, 0) + 1

img_shapes

# Create Test Dataset Image Files

In [None]:
# Map each genre name to the index of test track ids whose top-level genre matches.
# The test slice and its genre column do not change per genre, so compute them once
# instead of re-slicing `tracks` twice on every loop iteration.
test_tracks = tracks.loc[test]
test_genre_top = test_tracks['track', 'genre_top']
test_genreMap = {}
for g in genres:
  indxs = test_tracks.index[test_genre_top == g]
  test_genreMap[g] = indxs

In [None]:
# Show each genre next to the number of test tracks found for it.
for key, genre_track_ids in test_genreMap.items():
  print(key, len(genre_track_ids))

Hip-Hop 100
Pop 100
Folk 100
Experimental 100
Rock 100
International 100
Electronic 100
Instrumental 100


## Slice each MP3 file into 10 parts.

In [None]:
%%time

from pydub import AudioSegment as AS
from pydub.utils import make_chunks
from multiprocessing import Pool
import functools
from functools import partial
from tqdm.notebook import tqdm

rest_g = ['Hip-Hop', 'Folk', 'Experimental', 'International', 'Instrumental']
#rest_g = ['Pop', 'Electronic', 'Rock']
for genre in tqdm(rest_g):

  genre_ids = test_genreMap[genre]

  for track_id in genre_ids:
    track_path = get_audio_path(AUDIO_DIR, track_id)
    try:
      track = AS.from_mp3(track_path)
      chunk_length_ms = 3000 # pydub calculates in millisec
      chunks = make_chunks(track, chunk_length_ms)

      def export_audio(chunk):
        try:
          chunk[1].export(content_dir + '/test/' + genre + '/' +
                    genre + '_' + str(track_id) + '_' + str(chunk[0]) + '.mp3',
                    format='mp3')
        except Exception as e:
          print(chunk[1])
          print(e)

      chunk_list = []
      for idx, chunk in enumerate(chunks):
        chunk_list.append((idx, chunk))
      pool = Pool(processes=6)
      pool.map(export_audio, chunk_list)
      pool.close()
      pool.join()
    except Exception as e:
      print(track_path)
      print(e)
      continue

  0%|          | 0/5 [00:00<?, ?it/s]

CPU times: user 29.3 s, sys: 38.4 s, total: 1min 7s
Wall time: 10min 50s


In [None]:
import os

# Collect (full_path, filename) pairs for every sliced test clip of one genre.
genre = 'Electronic'
clip_dir = f'{content_dir}/test/{genre}'
test_track_clip_lst = []
# sorted(): os.listdir returns entries in arbitrary order, so sort for a reproducible work list.
for filename in sorted(os.listdir(clip_dir)):
  # Skip anything that is not an exported clip (e.g. a `.ipynb_checkpoints` folder),
  # which would otherwise be handed to the spectrogram worker and fail there.
  if not filename.endswith('.mp3'):
    continue
  track_path = f'{clip_dir}/{filename}'
  test_track_clip_lst.append((track_path,filename))

In [None]:
# Number of (path, filename) test-clip entries queued for spectrogram rendering.
len(test_track_clip_lst)

1043

## Create Image files for Test dataset

In [None]:
%%time

import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import librosa
import numpy as np
from multiprocessing import Pool
import functools
from functools import partial
from tqdm.notebook import tqdm
import os

print(f'Create image for {genre}')

def create_mel_sg(track_path):
  """Render the mel spectrogram of one test clip to a PNG file.

  track_path -- (clip_path, clip_filename) pair as stored in `test_track_clip_lst`.

  The image is written to <content_mel_dir>/test/<genre>/<clip name>.png,
  where `genre` is the module-level variable set by an earlier cell. Clips whose
  PNG already exists are skipped, so the cell can be re-run to resume. Errors
  are printed together with the clip path instead of being raised, so one bad
  clip does not stop the pool.
  """
  try:
    clip_stem = os.path.splitext(track_path[1])[0]
    target_img = f'{content_mel_dir}/test/{genre}/{clip_stem}.png'
    if os.path.isfile(target_img): return
    y,sr = librosa.load(track_path[0],duration=3)
    mels = librosa.feature.melspectrogram(y=y,sr=sr)
    # Start from a fresh pyplot figure for every clip (closed in `finally`).
    plt.figure()
    plt.imshow(librosa.power_to_db(mels,ref=np.max))
    plt.grid(False)
    plt.axis('off')
    plt.gca().get_xaxis().set_visible(False)
    plt.gca().get_yaxis().set_visible(False)
    plt.tight_layout()
    plt.savefig(target_img, bbox_inches='tight', pad_inches=0)
  except Exception as e:
    print(e)
    print(track_path[0])
  finally:
    # pyplot keeps figures alive until they are closed. Without this, every call
    # handled by the same worker would stack another image on the same axes,
    # growing memory use and render time clip after clip.
    plt.close('all')

# Render all queued test clips in parallel with interactive plotting switched off.
plt.ioff()
try:
  # The context manager tears the workers down even if map() raises.
  with Pool(processes=7) as pool:
    pool.map(create_mel_sg, test_track_clip_lst)
finally:
  # Always restore interactive mode; assigning the result keeps it out of the cell output.
  r = plt.ion()

CPU times: user 3.78 s, sys: 498 ms, total: 4.28 s
Wall time: 12min 1s


Check the dimensions of the image files that were created.

In [None]:
import cv2
import os

content_mel_dir = '/content/smMEL3s'

# For each listed genre, tally how many test images have each pixel shape,
# to check that the spectrogram images are uniform.
for genre in ['Pop', 'Rock', 'Electronic']:
  img_shapes = {}
  for filename in os.listdir(content_mel_dir + '/test/' + f"{genre}"):
    track_path = f'{content_mel_dir}/test/{genre}/{filename}'
    im = cv2.imread(track_path)
    if im is None:
      # cv2.imread signals an unreadable entry (truncated PNG, stray folder) by
      # returning None rather than raising; report it instead of crashing on `.shape`.
      print(f'Unreadable image skipped: {track_path}')
      continue
    img_shapes[im.shape] = img_shapes.get(im.shape, 0) + 1
  print(img_shapes)

{(450, 457, 3): 951, (450, 453, 3): 49}
{(450, 457, 3): 946, (450, 453, 3): 53}
{(450, 457, 3): 940, (450, 453, 3): 57}
