<a href="https://colab.research.google.com/github/atjessehill/Thesis-Notebooks/blob/main/Process_Audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from os import path
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from google.colab import drive
import json
from sklearn import preprocessing
import glob
drive.mount('/content/drive')
from skimage.io import imsave, imread


Mounted at /content/drive


In [None]:
# Relative path to the mounted Google Drive root. The drive is mounted at
# /content/drive, so this assumes the working directory is /content.
BASE = 'drive/My Drive'
# Sub-paths below are joined onto BASE with os.path.join by the code that uses them.
SONG_SAMPLE_PATH = 'Thesis/Samples'             # per-class sample clips (noDJ / yesDJ)
DATA_SAVE_PATH = 'Thesis/InputData'             # extracted features and split files
SONG_FULL_PATH = 'Thesis/Samples/FULL_TRACKS'   # full-length tracks, per class

In [None]:
import essentia
import essentia.standard as es

In [None]:
# File name of a sample track.
# NOTE(review): no visible cell reads `sample_song`; later cells reference an
# undefined name `song` instead -- confirm which one is intended.
sample_song = '4dMnqJWeRMM.mp3'

In [None]:
def rescale_feature(x):
  """Return `x` min-max scaled by a MinMaxScaler fitted on `x` itself.

  The scaler is created with sklearn's default settings and is discarded
  after use, so nothing is shared between calls.
  """
  fitted = preprocessing.MinMaxScaler().fit(x)
  return fitted.transform(x)

def load_splits():
  """Read '10-fold-splits.json' from BASE/DATA_SAVE_PATH and return the parsed JSON."""
  splits_file = os.path.join(BASE, DATA_SAVE_PATH, '10-fold-splits.json')
  with open(splits_file, 'r') as handle:
    return json.load(handle)


def save_mfcc(json_file, n_mfcc=20, n_fft=2048, hop_length=512, rescale=False, debug=False):
  """Extract MFCCs for every sample clip and store them with their labels as JSON.

  Clips are globbed from BASE/SONG_SAMPLE_PATH/<class>/*.mp3 for the classes
  'noDJ' (label 0) and 'yesDJ' (label 1). Each clip is loaded at 22050 Hz and
  turned into a (frames, n_mfcc) matrix. The first clip processed fixes the
  reference shape; clips whose matrix has a different shape are reported and
  left out so that the stored data stays rectangular across both classes.

  Args:
    json_file: Name of the output file, created under BASE/DATA_SAVE_PATH.
    n_mfcc: Number of coefficients, forwarded to librosa.feature.mfcc.
    n_fft: FFT window size, forwarded to librosa.feature.mfcc.
    hop_length: Hop size, forwarded to librosa.feature.mfcc.
    rescale: If True, run each matrix through rescale_feature.
    debug: If True, stop after three files per class, write nothing and
      return the collected dict instead.

  Returns:
    The collected dict when debug is True, otherwise None.
  """
  json_save_file = os.path.join(BASE, DATA_SAVE_PATH, json_file)

  data = {
      # NOTE(review): the meaning of this constant is not evident from the
      # code; it is kept so existing readers of the file still find the key.
      'data': 13,
      'mapping': {
          'noDJ': 0,
          'yesDJ': 1
      },
      'mfcc': [],
      'labels': []
  }

  paths = ['noDJ', 'yesDJ']
  shape = None  # reference (frames, n_mfcc) shape, shared by both classes
  for i, p in enumerate(paths):
    for j, file in enumerate(glob.glob(os.path.join(BASE, SONG_SAMPLE_PATH, p, "*.mp3"))):
      if j % 25 == 0:
        print("j{} i{} Loading{}".format(j, i, file))
      signal, sr = librosa.load(file, sr=22050)
      # The audio must be passed by keyword: newer librosa releases reject it
      # as a positional argument, older ones accept both forms.
      mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
      mfcc = mfcc.T  # -> (frames, n_mfcc)
      if rescale:
        mfcc = rescale_feature(mfcc)
      if shape is None:
        shape = mfcc.shape
      if j == 0:
        print("Shape is {}".format(mfcc.shape))

      if mfcc.shape == shape:
        data['mfcc'].append(mfcc.tolist())
        data['labels'].append(i)
      else:
        print("Coudln't save {} with shape {} on file {}".format(file, mfcc.shape, os.path.basename(file)))

      if debug and j == 2:
        break

  # Return / write only after *all* classes have been processed. Doing this
  # inside the class loop returned data for the first class only in debug
  # mode and rewrote the whole JSON file once per class otherwise.
  if debug:
    return data

  with open(json_save_file, 'w') as fp:
    json.dump(data, fp, indent=4)

In [None]:
def load_data(path):
  """Load a feature JSON from BASE/DATA_SAVE_PATH/All and return numpy arrays.

  Returns a tuple (x, y) built from the file's 'mfcc' and 'labels' entries.
  """
  source = os.path.join(BASE, DATA_SAVE_PATH, 'All', path)
  with open(source, 'r') as handle:
    payload = json.load(handle)

  return np.array(payload['mfcc']), np.array(payload['labels'])

def load_data_list(path):
  """Load a feature JSON from BASE/DATA_SAVE_PATH and return plain Python lists.

  Returns a tuple (x, y) holding the file's 'mfcc' and 'labels' entries exactly
  as parsed by json.load (no numpy conversion).
  """
  source = os.path.join(BASE, DATA_SAVE_PATH, path)
  with open(source, 'r') as handle:
    payload = json.load(handle)

  return payload['mfcc'], payload['labels']

def setup_mfcc():
  """Run save_mfcc (with rescaling) for each configured MFCC parameter combination.

  Only combinations whose FFT size equals the hop length are processed; every
  other combination is reported as skipped. The grid currently holds a single
  configuration: 20 coefficients, n_fft 1024, hop length 1024.
  """
  coefficient_counts = [20]
  fft_sizes = [1024]
  hop_sizes = [1024]

  for nmfcc in coefficient_counts:
    for nfft in fft_sizes:
      for hoplength in hop_sizes:
        json_file = "MFCC_{}_nfft_{}_hl_{}.json".format(nmfcc, nfft, hoplength)
        if nfft == hoplength:
          print("Calling {}".format(json_file))
          save_mfcc(json_file, n_mfcc=nmfcc, n_fft=nfft, hop_length=hoplength, rescale=True)
        else:
          print(f"Skipping {json_file}")

In [None]:
def twenty_to_fewer_mfcc():
  """Derive 13..19-coefficient MFCC files by truncating the saved 20-coefficient file.

  For every (n_fft, hop_length) pair the file
  'MFCC_20_nfft_<n_fft>_hl_<hop_length>.json' is loaded from BASE/DATA_SAVE_PATH
  and, for each target count, every frame is cut down to its first `nmfcc`
  values. Each result is written next to the source as
  'MFCC_<nmfcc>_nfft_<n_fft>_hl_<hop_length>.json'.

  A pair whose 20-coefficient file is missing is skipped with a message;
  previously that case fell through to the truncation loop with `x`/`y`
  undefined (or left over from the previous pair).
  """
  n_mfcc = [13, 14, 15, 16, 17, 18, 19]
  n_fft = [1024]
  hop_length = [1024]
  for nfft in n_fft:
    for hoplength in hop_length:
      if nfft == 512 and hoplength == 512:
        continue

      twenty_ref = f'MFCC_20_nfft_{nfft}_hl_{hoplength}.json'
      if not os.path.exists(os.path.join(BASE, DATA_SAVE_PATH, twenty_ref)):
        print(f"Skipping {twenty_ref}: file not found")
        continue
      print(f"Loading {twenty_ref}")
      x, y = load_data_list(twenty_ref)

      for nmfcc in n_mfcc:
        json_save_file = os.path.join(BASE, DATA_SAVE_PATH, f'MFCC_{nmfcc}_nfft_{nfft}_hl_{hoplength}.json')
        # A fresh dict per output file, so nothing carries over between writes.
        data = {
            'mapping': {
                'noDJ': 0,
                'yesDJ': 1
            },
            # x is clips -> frames -> coefficients; keep the first nmfcc per frame.
            'mfcc': [[frame[:nmfcc] for frame in clip] for clip in x],
            'labels': y
        }

        with open(json_save_file, 'w') as fp:
          json.dump(data, fp, indent=4)
        print("Created "+json_save_file)


In [None]:
# Build the MFCC JSON file(s) for the configuration grid defined in setup_mfcc().
setup_mfcc()

Calling MFCC_20_nfft_1024_hl_1024.json
j0 i0 Loadingdrive/My Drive/Thesis/Samples/noDJ/0TLkXhem5Yk.mp3
Shape is (3876, 20)
j25 i0 Loadingdrive/My Drive/Thesis/Samples/noDJ/DREDfpV4k3w.mp3
j50 i0 Loadingdrive/My Drive/Thesis/Samples/noDJ/PE5luMsuiv8.mp3
Coudln't save drive/My Drive/Thesis/Samples/noDJ/s3texqVsAC8.mp3 with shape (173, 20) on file drive/My Drive/Thesis/Samples/noDJ/s3texqVsAC8.mp3
j75 i0 Loadingdrive/My Drive/Thesis/Samples/noDJ/vBAhXU9dxro.mp3
Coudln't save drive/My Drive/Thesis/Samples/noDJ/yUJ0QjrNFFw.mp3 with shape (326, 20) on file drive/My Drive/Thesis/Samples/noDJ/yUJ0QjrNFFw.mp3
j0 i1 Loadingdrive/My Drive/Thesis/Samples/yesDJ/0-MxidypKwI.mp3
Shape is (3876, 20)
j25 i1 Loadingdrive/My Drive/Thesis/Samples/yesDJ/btz2Cf96Jic.mp3
j50 i1 Loadingdrive/My Drive/Thesis/Samples/yesDJ/lOOtaHiTsus.mp3
j75 i1 Loadingdrive/My Drive/Thesis/Samples/yesDJ/slnsCDoMInk.mp3


In [None]:
# Load one noDJ sample clip at a 22050 Hz sampling rate for the spectrogram experiment.
y, sr = librosa.load('drive/My Drive/Thesis/Samples/noDJ/DREDfpV4k3w.mp3', sr=22050)

In [None]:
# Turn the loaded clip into an 8-bit, inverted log-mel spectrogram image.
# NOTE(review): scale_minmax is defined in a LATER cell of this notebook, so on a
# fresh kernel this cell raises NameError unless that cell has been run first.
s = librosa.feature.melspectrogram(y=y, sr=sr)
mels = np.log(s + 1e-9)  # small offset keeps log() finite where the spectrogram is 0
img = scale_minmax(mels, 0.0, 255.0).astype(np.uint8)
img = np.flip(img, axis=0)  # reverse the order along the first axis
img = 255-img  # invert the intensities
# Earlier attempts, kept for reference:
# img = (255*(mels - np.min(mels)/np.ptp(mels))).astype(int)
# img = np.flip(img, axis=0)
# s /= np.max(np.abs(s), axis=0).astype(np.uint8)
# s = rescale_feature(s).astype(np.uint8)
# s = np.flip(s, axis=0)
# # s = 1.0-s

In [None]:
def scale_minmax(X, min=0.0, max=1.0):
  """Linearly rescale the values of array `X` to the range [min, max].

  The smallest value of X maps to `min` and the largest to `max`, using the
  global minimum and maximum of the whole array.

  Args:
    X: numpy array with at least one element.
    min: lower bound of the output range.
    max: upper bound of the output range.

  Returns:
    A float array with the same shape as X. If every element of X is equal
    (zero value range, e.g. a silent clip) the result is filled with `min`
    instead of the NaNs that the 0/0 division would otherwise produce.
  """
  lowest = X.min()
  span = X.max() - lowest
  if span == 0:
    # Constant input: there is no spread to scale, so pin everything to `min`.
    return np.zeros_like(X, dtype=float) + min
  X_std = (X - lowest) / span
  return X_std * (max - min) + min

In [None]:
# Export an 8-bit inverted log-mel spectrogram (.npy) for every 30-second clip.
# Clips whose output file already exists are skipped, so the cell can be re-run
# to resume an interrupted export.
# NOTE(review): the output name does not encode n_fft / hop_length, so adding a
# second configuration to the lists below would be skipped as "already exists".
n_fft = [1024]
hop_length = [512]
paths = ['noDJ', 'yesDJ']
for nfft in n_fft:
  for hl in hop_length:
    print(f"Starting {nfft} and {hl}")
    for j, file in enumerate(glob.glob(os.path.join(BASE, SONG_SAMPLE_PATH, "30_sec", "*.mp3"))):
      # Clip files are named "<id>.mp3_clip_<k>.mp3", so splitting on '.mp3'
      # yields exactly three parts: the id, "_clip_<k>" and an empty tail.
      url, clip, _ = os.path.basename(file).split('.mp3')
      out = url+'_spec'+clip+'.npy'
      if j % 25 == 0:
        # The class index `i` was dropped from this message: this cell has no
        # class loop, so `i` only existed if another cell had leaked it.
        print("j{} Loading{}".format(j, file))
      file_output = os.path.join(BASE, DATA_SAVE_PATH, "Spectrograms_30sec", out)
      if os.path.exists(file_output):
        continue
      y, sr = librosa.load(file, sr=22050)
      s = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=nfft, hop_length=hl)
      mels = np.log(s + 1e-9)  # small offset keeps log() finite where the spectrogram is 0
      img = scale_minmax(mels, 0.0, 255.0).astype(np.uint8)
      img = np.flip(img, axis=0)
      img = 255-img
      np.save(file_output, img)
      if j % 25 == 0:
        print(f"Finished {j}")

Starting 1024 and 512
j0 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/C4jEVe5McRE.mp3_clip_0.mp3
Finished 0
j25 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/khl-Ky_TnhA.mp3_clip_8.mp3
Finished 25
j50 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/IRnl746GIuo.mp3_clip_3.mp3
Finished 50
j75 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/AwHBTnb-WQQ.mp3_clip_14.mp3
Finished 75
j100 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/DO21jwbhXaQ.mp3_clip_10.mp3
Finished 100
j125 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/R13pIiQ7PLc.mp3_clip_8.mp3
Finished 125
j150 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/3awn-74uJzw.mp3_clip_13.mp3
Finished 150
j175 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/0ABA9tIqCCo.mp3_clip_7.mp3
Finished 175
j200 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/aFGNk2ilFfI.mp3_clip_9.mp3
Finished 200
j225 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/zUp4y5aScM0.mp3_clip_3.mp3
Finished 225
j250 i0 Loadingdrive/My Drive/Thesis/Samples/30_sec/scDul_mI4uc.m

In [None]:
# Export one MFCC matrix (.npy) per sample clip for every hop length below.
# Within one (n_mfcc, n_fft, hop_length) configuration the first clip fixes the
# reference shape; clips with a different shape are reported and not saved.
n_mfcc = [20]
n_fft = [1024]
hop_length = [128, 256, 512, 1024]
paths = ['noDJ', 'yesDJ']
rescale = True
for nmfcc in n_mfcc:
  for nfft in n_fft:
    for hl in hop_length:
      print(f"Starting {nfft} and {hl}")
      # The frame count depends on the hop length, so the reference shape is
      # reset per configuration but shared by both classes.
      shape = None
      for i, p in enumerate(paths):
        for j, file in enumerate(glob.glob(os.path.join(BASE, SONG_SAMPLE_PATH, p, "*.mp3"))):
          url = os.path.basename(file)
          out = url+'_MFCC_'+str(nmfcc)+'_'+str(nfft)+'_'+str(hl)+'.npy'
          file_output = os.path.join(BASE, DATA_SAVE_PATH, "MFCC", out)
          if j % 25 == 0:
            print("j{} i{} Loading{}".format(j, i, file))
          signal, sr = librosa.load(file, sr=22050)

          # The audio must be passed by keyword: newer librosa releases reject
          # it as a positional argument, older ones accept both forms.
          mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=nmfcc, n_fft=nfft, hop_length=hl)
          mfcc = mfcc.T  # -> (frames, n_mfcc)
          if rescale:
            mfcc = rescale_feature(mfcc)
          if shape is None:
            shape = mfcc.shape
          if j == 0:
            print("Shape is {}".format(mfcc.shape))
          if mfcc.shape == shape:
            np.save(file_output, mfcc)
          else:
            print("Could not save", file_output)
          print(j)

In [None]:
!pip install pydub



In [None]:
def slice_audio():
  """Cut every full-length track into consecutive 30-second MP3 clips.

  Tracks are read from BASE/Thesis/Samples/FULL_TRACKS/<class>/*.mp3 for the
  classes 'noDJ' and 'yesDJ'. Each clip is written to
  BASE/Thesis/Samples/30_sec as '<original file name>_clip_<k>.mp3'. Audio
  left over after the last full 30 seconds is discarded.
  """
  # Imported locally: pydub is installed by a `pip install` cell of this
  # notebook, and the name AudioSegment was never imported anywhere else.
  from pydub import AudioSegment

  clip_ms = 30 * 1000  # pydub lengths and slice indices are in milliseconds
  out_dir = os.path.join(BASE, 'Thesis', 'Samples', '30_sec')
  os.makedirs(out_dir, exist_ok=True)  # export() does not create the folder

  paths = ['noDJ', 'yesDJ']

  for i, p in enumerate(paths):
    for j, file in enumerate(glob.glob(os.path.join(BASE, 'Thesis', 'Samples', 'FULL_TRACKS', p, "*.mp3"))):
      if j % 25 == 0:
        print("Starting ", i, j, file)

      mp3_name = os.path.basename(file)
      song = AudioSegment.from_mp3(file)
      # Whole clips only; the remainder at the end of the track is dropped.
      slices = len(song) // clip_ms
      for s in range(slices):
        extract = song[s*clip_ms:(s+1)*clip_ms]
        if len(extract) != clip_ms:
          print(f"Errored at {file} clip {s} --> {len(extract)}")
        else:
          file_name = os.path.join(out_dir, mp3_name+f'_clip_{s}.mp3')
          print(file_name)
          extract.export(file_name, format='mp3')

# Slice all full tracks into 30-second clips. (A dangling, empty `def` that
# preceded this call was removed: it made the whole cell a SyntaxError.)
slice_audio()

Starting  0 0 drive/My Drive/Thesis/Samples/FULL_TRACKS/noDJ/25NYAVvPvl4.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_0.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_1.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_2.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_3.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_4.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_5.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_6.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_7.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_8.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_9.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_10.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_11.mp3
drive/My Drive/Thesis/Samples/30_sec/25NYAVvPvl4.mp3_clip_12.mp3
drive/My Drive/Thesis/Samples/30_sec/vBAhXU9dxro.mp3_clip_0.mp3
drive/My Drive/Thesis/Sa

In [None]:
# Materialise every fold as plain lists: the entries selected by
# `shuffle_index` form the test part, everything else the train part.
# NOTE(review): `splits`, `urls_np` and `result_np` are not defined in any
# visible cell -- they must already exist in the kernel.
url_splits = []
for i, shuffle_index in enumerate(splits):
  x_train, x_test = np.delete(urls_np, shuffle_index), np.take(urls_np, shuffle_index)
  y_train, y_test = np.delete(result_np, shuffle_index), np.take(result_np, shuffle_index)

  fold_data = dict(
      train=x_train.tolist(),
      test=x_test.tolist(),
      ytrain=y_train.tolist(),
      ytest=y_test.tolist(),
  )
  url_splits.append(fold_data)


  # for config in [f1, f2, f3, f4, f5, f6, f7, f8, f9]:
  # print(config)
  # x, y = load_data(config+'.json')
  # for i, shuffle_index in enumerate(splits):
  #     save_x_train = os.path.join(BASE, DATA_SAVE_PATH, '10FoldCrosstab', config, f'x_train_{i}.npy')
  #     save_x_test = os.path.join(BASE, DATA_SAVE_PATH, '10FoldCrosstab', config, f'x_test_{i}.npy')
  #     save_y_train = os.path.join(BASE, DATA_SAVE_PATH, '10FoldCrosstab', config, f'y_train_{i}.npy')
  #     save_y_test = os.path.join(BASE, DATA_SAVE_PATH, '10FoldCrosstab', config, f'y_test_{i}.npy')

  #     x_train = np.delete(x, shuffle_index, axis=0)
  #     x_test = np.take(x, shuffle_index, axis=0)
  #     y_train = np.delete(y, shuffle_index, axis=0)
  #     y_test = np.take(y, shuffle_index, axis=0)
  #     print(x_train.shape)
  #     if len(x_train) != len(y_train):
  #       print(f"Error: {f1} {i} {x_train.shape} {y_train.shape}")
  #     if len(x_test) != len(y_test):
  #       print(f"Error: {f1} {i} {x_test.shape} {y_test.shape}")
  #     np.save(save_x_train, x_train)
  #     np.save(save_x_test, x_test)
  #     np.save(save_y_train, y_train)
  #     np.save(save_y_test, y_test)

In [None]:
# Persist the folds built above; load_splits() reads this same file name back
# from BASE/DATA_SAVE_PATH.
json_save_file = os.path.join(BASE, DATA_SAVE_PATH, '10-fold-splits.json')

with open(json_save_file, 'w') as fp:
  json.dump(url_splits, fp, indent=4)

In [None]:
# for i in range(0, 10):
#   for j in range(0, 10):
#     if i==j:
#       continue
#     as_set = set(url_splits[i]['test'])
#     intersection = as_set.intersection(url_splits[j]['test'])
#     print(intersection)
# Sanity check: print, for every fold, the entries that occur in both its train
# and its test list (expected: an empty set each time). Iterates over however
# many folds url_splits actually holds instead of assuming exactly ten.
for i, fold in enumerate(url_splits):
  train = set(fold['train'])
  intersection = train.intersection(fold['test'])
  print(intersection)

set()
set()
set()
set()
set()
set()
set()
set()
set()
set()


In [None]:
# Derive the 13..19-coefficient MFCC files from the saved 20-coefficient file.
twenty_to_fewer_mfcc()

Loading MFCC_20_nfft_1024_hl_1024.json
Created drive/My Drive/Thesis/InputData/MFCC_13_nfft_1024_hl_1024.json
Created drive/My Drive/Thesis/InputData/MFCC_14_nfft_1024_hl_1024.json
Created drive/My Drive/Thesis/InputData/MFCC_15_nfft_1024_hl_1024.json
Created drive/My Drive/Thesis/InputData/MFCC_16_nfft_1024_hl_1024.json
Created drive/My Drive/Thesis/InputData/MFCC_17_nfft_1024_hl_1024.json
Created drive/My Drive/Thesis/InputData/MFCC_18_nfft_1024_hl_1024.json
Created drive/My Drive/Thesis/InputData/MFCC_19_nfft_1024_hl_1024.json


In [None]:
# Run Essentia's MusicExtractor on one track, requesting mean and stdev
# statistics for the low-level, rhythm and tonal descriptor groups.
# NOTE(review): `song` is not defined in any visible cell (only `sample_song`
# is) -- on a fresh kernel this raises NameError.
features, feature_frames = es.MusicExtractor(lowlevelStats=['mean', 'stdev'],
                                             rhythmStats=['mean', 'stdev'],
                                             tonalStats=['mean', 'stdev'])(song)

In [None]:
def _to_jsonable(value):
  """Convert numpy arrays/scalars (also nested in lists or tuples) to plain Python values."""
  if isinstance(value, np.ndarray):
    return value.tolist()
  if isinstance(value, np.generic):
    return value.item()
  if isinstance(value, (list, tuple)):
    return [_to_jsonable(item) for item in value]
  return value


def get_low_level_features(json_file, max_files_per_class=3):
  """Extract Essentia MusicExtractor descriptors for full tracks and save them as JSON.

  Tracks are globbed from BASE/SONG_FULL_PATH/<class>/*.mp3 for the classes
  'noDJ' (label 0) and 'yesDJ' (label 1). The result holds one list per
  descriptor name plus 'name' (track file name) and 'labels' (class index),
  each appended to once per processed track.

  Args:
    json_file: Name of the output file, created under BASE/DATA_SAVE_PATH.
    max_files_per_class: Upper bound on the tracks processed per class. The
      default of 3 reproduces the previous hard-coded limit; pass None to
      process every track.
  """
  json_save_file = os.path.join(BASE, DATA_SAVE_PATH, json_file)

  data = {
      'mapping': {
          'noDJ': 0,
          'yesDJ': 1
      },
      'name': [],
      'labels': []
  }

  paths = ['noDJ', 'yesDJ']
  for i, p in enumerate(paths):
    for j, file in enumerate(glob.glob(os.path.join(BASE, SONG_FULL_PATH, p, "*.mp3"))):
      if max_files_per_class is not None and j >= max_files_per_class:
        break
      features, feature_frames = es.MusicExtractor(lowlevelStats=['mean', 'stdev'],
                                                   rhythmStats=['mean', 'stdev'],
                                                   tonalStats=['mean', 'stdev'])(file)

      for feature_name in features.descriptorNames():
        # Descriptor values can be numpy arrays, which json cannot serialise.
        data.setdefault(feature_name, []).append(_to_jsonable(features[feature_name]))
      data['name'].append(os.path.basename(file))
      data['labels'].append(i)
      print("Extracted features for ", file)

  with open(json_save_file, 'w') as f:
    # json.dump writes to the file object. The previous json.dumps(data, f, ...)
    # call raised TypeError (dumps accepts no file argument) and wrote nothing.
    json.dump(data, f, indent=4)

# Trial run; the result goes to BASE/DATA_SAVE_PATH/outfile (the name is used
# as given, no '.json' extension is appended).
get_low_level_features('outfile')

Extracted features for  drive/My Drive/Thesis/Samples/FULL_TRACKS/noDJ/CYvF2VIfKck.mp3
Extracted features for  drive/My Drive/Thesis/Samples/FULL_TRACKS/noDJ/_0bgwAkpp0Y.mp3
Extracted features for  drive/My Drive/Thesis/Samples/FULL_TRACKS/noDJ/C5VCCwvxOh0.mp3
Extracted features for  drive/My Drive/Thesis/Samples/FULL_TRACKS/yesDJ/w0dQK4_llDI.mp3
Extracted features for  drive/My Drive/Thesis/Samples/FULL_TRACKS/yesDJ/WbAR--Hz094.mp3
Extracted features for  drive/My Drive/Thesis/Samples/FULL_TRACKS/yesDJ/uyLGCNkSgmk.mp3


TypeError: ignored

In [None]:
# List, in sorted order, the descriptor names held by the global `features` pool.
print(sorted(features.descriptorNames()))

['lowlevel.average_loudness', 'lowlevel.barkbands.mean', 'lowlevel.barkbands.stdev', 'lowlevel.barkbands_crest.mean', 'lowlevel.barkbands_crest.stdev', 'lowlevel.barkbands_flatness_db.mean', 'lowlevel.barkbands_flatness_db.stdev', 'lowlevel.barkbands_kurtosis.mean', 'lowlevel.barkbands_kurtosis.stdev', 'lowlevel.barkbands_skewness.mean', 'lowlevel.barkbands_skewness.stdev', 'lowlevel.barkbands_spread.mean', 'lowlevel.barkbands_spread.stdev', 'lowlevel.dissonance.mean', 'lowlevel.dissonance.stdev', 'lowlevel.dynamic_complexity', 'lowlevel.erbbands.mean', 'lowlevel.erbbands.stdev', 'lowlevel.erbbands_crest.mean', 'lowlevel.erbbands_crest.stdev', 'lowlevel.erbbands_flatness_db.mean', 'lowlevel.erbbands_flatness_db.stdev', 'lowlevel.erbbands_kurtosis.mean', 'lowlevel.erbbands_kurtosis.stdev', 'lowlevel.erbbands_skewness.mean', 'lowlevel.erbbands_skewness.stdev', 'lowlevel.erbbands_spread.mean', 'lowlevel.erbbands_spread.stdev', 'lowlevel.gfcc.cov', 'lowlevel.gfcc.icov', 'lowlevel.gfcc.mean

In [None]:
# Group descriptor names by the Python type of their value in `features`:
# f -> float, s -> str, ndarr -> numpy array, l -> list. Names whose value has
# any other type end up in no group.
f, ndarr, s, l = [], [], [], []
a = np.array
type_buckets = ((float, f), (str, s), (np.ndarray, ndarr), (list, l))
for i in features.descriptorNames():
  value = features[i]
  for expected_type, names in type_buckets:
    if isinstance(value, expected_type):
      names.append(i)
      break

In [None]:
# Show the second value returned by MusicExtractor (an essentia Pool object).
feature_frames

<essentia.common.Pool at 0x7f43159a58d0>

In [None]:
pip install pyAudioAnalysis

Collecting pyAudioAnalysis
[?25l  Downloading https://files.pythonhosted.org/packages/5f/a7/21c523c77c90215137809d4c006553e02318cdd4a847f8d18b4ae0d01e1b/pyAudioAnalysis-0.3.5.tar.gz (41.2MB)
[K     |████████████████████████████████| 41.2MB 105kB/s 
[?25hBuilding wheels for collected packages: pyAudioAnalysis
  Building wheel for pyAudioAnalysis (setup.py) ... [?25l[?25hdone
  Created wheel for pyAudioAnalysis: filename=pyAudioAnalysis-0.3.5-cp36-none-any.whl size=41161777 sha256=95823f6a74ac8b5917708cb95fde0989fffbcfb0e97bf28690718b0c7c71558e
  Stored in directory: /root/.cache/pip/wheels/7d/6f/10/fa7739a0f0de564fdf368cff6102060f845f1e734bc670d35d
Successfully built pyAudioAnalysis
Installing collected packages: pyAudioAnalysis
Successfully installed pyAudioAnalysis-0.3.5


In [None]:
pip install pydub
pip install eyed3



In [None]:
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures
from pyAudioAnalysis import MidTermFeatures

In [None]:
# Read one track with pyAudioAnalysis, mix it down to mono and extract
# short-term features.
# NOTE(review): `song` is not defined in any visible cell -- on a fresh kernel
# this raises NameError.
sr, signal = audioBasicIO.read_audio_file(song)
signal = audioBasicIO.stereo_to_mono(signal)
# 0.050*sr and 0.025*sr are sample counts equal to 50 ms and 25 ms of audio
# (presumably the analysis window and step -- confirm against the library docs).
F, f_names = ShortTermFeatures.feature_extraction(signal, sr, 0.050*sr, 0.025*sr)

In [None]:
# Shape of the short-term feature matrix returned by feature_extraction.
F.shape

(68, 14701)

In [None]:
# Number of short-term feature names returned alongside F.
len(f_names)

68

In [None]:
# First row of the short-term feature matrix.
F[0]

array([0.04251772, 0.06335973, 0.02917882, ..., 0.        , 0.        ,
       0.        ])

In [None]:
# Re-read the same track and mix it down to mono. This repeats the first two
# statements of the short-term feature cell and also depends on the undefined
# name `song`.
sr, signal = audioBasicIO.read_audio_file(song)
signal = audioBasicIO.stereo_to_mono(signal)

In [None]:
# Length of the mono signal, in samples.
len(signal)

17642927

In [None]:
# NOTE(review): despite the `_seconds` suffix both values are sample counts
# (one second's worth of samples at rate sr), and no visible cell reads them.
mid_window_seconds = int(1*sr)
mid_step_seconds = int(1*sr)

In [None]:
# Mid-term feature extraction with the mid-term window and step both set to the
# full signal length, which gives a single column of statistics for the track.
# len(signal) replaces the literal 17642927, which was the length of one
# specific track and silently wrong for any other input.
Features_midTerm, short_features_ignore, m_feature_name = MidTermFeatures.mid_feature_extraction(
    signal, sr, len(signal), len(signal), 0.050*sr, 0.025*sr)

In [None]:
# Number of mid-term feature names.
len(m_feature_name)

136

In [None]:
# Shape of the mid-term feature matrix.
Features_midTerm.shape

(136, 1)

In [None]:
# Name of the first mid-term feature.
m_feature_name[0]

'zcr_mean'

In [None]:
# Second row (index 1) of the mid-term feature matrix.
Features_midTerm[1]

array([0.06821387])