<a href="https://colab.research.google.com/github/jasperSha/cloud_music/blob/main/feature_collection/frequency_collection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Authenticate this Colab session with the user's Google account.
# NOTE(review): presumably required so the gsutil commands in the later cells
# can read from / write to the gs://deepclustermusic bucket - confirm.
from google.colab import auth
auth.authenticate_user()

In [2]:
import os

# Local working directories for this session:
#   song_freqs - frequency data
#   song_flacs - the flac files
for workdir in ('song_freqs', 'song_flacs'):
  # exist_ok=True keeps this cell safe to re-run
  os.makedirs(workdir, exist_ok=True)

In [None]:
# load audio every N batches
# have to manually enter (only have to change tail input to starting index each time, head just adds to it)
start = 15501
end = start + 249

# load frequency data
!gsutil -m cp gs://deepclustermusic/song_freqs/*.csv .

!gsutil ls gs://deepclustermusic/flac_files/*.flac | tail -n +15501 | head -n 250 | gsutil -m cp -n -I './song_flacs' &> /dev/null


In [4]:
# Sanity check: count the flac files currently in song_flacs/.
# When no .flac file is present, `ls` prints an error and the count is 0.
!ls song_flacs/*.flac | wc -l

ls: cannot access 'song_flacs/*.flac': No such file or directory
0


In [5]:
!pip install fastaudio &> /dev/null

In [6]:
import glob

import numpy as np
import pandas as pd

from scipy.stats import kurtosis, skew

import librosa
import librosa.feature

import torchaudio
import torch

In [7]:
def load_finished_features(pattern='*.csv'):
  """Read every CSV matching `pattern` into a single DataFrame.

  Returns an empty frame that still has an 'id' column when nothing matches
  (e.g. the very first batch), because pd.concat raises ValueError on an
  empty input. Rows get a fresh 0..n-1 index instead of repeating each
  file's own index.
  """
  csv_paths = sorted(glob.glob(pattern))
  if not csv_paths:
    return pd.DataFrame(columns=['id'])
  return pd.concat((pd.read_csv(path) for path in csv_paths), ignore_index=True)


# flac files downloaded for this batch
flacs = list(glob.glob('/content/song_flacs/*.flac'))

# frequency features already extracted in earlier batches (CSVs in the working directory)
finished_df = load_finished_features()

In [None]:
# Ids whose features were extracted in earlier batches. Built once as a set so
# the per-song membership test does not rescan the whole column every time.
# Cast to str so the ids still compare equal to the filename-derived strings
# if pandas parsed the column as numeric.
finished_ids = set(finished_df['id'].astype(str))

# songs outside this duration window (in seconds) are skipped
MIN_DURATION = 120
MAX_DURATION = 480

songs = []
for fname in flacs:
  # the song id is the file name up to its first '.'
  song_id = fname.split('/')[-1].split('.')[0]
  print('attempting: ', song_id)
  if song_id in finished_ids:
    print('already finished this one')
    continue

  # torchaudio.load returns a 2-D tensor laid out as (channels, frames), so the
  # channel count is the size of the first dimension (ndim is always 2)
  signal, sr = torchaudio.load(fname)
  channel = signal.shape[0]

  # convert any multi-channel audio to mono, keeping the (1, n) layout
  if channel > 1:
    signal = torch.mean(signal, dim=0, keepdim=True)

  # get duration in whole seconds
  duration = int(signal.shape[-1] / sr)

  if duration < MIN_DURATION or duration > MAX_DURATION:
    continue

  # librosa expects dim=(n,) whereas torchaudio returns (1, n): take the single row
  signal = signal[0].numpy()

  # skip signals that contain NaN or are entirely zero
  if np.isnan(signal).any() or not np.any(signal):
    continue

  songs.append({
      'id': song_id,
      'signal': signal,
      'sr': sr,
      'channel': channel,
      'duration': duration
  })

# explicit columns keep the schema stable even when no song passed the filters
signals_df = pd.DataFrame(songs, columns=['id', 'signal', 'sr', 'channel', 'duration'])
del songs

# librosa.feature functions to evaluate on every signal
features = [
            'zero_crossing_rate', # percussive sounds
            'spectral_centroid', # brightness
            'spectral_rolloff', # majority of frequency within which song presides
            'mfcc', # the perceptual shape(envelope of time power spectrum) of the sound
            'spectral_contrast', # differences in peaks and valleys of amplitudes of the sound
            'spectral_bandwidth', # total range of frequency bands
            'spectral_flatness' # range between noisiness and 'tone'-ness of song (tone as in a pure note tone)
            ]

# small offset added to the signal before counting zero crossings
# NOTE(review): presumably keeps exact-zero samples from registering as
# crossings - confirm.
ZCR_BIAS = 1e-9

if signals_df.empty:
  # Nothing passed the filters: skip librosa entirely, but still create the
  # summary columns so the CSV written for this batch has the usual schema.
  print('no new songs to process in this batch')
  for f in features:
    for stat in ('mean', 'var', 'kurtosis', 'skew'):
      signals_df['%s_%s'%(f, stat)] = pd.Series(dtype='float64')
else:
  for f in features:
    audio_func = getattr(librosa.feature, f)

    # zero_crossing_rate and spectral_flatness are called without sr;
    # every other feature also receives the sample rate
    if f == 'zero_crossing_rate':
      values = signals_df.apply(lambda row: audio_func(y=(row['signal'] + ZCR_BIAS)), axis=1)
    elif f == 'spectral_flatness':
      values = signals_df.apply(lambda row: audio_func(y=row['signal']), axis=1)
    else:
      values = signals_df.apply(lambda row: audio_func(y=row['signal'], sr=row['sr']), axis=1)

    # collapse each song's feature array to four scalars; the raw array is
    # only held in `values`, never stored as a column
    signals_df['%s_mean'%f] = values.apply(lambda x: np.mean(x))
    signals_df['%s_var'%f] = values.apply(lambda x: np.var(x, ddof=1))
    signals_df['%s_kurtosis'%f] = values.apply(lambda x: kurtosis(np.ndarray.flatten(x), fisher=True))
    signals_df['%s_skew'%f] = values.apply(lambda x: skew(np.ndarray.flatten(x)))

# need to drop signal column or storage will blow up
signals_df = signals_df.drop(columns=['signal'], errors='ignore')
print(signals_df.head())
print(start, end)

In [None]:
# Write this batch's features to '<start>_<end>.csv' without the index column.
batch_csv = '{}_{}.csv'.format(start, end)
signals_df.to_csv(batch_csv, index=False, encoding='utf-8')

In [None]:
# Upload step, left commented out so it only runs when deliberately enabled.
# The file name is hard-coded: it has to match the '<start>_<end>.csv' written
# by the previous cell, so update it whenever `start` changes.
# !gsutil cp -n 15501_15750.csv gs://deepclustermusic/song_freqs/