<a href="https://colab.research.google.com/github/jasperSha/cloud_music/blob/main/frequency_collection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Authenticate this Colab runtime with the user's Google account; the gcloud/gsutil
# shell commands in the following cells presumably rely on these credentials.
from google.colab import auth
auth.authenticate_user()

In [9]:
# Optional: install the Google Cloud SDK. Left commented out because the gcloud
# commands in the cells below ran without it in the recorded outputs.
# NOTE(review): piping a remote script straight into bash executes unreviewed code.
# !curl https://sdk.cloud.google.com | bash

In [10]:
# List the GCP projects visible to the authenticated account (used to look up the project id).
# NOTE(review): the saved output exposes every project id/number on the account;
# consider clearing this cell's output before sharing the notebook.
!gcloud projects list

PROJECT_ID             NAME                         PROJECT_NUMBER
cloudmusic-330004      cloudmusic                   731970834007
cluster-music          cluster-music                850659529381
composed-field-268617  ReverseAddressLookUp         1042903851702
solar-theory-326420    natural-language-processing  128859147416
studenthelper-520f1    studenthelper                280871841346


In [2]:
# Make 'cluster-music' the active gcloud project for the commands that follow.
!gcloud config set project cluster-music

Updated property [core/project].


In [3]:
import os

# Local working directories on the Colab VM:
#   song_freqs - frequency feature data
#   song_flacs - downloaded flac files
for workdir in ('song_freqs', 'song_flacs'):
  os.makedirs(workdir, exist_ok=True)

In [None]:
# load audio every N batches
# have to manually enter (only have to change tail input to starting index each time, head just adds to it)
start = 1001
end = 1500

# load frequency data
!gsutil -m cp gs://deepclustermusic/song_freqs/*.csv .

!gsutil ls gs://deepclustermusic/flac_files/spotify_yt_data/*.flac | tail -n +1001 | head -n 500 | gsutil -m cp -n -I './song_flacs'


In [None]:
# NOTE(review): fastaudio is not imported by any cell visible here; presumably it is
# installed for the audio dependencies it pulls in (torchaudio/librosa) - confirm.
# The install is unpinned, so results may change as upstream versions change.
!pip install fastaudio

In [6]:
import glob

import numpy as np
import pandas as pd

from scipy.stats import kurtosis, skew

import librosa
import librosa.feature

import torchaudio
import torch

In [7]:
# Paths of the flac files downloaded for the current batch.
flacs = glob.glob('/content/song_flacs/*.flac')

# read finished frequency features in here
finished = glob.glob('*.csv')
if finished:
  finished_df = pd.concat((pd.read_csv(f) for f in finished))
else:
  # No earlier batch has been processed yet. pd.concat raises ValueError on an empty
  # sequence, so fall back to an empty frame that still exposes the 'id' column
  # that later cells look songs up in.
  finished_df = pd.DataFrame(columns=['id'])

In [8]:
# Only tracks whose length (in whole seconds) lies in this window are kept.
MIN_DURATION_S = 120
MAX_DURATION_S = 480

# Small constant added to the signal before computing the zero crossing rate
# (presumably so exact-zero samples in silent passages do not count as crossings - confirm).
ZCR_BIAS = 1e-9

features = [
            'zero_crossing_rate', # percussive sounds
            'spectral_centroid', # brightness
            'spectral_rolloff', # majority of frequency within which song presides
            'mfcc', # the perceptual shape(envelope of time power spectrum) of the sound
            'spectral_contrast', # differences in peaks and valleys of amplitudes of the sound
            'spectral_bandwidth', # total range of frequency bands
            'spectral_flatness' # range between noisiness and 'tone'-ness of song (tone as in a pure note tone)
            ]


def _load_mono(fname):
  """Load an audio file and return (signal, sr, n_channels).

  signal is a 1-D torch tensor of shape (n,); multi-channel audio is averaged
  down to a single channel. n_channels is the channel count of the file as loaded.
  """
  signal, sr = torchaudio.load(fname)

  # torchaudio returns a 2-D (channels, frames) tensor, so the channel count is the
  # size of the first dimension; signal.ndim would be 2 for every file.
  n_channels = signal.shape[0]

  # convert any stereo to mono
  if n_channels > 1:
    signal = torch.mean(signal, dim=0, keepdim=True)

  # squash the channel dim, librosa expects dim=(n,) whereas torchaudio returns (1, n)
  return signal[0], sr, n_channels


def _compute_feature(name, signal, sr):
  """Evaluate librosa.feature.<name> on one signal and return librosa's raw result."""
  audio_func = getattr(librosa.feature, name)

  if name == 'zero_crossing_rate':
    return audio_func(y=(signal + ZCR_BIAS))
  if name == 'spectral_flatness':
    # called without a sample rate, like zero_crossing_rate above
    return audio_func(y=signal)
  return audio_func(y=signal, sr=sr)


# Build the lookup of finished ids once instead of scanning the column for every file.
finished_ids = set(finished_df['id'])

songs = []
for fname in flacs:
  song_id = fname.split('/')[-1].split('.')[0]

  if song_id in finished_ids:
    print('already finished this one')
    continue

  try:
    signal, sr, channel = _load_mono(fname)
  except (RuntimeError, OSError) as err:
    # torchaudio.load reports an unreadable file by raising rather than returning
    # None, so skip the file here instead of aborting the whole batch.
    print('could not load %s: %s' % (fname, err))
    continue

  # get duration in seconds
  duration = int(signal.shape[-1] / sr)

  if duration < MIN_DURATION_S or duration > MAX_DURATION_S:
    continue

  s = {
      'id': song_id,
      'signal': signal.numpy(), # need to convert to np array, memory leak in storing tensor in df
      'sr': sr,
      'channel': channel,
      'duration': duration
  }
  songs.append(s)

df = pd.DataFrame(songs)

if df.empty:
  # Every file was already finished or filtered out; fail with a clear message
  # rather than an opaque error further down when the 'signal' column is missing.
  raise RuntimeError('no new songs to process for batch %s-%s' % (start, end))

signals_df = df.copy()

for f in features:
  # raw per-frame feature matrix for every song (temporary column)
  signals_df[f] = signals_df.apply(lambda row: _compute_feature(f, row['signal'], row['sr']), axis=1)

  # collapse each matrix to four scalar summary statistics
  signals_df['%s_mean'%f] = signals_df[f].apply(lambda x: np.mean(x))
  signals_df['%s_var'%f] = signals_df[f].apply(lambda x: np.var(x, ddof=1))
  signals_df['%s_kurtosis'%f] = signals_df[f].apply(lambda x: kurtosis(np.ndarray.flatten(x), fisher=True))
  signals_df['%s_skew'%f] = signals_df[f].apply(lambda x: skew(np.ndarray.flatten(x)))

  signals_df = signals_df.drop(columns=[f])

# need to drop signal column or storage will blow up
signals_df = signals_df.drop(columns=['signal'])
print(signals_df.head())
print(start, end)

            id     sr  ...  spectral_flatness_kurtosis  spectral_flatness_skew
0  IrUkD07oUFg  48000  ...                  115.525460               10.832779
1  JuwLQGWgYnA  48000  ...                  346.945147               18.467583
2  F-zrm3vGbNw  48000  ...                  109.178706               10.516065
3  J6M5O-9oAyY  44100  ...                    6.233759                2.869390
4  HrBfLKR6HHM  48000  ...                 2742.430231               49.409058

[5 rows x 32 columns]
1001 1500


In [9]:
# Write this batch's feature table to a CSV named after the batch window (start_end.csv).
batch_csv = '%s_%s.csv' % (start, end)
signals_df.to_csv(batch_csv, index=False, encoding='utf-8')

In [11]:
!gsutil cp -n 1001_1500.csv gs://deepclustermusic/song_freqs/

Copying file://1001_1500.csv [Content-Type=text/csv]...
/ [1 files][255.3 KiB/255.3 KiB]                                                
Operation completed over 1 objects/255.3 KiB.                                    
