<a href="https://colab.research.google.com/github/jasperSha/cloud_music/blob/main/feature_collection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import auth
auth.authenticate_user()

In [None]:
!curl https://sdk.cloud.google.com | bash

In [2]:
!gcloud projects list

PROJECT_ID             NAME                         PROJECT_NUMBER
cloudmusic-330004      cloudmusic                   731970834007
cluster-music          cluster-music                850659529381
composed-field-268617  ReverseAddressLookUp         1042903851702
solar-theory-326420    natural-language-processing  128859147416
studenthelper-520f1    studenthelper                280871841346


In [2]:
!gcloud config set project cluster-music

Updated property [core/project].


In [1]:
!pip install soundfile



In [4]:
import os
# create virtual directory for image data
os.makedirs('song_images', exist_ok=True)

# create virtual directory for frequency data
os.makedirs('song_freqs', exist_ok=True)

# create virtual directory for the flac files
os.makedirs('song_flacs', exist_ok=True)

In [None]:
# load audio every N batches
# have to manually enter into head and tail cmd (only have to change tail input to starting index each time, head just adds to it)
start = 0
!gsutil ls gs://deepclustermusic/flac_files/spotify_yt_data/*.flac | tail -n +0 | head -n 100 | gsutil -m cp -n -I './song_flacs'

# load frequency data
# !gsutil cp gs://deepclustermusic/*.csv './song_freqs'

In [6]:
!ls ./song_flacs/*.flac | wc -l

100


In [7]:
# remove half-completed copies if need be
!rm /content/song_flacs/*.flac_.gstmp

In [None]:
# load metadata
!gsutil cp gs://deepclustermusic/gcp_meta.csv .

Copying gs://deepclustermusic/gcp_meta.csv...
/ [1 files][744.3 KiB/744.3 KiB]                                                
Operation completed over 1 objects/744.3 KiB.                                    


In [None]:
# upload data to google cloud
# !gsutil cp testresults.csv gs://deepclustermusic/

In [12]:
import glob

import numpy as np
import pandas as pd

from scipy.stats import kurtosis, skew

import librosa
import librosa.feature
import torchaudio


import torch

In [None]:
# set mel configurations
run_config = dict(
    sample_rate=44100,
    n_fft=4096,
    n_mels=224,
    hop_length=441,
    win_length=1764,
    f_max=20000
)

In [13]:
flacs = list(glob.glob('/content/song_flacs/*.flac'))

In [44]:
songs = []
for fname in flacs:
  song_id = fname.split('/')[-1].split('.')[0]
  signal, sr = torchaudio.load(fname)
  channel = signal.ndim

  # convert any stereo to mono
  if channel == 2:
    signal = torch.mean(signal, dim=0, keepdim=True)

  num_elements = torch.numel(signal)
  duration = int(num_elements/sr)

  if signal is None:
    continue

  # # librosa functions
  # if channel == 2:
  #   signal = (signal[:,0] + signal[:,1])/2
  # duration = int(signal.shape[0]/sr)

  if duration < 120 or duration > 480:
    continue

  # transpose and squash dim, librosa expects dim=(n,) whereas torchaudio returns (1, n)
  signal = signal.T
  signal = signal[:,0]
  s = {
      'id': song_id,
      'signal': signal.numpy(), # need to convert to np array, memory leak in storing tensor in df
      'sr': sr,
      'channel': channel,
      'duration': duration
  }
  songs.append(s)

In [45]:
df = pd.DataFrame(songs)

In [None]:
# metadata = pd.read_csv('gcp_meta.csv')

# update songs with their corresponding metadata
full_df = df.merge(metadata, on='id', how='left')
full_df.reset_index()
full_df.head()
# temporary for now (as metadata not fully synced between everyone)
df = full_df.dropna()
df.head()

In [None]:
# remove rows where no signal was able to be extracted (at least one case of it due to being an interview)
df = df[df['signal'].map(lambda x: len(x) > 0)]
df.reset_index()
df.head()

Unnamed: 0,id,signal,sr,channel,artist,title,duration
0,0gOFu5o5DRk,"[tensor(0.), tensor(0.)]",44100,2,311,311 jackolantern's weather,204.0
1,0vyrgy_cnXs,"[tensor(0.), tensor(0.)]",44100,2,JAW,JAW - Cymbalta,283.0
3,15rOzIzptt0,"[tensor(-0.0121), tensor(-0.0116)]",48000,2,Erika Jayne,Get It Tonight (feat. Flo Rida) (Vino Mix Mix),289.0
9,13oxxEOAT0M,"[tensor(-0.0010), tensor(-0.0009)]",48000,2,Andrei Krylov,Army of the Dark Souls Marching On,195.0
10,0jEX7AXsXRM,"[tensor(1.1623e-05), tensor(-4.4942e-05)]",48000,2,Buddy Ace,That's the Way Love Is,177.0


In [46]:
signals_df = df.copy()

In [47]:
print(signals_df.head(), signals_df.shape)
print(signals_df['signal'].iloc[0].shape)

            id  ... duration
0  -NdqcX_nZrs  ...      187
1  -Hx5BToQ3Nk  ...      210
2  -dF2a9P9gsY  ...      206
3  05Q5akU8d7g  ...      204
4  -xLidUmccx4  ...      310

[5 rows x 5 columns] (92, 5)
(8988909,)


In [48]:
features = [
            'zero_crossing_rate', # percussive sounds
            'spectral_centroid', # brightness
            'spectral_rolloff', # majority of frequency within which song presides
            'mfcc', # the perceptual shape(envelope of time power spectrum) of the sound
            'spectral_contrast', # differences in peaks and valleys of amplitudes of the sound
            'spectral_bandwidth', # total range of frequency bands
            'spectral_flatness' # range between noisiness and 'tone'-ness of song (tone as in a pure note tone)
            ]

ZCR_BIAS = 1e-9

for f in features:
  audio_func = getattr(librosa.feature, f)

  if f == 'zero_crossing_rate':
    signals_df[f] = signals_df.apply(lambda x: audio_func(y=(x['signal'] + ZCR_BIAS)), axis=1)
  elif f == 'spectral_flatness':
    signals_df[f] = signals_df.apply(lambda x: audio_func(y=x['signal']), axis=1)
  else:
    signals_df[f] = signals_df.apply(lambda x: audio_func(y=x['signal'], sr=x['sr']), axis=1)
    
  signals_df['%s_mean'%f] = signals_df[f].apply(lambda x: np.mean(x))
  signals_df['%s_var'%f] = signals_df[f].apply(lambda x: np.var(x, ddof=1))
  signals_df['%s_kurtosis'%f] = signals_df[f].apply(lambda x: kurtosis(np.ndarray.flatten(x), fisher=True))
  signals_df['%s_skew'%f] = signals_df[f].apply(lambda x: skew(np.ndarray.flatten(x)))

  signals_df.drop(columns=[f], inplace=True, axis=1)


In [None]:
signals_df.head()