In [None]:
!pip install wandb
!wandb login

Collecting wandb
  Downloading wandb-0.12.11-py2.py3-none-any.whl (1.7 MB)
[?25l[K     |▏                               | 10 kB 11.3 MB/s eta 0:00:01[K     |▍                               | 20 kB 15.1 MB/s eta 0:00:01[K     |▋                               | 30 kB 18.7 MB/s eta 0:00:01[K     |▊                               | 40 kB 17.6 MB/s eta 0:00:01[K     |█                               | 51 kB 14.8 MB/s eta 0:00:01[K     |█▏                              | 61 kB 16.7 MB/s eta 0:00:01[K     |█▎                              | 71 kB 16.2 MB/s eta 0:00:01[K     |█▌                              | 81 kB 17.6 MB/s eta 0:00:01[K     |█▊                              | 92 kB 19.1 MB/s eta 0:00:01[K     |█▉                              | 102 kB 19.1 MB/s eta 0:00:01[K     |██                              | 112 kB 19.1 MB/s eta 0:00:01[K     |██▎                             | 122 kB 19.1 MB/s eta 0:00:01[K     |██▍                             | 133 kB 19.1 MB/s eta

# Upload Data to W & B

Given a folder of data, upload it to Weights & Biases as an artifact.

In [None]:
# Third-party / Colab-provided modules used throughout the upload cells.
import wandb
from google.colab import drive

# Make Google Drive visible inside the Colab file system; the dataset folders
# referenced later live under this mount point.
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Upload Dataset from Drive
Run this cell to copy the dataset from Google Drive to Weights & Biases (W&B).

In [None]:
import os

# Upload a dataset from the Colab file system to Weights & Biases.
# Running multiple times is OK and will just increase the version of the artifact.
#
# Also creates a table for the dataset, to let us access files more easily.

GDRIVE_DATASET_1 = "/content/gdrive/MyDrive/APS360 Group Project/Dataset 1"
GDRIVE_DATASET_2 = "/content/gdrive/MyDrive/APS360 Group Project/Dataset 2"

# Remove the stray .DS_Store file from Dataset 2 so it is not uploaded with the
# tracks. Done in Python (rather than a shell `rm`) so the path is derived from
# the constant above instead of being repeated by hand.
_ds_store_path = os.path.join(GDRIVE_DATASET_2, ".DS_Store")
if os.path.isfile(_ds_store_path):
  os.remove(_ds_store_path)


def _track_dirs(dataset_root):
  """Return the sub-directories of `dataset_root` as DirEntry objects, sorted by name.

  Plain files at the top level of the dataset are skipped: every track is
  expected to be a folder, and treating a file as a track would make the
  `mixture.wav` lookup fail. Sorting makes the table row order reproducible,
  since `os.scandir` yields entries in arbitrary order.
  """
  with os.scandir(dataset_root) as entries:
    return sorted(
        (entry for entry in entries if entry.is_dir()),
        key=lambda entry: entry.name,
    )


def _add_tracks(table, dataset_root, vocals_optional):
  """Add one row (id, mixture audio, vocals audio) to `table` per track folder.

  Args:
    table: the wandb.Table to append rows to; columns are id, mixture, vocals.
    dataset_root: directory whose sub-folders each hold `mixture.wav` and,
      usually, `vocals.wav`.
    vocals_optional: when True, a track without `vocals.wav` gets None in the
      vocals column; when False, `vocals.wav` is always passed to wandb.Audio.
  """
  for track_dir in _track_dirs(dataset_root):
    mixture_audio = wandb.Audio(f"{track_dir.path}/mixture.wav")
    vocals_path = f"{track_dir.path}/vocals.wav"
    if vocals_optional and not os.path.exists(vocals_path):
      vocals_audio = None
    else:
      vocals_audio = wandb.Audio(vocals_path)
    table.add_data(track_dir.name, mixture_audio, vocals_audio)


with wandb.init(project="deep-audio-isolation", job_type="dataset-upload", entity="aps360") as run:
  artifact = wandb.Artifact("tracks-raw", type="dataset")

  # Add all raw file-system files to the artifact.
  artifact.add_dir(GDRIVE_DATASET_1, "tracks1")
  artifact.add_dir(GDRIVE_DATASET_2, "tracks2")

  # Create a table. This will let us access and view the tracks more easily.
  tracks = wandb.Table(columns=["id", "mixture", "vocals"])

  # Every Dataset 1 track is expected to ship both files.
  _add_tracks(tracks, GDRIVE_DATASET_1, vocals_optional=False)

  # Dataset 2 is more complex, since not every entry has vocals.
  _add_tracks(tracks, GDRIVE_DATASET_2, vocals_optional=True)

  # Add table to the artifact, together with the raw files.
  artifact.add(tracks, "Tracks")

  run.log_artifact(artifact)

[34m[1mwandb[0m: Adding directory to artifact (/content/gdrive/MyDrive/APS360 Group Project/Dataset 1)... Done. 2.3s
[34m[1mwandb[0m: Adding directory to artifact (/content/gdrive/MyDrive/APS360 Group Project/Dataset 2)... Done. 13.8s





VBox(children=(Label(value='5324.349 MB of 5324.349 MB uploaded (329.646 MB deduped)\r'), FloatProgress(value=…

## Download Data from W & B
This is how the uploaded dataset can be accessed from code.

In [None]:
# Open a run only to fetch the dataset, then close it when the block exits.
with wandb.init(project="deep-audio-isolation", entity="aps360") as run:
  # Resolve the newest version of the raw-tracks dataset artifact.
  artifact = run.use_artifact("tracks-raw:latest", type="dataset")

  # download() returns the path of the local directory holding the files.
  tracks_dir = artifact.download()
  print(tracks_dir)

[34m[1mwandb[0m: Downloading large artifact tracks-raw:latest, 5326.64MB. 529 files... Done. 0:0:0


./artifacts/tracks-raw:v5



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

# Transform Data

## Convert Audio into PCM Tensors

In [None]:
import wandb
import scipy.io.wavfile
import numpy as np
import torch
import os


def _first_channel(samples):
  """Return channel 0 of a (frames, channels) array; 1-D (mono) arrays pass through unchanged."""
  return samples if samples.ndim == 1 else samples[:, 0]


def _track_names(root):
  """Return the names of the sub-directories of `root`, sorted for a reproducible order.

  Plain files directly under `root` are skipped, because every track is read
  as `<root>/<name>/mixture.wav` and a file entry would make that read fail.
  """
  return sorted(
      name for name in os.listdir(root)
      if os.path.isdir(f"{root}/{name}")
  )


with wandb.init(project="deep-audio-isolation", entity="aps360", job_type="data") as run:
  # Get latest version of audio tracks.
  tracks_artifact = run.use_artifact('tracks-raw:latest', type='dataset')
  tracks_dir = tracks_artifact.download()

  tracks1_root = f"{tracks_dir}/tracks1"
  tracks2_root = f"{tracks_dir}/tracks2"

  # One mono clip per list entry; the two lists are index-aligned.
  vocals = []
  mixtures = []

  # Sample rate is assumed to be the same for all tracks; the value from the
  # last file read is what gets stored.
  sample_rate = 0

  # Clip length in samples, taken from the tracks1 vocals files.
  target_length = 0

  for track_name in _track_names(tracks1_root):
    mixture_path = f"{tracks1_root}/{track_name}/mixture.wav"
    vocals_path = f"{tracks1_root}/{track_name}/vocals.wav"

    sample_rate, mixture_data = scipy.io.wavfile.read(mixture_path)
    sample_rate, vocals_data = scipy.io.wavfile.read(vocals_path)

    target_length = vocals_data.shape[0]

    vocals.append(_first_channel(vocals_data))
    mixtures.append(_first_channel(mixture_data))

  # Without a clip length the chunking below cannot work (a zero step would
  # make range() raise an opaque error), so fail early with a clear message.
  if target_length <= 0:
    raise ValueError(
        f"No usable tracks found in {tracks1_root}; cannot determine the clip length."
    )

  # The second directory is more complicated, because:
  # 1) not everything has a vocals.
  # 2) all the lengths are way longer.
  # Current approach is to ignore tracks with no vocals, and to cut the others
  # into clips of target_length samples, leaving a gap between clips.
  clip_stride = 3 * target_length
  for track_name in _track_names(tracks2_root):
    mixture_path = f"{tracks2_root}/{track_name}/mixture.wav"
    vocals_path = f"{tracks2_root}/{track_name}/vocals.wav"

    # Check for vocals before touching the (large) mixture file, so skipped
    # tracks cost no I/O.
    if not os.path.exists(vocals_path):
      continue

    sample_rate, mixture_data = scipy.io.wavfile.read(mixture_path)
    _, vocals_data = scipy.io.wavfile.read(vocals_path)

    mixture_mono = _first_channel(mixture_data)
    vocals_mono = _first_channel(vocals_data)

    # Only cut clips from the region covered by both signals, so each mixture
    # clip and its vocals clip always have exactly target_length samples.
    usable_length = min(len(mixture_mono), len(vocals_mono))

    # Leave some spacing between samples. The "+ 1" keeps a final clip that
    # ends exactly at the end of the track.
    for start in range(0, usable_length - target_length + 1, clip_stride):
      mixtures.append(mixture_mono[start:start + target_length])
      vocals.append(vocals_mono[start:start + target_length])

  # Stack into (num_clips, target_length) arrays. np.stack raises immediately
  # if any clip has a different length, instead of producing a ragged array.
  mixtures_np = np.stack(mixtures)
  vocals_np = np.stack(vocals)

  # Save as tensors, with sample-rate embedded inside.
  torch.save({
      "mixtures": torch.tensor(mixtures_np),
      "vocals": torch.tensor(vocals_np),
      "sample_rate": sample_rate
  }, "pcm.pt")

  artifact = wandb.Artifact("tracks-pcm-experimental-noinstr", type="dataset")
  artifact.add_file("pcm.pt")
  run.log_artifact(artifact)

[34m[1mwandb[0m: Currently logged in as: [33mjamohile[0m (use `wandb login --relogin` to force relogin)


[34m[1mwandb[0m: Downloading large artifact tracks-raw:latest, 5326.64MB. 529 files... Done. 0:0:0





VBox(children=(Label(value='492.150 MB of 492.150 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

## Convert PCM Audio into STFT Tensors

In [None]:
import os

import wandb
import scipy.signal
import torch

# STFT parameters, logged with the run so each output artifact is traceable
# to the settings that produced it.
config = {
  "segment": 256,   # samples per STFT segment (passed as nperseg)
  "overlap": 128    # samples shared by consecutive segments (passed as noverlap)
}

with wandb.init(project="deep-audio-isolation", entity="aps360", job_type="data", config=config) as run:
  # Read the settings back from the run so that any overrides applied by W&B
  # are honoured; the module-level `config` dict is left untouched.
  run_config = run.config

  # Download PCM tensors. The file is loaded from the artifact's download
  # directory rather than from a local "pcm.pt", so this cell does not depend
  # on a previous cell having left that file in the working directory.
  # NOTE(review): assumes the artifact stores the tensors as "pcm.pt" at its
  # root, as the PCM conversion cell does via add_file("pcm.pt") - confirm.
  pcm_artifact = run.use_artifact('tracks-pcm:latest', type='dataset')
  pcm_dir = pcm_artifact.download()
  pcm = torch.load(os.path.join(pcm_dir, "pcm.pt"))

  pcm_mixtures = pcm["mixtures"]
  pcm_vocals = pcm["vocals"]
  sample_rate = pcm["sample_rate"]

  segment = run_config["segment"]
  overlap = run_config["overlap"]

  # SciPy works on NumPy arrays, so convert the tensors explicitly. Only the
  # transform itself is kept; the frequency and time axes are discarded.
  _, _, mixtures = scipy.signal.stft(x=pcm_mixtures.numpy(), fs=sample_rate, nperseg=segment, noverlap=overlap)
  _, _, vocals = scipy.signal.stft(x=pcm_vocals.numpy(), fs=sample_rate, nperseg=segment, noverlap=overlap)

  torch.save({
      "mixtures": torch.tensor(mixtures),
      "vocals": torch.tensor(vocals),
      "sample_rate": sample_rate
  }, "freqs.pt")

  # The artifact name encodes the STFT settings, e.g. "tracks-freqs-s256_o128".
  artifact = wandb.Artifact(f"tracks-freqs-s{segment}_o{overlap}", type="dataset")
  artifact.add_file("freqs.pt")
  run.log_artifact(artifact)