In [1]:
import asteroid
asteroid.__version__

'0.3.3'

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import re
from pathlib import Path
import librosa

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
p = Path(r'Notebooks/MiniLibriMix').glob('**/*')
FILES = [x for x in p if x.is_file()]

In [4]:
class DataHelper(object):
    def __init__(self, files=FILES):
        self.files = files
        
    def __len__(self):
        return len(self.files)
        
    def get_file_types(self):
        strings = [str(f) for f in self.files]
        types = set(re.split('\.',s)[-1] for s in strings)
        return types
    
    def train_test_split(self):
        strings = [str(f) for f in self.files]
        train = [s for s in strings if re.search('/train/', s)]
        test = [s for s in strings if re.search('/val/', s)]
        return train, test
    
    def load_csv_tables(self):
        train, test = self.train_test_split()
        
        train_csvs = [t for t in train if re.search('.csv', t)]
        test_csvs = [t for t in test if re.search('.csv', t)]
        
        train_dfs = [pd.read_csv(t).rename(columns={'Unnamed: 0': 'index'}) for t in train_csvs]
        train_names = [re.split('/', re.split('\.', t)[-2])[-1] for t in train_csvs]
        
        test_dfs = [pd.read_csv(t).rename(columns={'Unnamed: 0': 'index'}) for t in test_csvs]
        test_names = [re.split('/', re.split('\.', t)[-2])[-1] for t in train_csvs]
        
        train_dict = dict(zip(train_names, train_dfs))
        test_dict = dict(zip(test_names, test_dfs))
        return train_dict, test_dict

In [5]:
datahelper = DataHelper()
len(datahelper)
datahelper.get_file_types()
train, test = datahelper.train_test_split()
len(train), len(test)
train_dfs, test_dfs = datahelper.load_csv_tables()

In [6]:
train_df = train_dfs['mixture_train_mix_clean']

In [7]:
example_1 = train_df.iloc[0]
example_1

index                                                           70
mixture_ID                         5400-34479-0005_4973-24515-0007
mixture_path     MiniLibriMix/train/mix_clean/5400-34479-0005_4...
source_1_path    MiniLibriMix/train/s1/5400-34479-0005_4973-245...
source_2_path    MiniLibriMix/train/s2/5400-34479-0005_4973-245...
length                                                      121920
Name: 0, dtype: object

In [8]:
mixture_path = example_1['mixture_path']
src_1_path = example_1['source_1_path']
src_2_path = example_1['source_2_path']

In [9]:
mixture, sr = librosa.load('Notebooks/'+mixture_path, sr=None)
src_1, _ = librosa.load('Notebooks/'+src_1_path, sr=None)
src_2, _ = librosa.load('Notebooks/'+src_2_path, sr=None)

In [10]:
def generate_spectrogram(signal, sr=8000, low_lim=0, high_lim=8000//2):
    S = librosa.stft(signal)
    return S

In [11]:
S_X = np.expand_dims(np.expand_dims(generate_spectrogram(mixture)[:1024, :128], axis=0),axis=0)
S_X.shape

(1, 1, 1024, 128)

In [12]:
S_Y1 = np.expand_dims(generate_spectrogram(src_1)[:1024, :128], axis=0)
S_Y2 = np.expand_dims(generate_spectrogram(src_2)[:1024, :128], axis=0)
S_Y = np.expand_dims(np.concatenate([S_Y1, S_Y2], axis=0), axis=0)
S_Y.shape

(1, 2, 1024, 128)

In [13]:
from IPython.display import Audio
Audio(librosa.core.istft(np.squeeze(S_X[0,0,:,:])), rate=8000)

In [14]:
Audio(librosa.core.istft(np.squeeze(S_Y[0,0,:,:])), rate=8000)

In [15]:
Audio(librosa.core.istft(np.squeeze(S_Y[0,1,:,:])), rate=8000)

In [16]:
dir(asteroid.models)

['ConvTasNet',
 'DPRNNTasNet',
 'DPTNet',
 'DeMask',
 'LSTMTasNet',
 'SuDORMRFImprovedNet',
 'SuDORMRFNet',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'base_models',
 'conv_tasnet',
 'demask',
 'dprnn_tasnet',
 'dptnet',
 'get',
 'lstm_tasnet',
 'publisher',
 'register_model',
 'save_publishable',
 'sudormrf',
 'upload_publishable',
 'zenodo']

In [17]:
model = asteroid.models.conv_tasnet.ConvTasNet(n_src=2)

In [18]:
model

ConvTasNet(
  (encoder): Encoder(
    (filterbank): FreeFB()
  )
  (masker): TDConvNet(
    (bottleneck): Sequential(
      (0): GlobLN()
      (1): Conv1d(512, 128, kernel_size=(1,), stride=(1,))
    )
    (TCN): ModuleList(
      (0): Conv1DBlock(
        (shared_block): Sequential(
          (0): Conv1d(128, 512, kernel_size=(1,), stride=(1,))
          (1): PReLU(num_parameters=1)
          (2): GlobLN()
          (3): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=(1,), groups=512)
          (4): PReLU(num_parameters=1)
          (5): GlobLN()
        )
        (res_conv): Conv1d(512, 128, kernel_size=(1,), stride=(1,))
        (skip_conv): Conv1d(512, 128, kernel_size=(1,), stride=(1,))
      )
      (1): Conv1DBlock(
        (shared_block): Sequential(
          (0): Conv1d(128, 512, kernel_size=(1,), stride=(1,))
          (1): PReLU(num_parameters=1)
          (2): GlobLN()
          (3): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=(2,), dilation=(2,), gro

In [19]:
inpt = mixture
outpt = model(torch.Tensor(mixture)).detach().numpy()

In [20]:
Audio(inpt, rate=8000)

In [21]:
Audio(outpt[0], rate=8000)

In [22]:
from asteroid.losses import PITLossWrapper
from asteroid.losses import pairwise_mse, singlesrc_mse, multisrc_mse

model = asteroid.models.conv_tasnet.ConvTasNet(n_src=2).to('cuda')
X = torch.Tensor(np.expand_dims(np.expand_dims(mixture, axis=0),axis=0)).to('cuda')
Y = torch.Tensor(np.expand_dims(np.vstack([src_1, src_2]),axis=0)).to('cuda')
loss_func = asteroid.losses.PITLossWrapper(pairwise_mse, pit_from='pw_mtx')

import torch.optim as optim

criterion = loss_func
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [23]:
losses = []
for epoch in range(200):

    X_inputs, Y_outputs = X, Y

    optimizer.zero_grad()

    outputs = model(X_inputs)
    loss = criterion(outputs, Y_outputs)
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

    print(f'Epoch: {epoch} >>>>>>>>>>>>>> Loss: {loss.item()}')

Epoch: 0 >>>>>>>>>>>>>> Loss: 0.0005621500895358622
Epoch: 1 >>>>>>>>>>>>>> Loss: 0.0005621499149128795
Epoch: 2 >>>>>>>>>>>>>> Loss: 0.000562149565666914
Epoch: 3 >>>>>>>>>>>>>> Loss: 0.0005621489835903049
Epoch: 4 >>>>>>>>>>>>>> Loss: 0.0005621482850983739
Epoch: 5 >>>>>>>>>>>>>> Loss: 0.0005621474701911211
Epoch: 6 >>>>>>>>>>>>>> Loss: 0.0005621465388685465
Epoch: 7 >>>>>>>>>>>>>> Loss: 0.00056214549113065
Epoch: 8 >>>>>>>>>>>>>> Loss: 0.0005621443269774318
Epoch: 9 >>>>>>>>>>>>>> Loss: 0.0005621430464088917
Epoch: 10 >>>>>>>>>>>>>> Loss: 0.0005621417658403516
Epoch: 11 >>>>>>>>>>>>>> Loss: 0.0005621403688564897
Epoch: 12 >>>>>>>>>>>>>> Loss: 0.0005621389136649668
Epoch: 13 >>>>>>>>>>>>>> Loss: 0.0005621373420581222
Epoch: 14 >>>>>>>>>>>>>> Loss: 0.0005621358286589384
Epoch: 15 >>>>>>>>>>>>>> Loss: 0.0005621341988444328
Epoch: 16 >>>>>>>>>>>>>> Loss: 0.0005621325690299273
Epoch: 17 >>>>>>>>>>>>>> Loss: 0.0005621308228000998
Epoch: 18 >>>>>>>>>>>>>> Loss: 0.0005621290765702724
Epoch:

Epoch: 154 >>>>>>>>>>>>>> Loss: 0.0005618551513180137
Epoch: 155 >>>>>>>>>>>>>> Loss: 0.0005618530558422208
Epoch: 156 >>>>>>>>>>>>>> Loss: 0.0005618510767817497
Epoch: 157 >>>>>>>>>>>>>> Loss: 0.0005618490977212787
Epoch: 158 >>>>>>>>>>>>>> Loss: 0.0005618470022454858
Epoch: 159 >>>>>>>>>>>>>> Loss: 0.0005618449649773538
Epoch: 160 >>>>>>>>>>>>>> Loss: 0.0005618429277092218
Epoch: 161 >>>>>>>>>>>>>> Loss: 0.0005618408904410899
Epoch: 162 >>>>>>>>>>>>>> Loss: 0.0005618388531729579
Epoch: 163 >>>>>>>>>>>>>> Loss: 0.000561836757697165
Epoch: 164 >>>>>>>>>>>>>> Loss: 0.000561834778636694
Epoch: 165 >>>>>>>>>>>>>> Loss: 0.000561832741368562
Epoch: 166 >>>>>>>>>>>>>> Loss: 0.00056183070410043
Epoch: 167 >>>>>>>>>>>>>> Loss: 0.000561828725039959
Epoch: 168 >>>>>>>>>>>>>> Loss: 0.0005618267459794879
Epoch: 169 >>>>>>>>>>>>>> Loss: 0.000561824650503695
Epoch: 170 >>>>>>>>>>>>>> Loss: 0.000561822613235563
Epoch: 171 >>>>>>>>>>>>>> Loss: 0.0005618205759674311
Epoch: 172 >>>>>>>>>>>>>> Loss: 0.00

In [27]:
X.shape

torch.Size([1, 1, 121920])

In [28]:
Y.shape

torch.Size([1, 2, 121920])

In [29]:
with torch.no_grad():
    out = model(X)

In [36]:
out1 = out.cpu().numpy()[0,0,:]
out2 = out.cpu().numpy()[0,1,:]

In [38]:
Audio(out1, rate=8000)

In [39]:
Audio(out2, rate=8000)

In [41]:
Audio(Y[0,0,:].cpu().numpy(), rate=8000)

In [91]:
from torch.optim import Adam
from torch.utils.data import DataLoader
import pytorch_lightning as pl

from asteroid.data import LibriMix
from asteroid.engine.system import System
from asteroid.losses import PITLossWrapper, pairwise_neg_sisdr
from asteroid import ConvTasNet

train_set, val_set = LibriMix.mini_from_download(task='sep_clean')
train_loader = DataLoader(train_set, batch_size=4, drop_last=True)
val_loader = DataLoader(val_set, batch_size=4, drop_last=True)

# Define model and optimizer (one repeat to be faster)
model = ConvTasNet(n_src=2, n_repeats=1)
optimizer = Adam(model.parameters(), lr=1e-3)
# Define Loss function.
loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
# Define System
system = System(model=model, loss_func=loss_func, optimizer=optimizer,
                train_loader=train_loader, val_loader=val_loader)
# Define lightning trainer, and train
trainer = pl.Trainer(min_epochs=10)
trainer.fit(system)

HBox(children=(FloatProgress(value=0.0, max=640547371.0), HTML(value='')))




GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name      | Type           | Params
---------------------------------------------
0 | model     | ConvTasNet     | 1 M   
1 | loss_func | PITLossWrapper | 0     


Drop 0 utterances from 800 (shorter than 3 seconds)
Drop 0 utterances from 200 (shorter than 3 seconds)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

Saving latest checkpoint..





1

In [46]:
X0, Y0 = next(iter(train_loader))

In [47]:
X0.size()

torch.Size([4, 24000])

In [48]:
Y0.size()

torch.Size([4, 2, 24000])

In [49]:
Audio(X0[0].numpy(), rate=8000)

In [50]:
Audio(Y0[0,0,:].numpy(), rate=8000)

In [51]:
Audio(Y0[0,1,:].numpy(), rate=8000)

In [88]:
model(X0).size()

torch.Size([4, 2, 24000])

In [90]:
Audio(model(X0).detach().cpu().numpy()[0,0,:], rate=8000)

In [81]:
ls

epoch=0.ckpt


In [82]:
path = Path('lightning_logs/version_0/checkpoints/epoch=0.ckpt')

In [83]:
path

PosixPath('lightning_logs/version_0/checkpoints/epoch=0.ckpt')

In [85]:
System.load_from_checkpoint('lightning_logs/version_0/checkpoints/epoch=0.ckpt')

FileNotFoundError: [Errno 2] No such file or directory: 'lightning_logs/version_0/checkpoints/epoch=0.ckpt'

In [75]:
Path

pathlib.Path

In [67]:
pwd

'/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation'

In [68]:
cd lightning_logs

/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/lightning_logs


In [69]:
ls

[0m[01;34mversion_0[0m/


In [70]:
cd version_0

/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/lightning_logs/version_0


In [71]:
ls

[0m[01;34mcheckpoints[0m/                                                   hparams.yaml
events.out.tfevents.1600706014.peter-deep-ml-workspace.2774.0


In [72]:
cd checkpoints

/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/lightning_logs/version_0/checkpoints


In [73]:
ls

epoch=0.ckpt


In [26]:
.shape

SyntaxError: invalid syntax (<ipython-input-26-abefa8d9bb1a>, line 1)