In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
import tqdm
import pandas as pd
import abc
import yaml
import h5py
import librosa
import os
import hydra
from hydra import compose, initialize
from glob import glob
from itertools import chain

In [2]:
'''
How to make the framework flexible enough that one can point to which samples in a batch are meant to be
support/query per class? The implementation in DCASE2021 does not handle this.
'''

'\nHow to make the framework flexible enough that one can point to which samples in a batch are meant to be\nsupport/query per class? The implementation in DCASE2021 does not handle this.\n'

## Prototypical net

In [3]:
#DCASE2021

def conv_block(in_channels,out_channels):

    return nn.Sequential(
        nn.Conv2d(in_channels,out_channels,3,padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )

In [4]:
#DCASE2021

#TODO introduce parametrization of conv blocks?
class Protonet(nn.Module):
    def __init__(self, raw_transformer=None):
        super(Protonet,self).__init__()
        self.raw_transformer = raw_transformer
        self.encoder = nn.Sequential(
            conv_block(1,128),
            conv_block(128,128),
            conv_block(128,128),
            conv_block(128,128)
        )
    def forward(self,x):
        #Is there risk for this to be super slow?
        #A naive approach might transform the same data more than once?
        #Lookup tables?
        if self.raw_transformer is not None:
            x = self.raw_transformer.rtoi_standard(x)
        (num_samples,seq_len,mel_bins) = x.shape
        x = x.view(-1,1,seq_len,mel_bins)
        x = self.encoder(x)
        return x.view(x.size(0),-1)

In [5]:
'''
Will most likely lean heavily on the implementation of the DCASE2021 task 5 baseline implementation.
'''
def prototypical_loss(input, target, n_support):
    pass

## Data processing

In [6]:
'''
    * Design choice: Handle most of pre-processing as part of the model (torchlibrosa)?
      May ultimately lead to simpler augmentation etc down the line. Work with raw audio as far as possible?
      
    * Make use of h5py library for storing training, validation and test sets?
      Still raw audio sets?
    
    * Incorporate pytorch Dataloader, seems prudent and a good design choice.
      read(h5py) file + Episodic sampler -> Dataloader?
      
    * Slight change of mind. Datagen and FeatureExtractor is not really worth spending time on for now.
      Sure they could be interfaces for a framework up the road but can do without for now since the loop
      will most likely be quite task dependent for now.
      
'''

'\n    * Design choice: Handle most of pre-processing as part of the model (torchlibrosa)?\n      May ultimately lead to simpler augmentation etc down the line. Work with raw audio as far as possible?\n      \n    * Make use of h5py library for storing training, validation and test sets?\n      Still raw audio sets?\n    \n    * Incorporate pytorch Dataloader, seems prudent and a good design choice.\n      read(h5py) file + Episodic sampler -> Dataloader?\n      \n    * Slight change of mind. Datagen and FeatureExtractor is not really worth spending time on for now.\n      Sure they could be interfaces for a framework up the road but can do without for now since the loop\n      will most likely be quite task dependent for now.\n      \n'

In [7]:
'''
Possibly take a h5 file as input and return X_train, Y_train, X_val, Y_val
Is this an approach that we like? Is it commonly used for deep learning?
'''

class Datagen():
    
    def __init__(self, conf):
        pass

In [16]:
'''
Possibly work on an raw files and annotations and return/write h5 files.
This might be clunky to include in a framework since this most likely is dataset dependent.
Might however benfit from having an interface which is inherited by classes working on specific datasets.
'''

class FeatureExtractor(abc.ABC):
    
    def __init__(self):
        pass

'''
Takes the data from the DCASE (all files one folder) and returns h5 file with the datasets 'features' and 'labels'
GOAL: Start with training data. Extract raw segments of interest.
Not perfect but does the job for now?
'''
class MyF_Ext(FeatureExtractor):
    
    def __init__(self, config):
        self.config = config
        
    def extract_features(self):
        
        if self.config.set.train:
            self.extract_train()
        else:
            self.extract_test()
    
    '''
    Assumes all *.csv and *.wav files are in the same folder which path is in config.
    Either creates spectrograms as features or raw audio segments containing events.
    Assumes annotations as those provided in 
    '''
    
    def extract_train(self):
        
        
        csv_files = [file for file in glob(os.path.join(self.config.path.data_train, '*.csv'))]
        
        if self.config.features.raw:
            
            events = []
            labels = []
            
            for file in csv_files:
            
                print('Processing ' + file.replace('csv', 'wav'))
                audio, sr = librosa.load(file.replace('csv', 'wav'), self.config.features.sr)
                df = pd.read_csv(file, header=0, index_col=False)
                df_pos = df[(df == 'POS').any(axis=1)]
                
                #Add config options for window size around event
                df_pos.loc[:, 'Starttime'] = df_pos['Starttime'] - 0.025
                df_pos.loc[:, 'Endtime'] = df_pos['Endtime'] + 0.025
                start_time = [int(np.floor(start * sr)) for start in df_pos['Starttime']]
                end_time = [int(np.floor(end * sr)) for end in df_pos['Endtime']]
                
                #Better way of doing this?
                for i in range(len(start_time)):
                    events += [audio[start_time[i]:end_time[i]]]
                    
                labels += list(chain.from_iterable(
                    [df_pos.columns[(df_pos == 'POS').loc[index]].values for index, _ in df_pos.iterrows()]))
            
            print('Padding')
            #Pad arrays in events and format for write
            max_len = 0
            for e in events:
                if len(e) > max_len:
                    max_len = len(e)
                    
            for i in range(len(events)):
                if len(events[i]) < max_len:
                    events[i] = np.append(events[i], np.array([self.config.features.raw_pad]*(max_len-len(events[i]))))
            
            events = np.array(events)
            
            print('Writing to file')
            
            hf = h5py.File(os.path.join(self.config.path.train_w, 'raw_train.h5'), 'w')
            hf.create_dataset('features', data=events)
            hf.create_dataset('labels', data=[s.encode() for s in labels], dtype='S20')
            hf.close()
            
            print('Done')
            
        else:
            
            #DCASE more or less
            
            fps = self.config.features.sr / self.config.hop_mel
            seg_len = int(round(self.config.features.seg_len * fps))
            hop_seg = int(round(self.config.features.hop_seg * fps))
            
            
            events = []
            labels = []
            
            for file in csv_files:
                
                print('Processing ' + file.replace('csv', 'wav'))
                audio, sr = librosa.load(file.replace('csv', 'wav'), self.config.features.sr)
                
                #PCEN STUFF HERE
                
                df = pd.read_csv(file, header=0, index_col=False)
                df_pos = df[(df == 'POS').any(axis=1)]
                
                start_time, end_time = time_2_frame(df_pos, fps)
                label_f = list(chain.from_iterable(
                    [df_pos.columns[(df_pos == 'POS').loc[index]].values for index, _ in df_pos.iterrows()]))
                
                for index in range(len(start_time)):
                    
                    str_ind = start_time[index]
                    end_ind = end_time[index]
                    label = label_f[index]
                    
                    if end_ind - str_ind > seg_len:
                        shift = 0
                        while end_ind - (str_ind + shift) > seg_len:
                            
                
    
    def extract_test(self, config):
        pass

In [9]:
#Instance with torchlibrosa to be included in model if input is raw.

class RawTransformer:
    
    def __init__(self, config):
        #Mel stuff etc
        self.config = config
    
    #Input is a training batch?
    def rtoi_standard(input):
        pass

## Episodic constructor

In [10]:
#DCASE 2021 ish
#Instance given to DataLoader on argument batch_sampler

class RandomEpisodicSampler(data.Sampler):
    
    #Include the option to choose the number of query samples
    #Y_train -> labels, just a list of the targets (list of ints?)
    def __init__(self, labels, n_episodes, n_way, n_support, n_query):
        
        #Number of episodes per epoch. len(labels)/(n_support * n_query) ?
        self.n_episodes = n_episodes
        self.n_way = n_way
        self.n_support = n_support
        self.n_query = n_query
        
        self.sample_indices = []
        for i in range(max(labels) + 1):
            ix = np.argwhere(labels == i).reshape(-1)
            ix = torch.from_numpy(ix)
            self.sample_indices.append(ix)
            
        if self.n_way > len(self.samples_indices):
            raise ValueError('Error: "n_way" parameter is higher than the unique number of classes')
    
    def __len__(self):
        return self.n_episodes
    
    def __iter__(self):
        yield None

## Config/parse/util

In [15]:
def time_2_frame(df,fps):


    'Margin of 25 ms around the onset and offsets'

    df.loc[:,'Starttime'] = df['Starttime'] - 0.025
    df.loc[:,'Endtime'] = df['Endtime'] + 0.025

    'Converting time to frames'

    start_time = [int(np.floor(start * fps)) for start in df['Starttime']]

    end_time = [int(np.floor(end * fps)) for end in df['Endtime']]

    return start_time,end_time

## Loop

## Test


In [11]:
initialize(job_name='test')

See https://hydra.cc/docs/next/upgrades/1.0_to_1.1/changes_to_hydra_main_config_path for more information.
  initialize(job_name='test')


hydra.initialize()

In [13]:
cfg = compose(config_name='config')

f_ext = MyF_Ext(cfg)
f_ext.extract_features()

Processing C:\Users\marti\Documents\playground\data\small\2015-09-04_08-04-59_unit03.wav
Processing C:\Users\marti\Documents\playground\data\small\dcase_MK1.wav
Padding
Writing to file
Done
