# Removing Silence

I show how to cut the audio up based on whether there is sound in it.

In [3]:
## Notebook settings
# Autoreload mode 2: re-import edited modules before each cell executes, so
# changes to local source files are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline, directly below the cell that draws them.
%matplotlib inline

Please make sure you have the [fastai-audio](https://github.com/mogwai/fastai-audio) repo installed in the `fastai_audio` directory.

In [4]:
from exp import *

ModuleNotFoundError: No module named 'exp'

Loading the data

In [None]:
# Load the example recording from the notebook's directory.
ad = AudioData.load('./Right_whale.wav')
# Keep `sr` (presumably the sample rate -- confirm against AudioData) as a
# notebook-level name; a later cell passes it back into AudioData.
sr = ad.sr

Using this we can get a better idea of the samples inside the audio track and what ranges we need to filter out

In [None]:
def stats(ad):
    """Print summary statistics of `ad.sig`, one per line.

    Prints, in this order, the result of calling `min()`, `max()`, `mean()`
    and `std()` on `ad.sig`, followed by its `shape` attribute. Returns None.
    """
    summaries = (('Min', 'min'), ('Max', 'max'), ('Mean', 'mean'), ('std', 'std'))
    for label, reducer in summaries:
        # Resolve and call the reducer right before printing, so each
        # statistic is computed and emitted one line at a time.
        print(f'{label}: {getattr(ad.sig, reducer)()}')
    print(f'Shape: {ad.sig.shape}')

In [None]:
# Wrap the loaded data in an AudioItem so it can be plotted and listened to.
s = AudioItem(ad)
# Plot the raw signal to see which amplitude ranges carry the calls.
plt.plot(s.data.sig)
# `hear()` comes from fastai-audio; presumably it renders an audio player.
s.hear()
# Print min / max / mean / std / shape of the signal via `stats`.
stats(ad)

# Filtering by range

We can see from the above roughly what ranges we're interested in so we'll filter by that

In [None]:
# Work on a copy so the signal held by `s` is left untouched.
sig = s.data.sig.clone()
sig[(sig < .03) & (sig > -.03)] = 0  # zero every sample strictly between -0.03 and 0.03
# Wrap the thresholded copy (same `sr` as the source) so it can be played back.
wos = AudioItem(AudioData(sig, s.data.sr))
plt.plot(sig)

Above you can see that the main calls have been selected but the audio sounds strange without a bit of padding

In [None]:
# Listen to the thresholded signal built in the previous cell.
wos.hear()

If we know there is only one call in the file we could just slam it all together

In [None]:
# Keep only the original samples whose thresholded counterpart in `wos` is
# non-zero; boolean indexing joins the survivors back to back.
# Note: this rebinds the notebook-level name `sig`.
sig = s.data.sig[(wos.data.sig != 0)]
AudioItem(AudioData(sig, sr)).hear()
plt.plot(sig)

This sounds a bit funky though ;) 

Let's cut out the parts we're interested in and add some padding to them.

We'll keep extending a sample for as long as we keep finding signal above the threshold within the padding window.

In [None]:
# Walk the thresholded signal (`wos`) and cut padded slices out of the
# original signal (`actual`) around every stretch of non-zero samples.
groups = []
looking = None  # index of the most recent non-zero sample; None while in silence
c = 0           # samples seen since the current group opened
actual = s.data.sig
padding = int(len(actual)*0.015) # This is the magic number: 1.5% of the clip, in samples

for index, i in enumerate(wos.data.sig):
    if looking is not None:
        c += 1
        if index - looking >= padding:
            # `index - c` is where the group opened. Clamp the padded start
            # to 0: a negative start would index from the end of the tensor.
            groups.append(actual[max(index - c - padding, 0): index])
            c = 0
            looking = None
            continue

    # A non-zero sample survived the threshold: open a group or extend the
    # current one. Without this branch `looking` is never set and no call
    # can ever be found.
    if i != 0:
        looking = index

# A call still open when the clip ends never meets the padding condition
# inside the loop, so emit it here.
if looking is not None:
    groups.append(actual[max(index - c - padding, 0):])

print('Number of calls found:', len(groups))

There we have it 

In [None]:
print(ad.sig.shape)
def split_by_silence(ad:AudioData, thresholds=None, pad_ms=200)->[AudioData]:
    """Split `ad` into one `AudioData` per stretch of non-silent signal.

    Samples selected by `thresholds` are treated as silence. A group opens at
    the first non-silent sample and closes once `pad_ms` worth of samples has
    passed without another non-silent one. Each returned slice runs from
    `pad_ms` before the group's first non-silent sample up to the position
    where the group closed, clamped to the bounds of the signal.

    Args:
        ad: source audio. `ad.sig` is walked sample by sample, so it is
            assumed to be a 1-D tensor -- TODO confirm for multi-channel data.
        thresholds: boolean mask over `ad.sig` marking the samples to treat
            as silence. `None` (the default) selects every sample strictly
            between -0.03 and 0.03.
        pad_ms: padding in milliseconds; converted to a sample count with
            `ad.sr`.

    Returns:
        A list of `AudioData`, in order of appearance, each wrapping a slice
        of a private copy of `ad.sig` together with `ad.sr`. A group that is
        still open when the signal ends is included as the last element.
    """
    actual = ad.sig.clone()
    sr = ad.sr
    # Compare against None explicitly: `not mask` raises for a tensor with
    # more than one element, which made it impossible to pass a custom mask.
    if thresholds is None: thresholds = (actual < .03) & (actual > -.03)

    # Silence the masked samples on a second copy; `actual` keeps the
    # original values that the returned slices are cut from.
    sig = actual.clone()
    sig[thresholds] = 0

    padding = int(pad_ms/1000*sr)  # pad_ms expressed in samples
    zero = torch.tensor(.0)        # built once instead of once per sample
    groups = []
    start = None  # index of the first non-silent sample of the open group
    last = None   # index of the most recent non-silent sample

    for index, sample in enumerate(sig):
        if last is not None and index - last >= padding:
            # Clamp the padded start to 0: a negative start would index from
            # the end of the tensor and yield a wrong (usually empty) slice.
            nd = actual[max(start - padding, 0): index]
            groups.append(AudioData(nd, sr))
            start = last = None
            continue  # the sample that closes a group is not inspected

        if not sample.equal(zero):
            if start is None: start = index
            last = index

    # A group still open at the end of the signal never meets the padding
    # condition inside the loop, so emit it here.
    if last is not None:
        groups.append(AudioData(actual[max(start - padding, 0):], sr))

    return groups

In [None]:
groups = split_by_silence(ad, pad_ms=250)
print(len(groups))
# Preview at most the first five groups. Slicing instead of `range(5)` keeps
# the cell from raising IndexError when fewer than five groups were found.
for i, d in enumerate(groups[:5]):
    plt.plot(d.sig)
    plt.show()
#     Optionally to save the audio (pass the tensor, not the AudioData wrapper):
#     torchaudio.save('whale'+str(i)+'.wav', d.sig, sr)
    AudioItem(d).hear()