# Extract Whistles From Box 2 Box

We need to extract whistles played through the ocean and recorded by a box.

## Basic Idea
Use the original file to find gaps.
Align the recorded file to the original file (manually).
Find all gaps in the original file that are large enough (longer than `min_gap_size` samples):
    $$(gap_{i, start}, gap_{i, stop})$$.
    
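Use the gaps to find whistles: whistle $i$ lies between $gap_{i-1, stop}$ and $gap_{i, start}$, and is cut from the recorded file with an extra `border` of samples on each side.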

In [3]:
from mimic_utils.spectrogram import *
from scipy.io import wavfile

import matplotlib.pyplot as plt
import numpy as np

## The algorithms to extract the audio

In [138]:
class NameRing:
    '''
    Hands out names from a fixed collection in round-robin order.
    With the names a,b,c repeated calls to next() produce
    a b c a b c ... a b c
    '''
    def __init__(self, names):
        # names: indexable collection of labels to cycle through
        # i:     index of the name the upcoming next() call will return
        self.names = names
        self.i = 0

    def next(self):
        '''Return the name under the cursor and move the cursor on, wrapping to 0 after the last name.'''
        position = self.i
        current = self.names[position]
        following = position + 1
        self.i = following if following < len(self.names) else 0
        return current
    
    
def _find_gaps(data, min_gap_size, threshold = 0.1, window = 10):
    '''
    Find long stretches in which the moving average of data stays at or above threshold.

    For every sample index i in [window, len(data)) the mean of data[i - window:i]
    is compared against threshold. A stretch starts at the index where the mean
    rises to/above the threshold and stops at the index where it drops below again.
    Only stretches longer than min_gap_size samples are kept.

    Returns a list of [start, stop] sample indices in ascending order.
    '''
    n = len(data)
    if n <= window:
        return []
    # sums[j] is the sum of data[j:j + window]. The last window is dropped so that
    # j maps to sample index i = j + window with i in [window, n).
    sums = np.convolve(data, np.ones(window), mode="valid")[:-1]
    below = (sums / window) < threshold
    # The scan starts in the "below" state, so a signal that begins above the
    # threshold produces a rising edge at the very first evaluated index.
    state = np.concatenate(([True], below))
    flips = np.flatnonzero(state[1:] != state[:-1]) + window
    # State changes alternate: rising, falling, rising, ... (a trailing rising edge
    # without a matching falling edge is ignored by zip).
    rising  = flips[0::2]
    falling = flips[1::2]
    return [[int(r), int(f)] for r, f in zip(rising, falling) if f - r > min_gap_size]


def extract_all(original, recorded, output, names, border = 30000, min_gap_size = 100000):
    '''
    Cut the whistles out of a recorded file, using the gaps of the original file as a guide.

    original:     path of the original wav file; its first channel is searched for gaps
    recorded:     path of the recorded wav file; assumed to be sample-aligned with the original
    output:       existing directory the extracted wav files are written to
    names:        object with a next() method returning the label for the next whistle
    border:       number of extra samples kept before and after each whistle
    min_gap_size: a gap has to be longer than this many samples to count

    Whistle i is the span between the stop of gap i-1 and the start of gap i. Each whistle
    is written to '<output>/<recorded basename>_<start sample>_<name>.wav' at the sample
    rate of the recorded file.
    '''
    basename = recorded.split("/")[-1].replace(".wav", "")
    print(basename)
    _,  data_original = wavfile.read(original)
    fs, data_box      = wavfile.read(recorded)

    # Use the first channel; mono files are read as 1-D arrays and have no channel axis.
    reference = data_original[:, 0] if data_original.ndim > 1 else data_original
    box       = data_box[:, 0] if data_box.ndim > 1 else data_box

    # Scale to [0, 1], assuming 16 bit PCM samples (TODO confirm for other wav formats).
    # Convert to float first: adding 32768 directly to an int16 array overflows.
    data = (reference.astype(np.float64) + 32768) / (32768 + 32767)

    n = len(data)
    gaps = _find_gaps(data, min_gap_size)
    for start_i, i in gaps:
        print("STOP {} {} {} %done {}".format(start_i, i, i - start_i, (i / n) * 100))

    for (_, start), (stop, _) in zip(gaps, gaps[1:]):
        name = names.next()
        # Clamp at 0: a negative slice start would wrap around to the end of the recording.
        first = max(0, start - border)
        wavfile.write('{}/{}_{}_{}.wav'.format(output, basename, start, name), fs, box[first: stop + border])

## Extract the 00 variations

In [None]:
# Labels are assigned to the extracted whistles in this repeating order.
names = NameRing(["den", "gra", "rop", "sar", "sca"])
# Directory the extracted wav files are written to.
output = "00"
original = "/Users/daniel.kohlsdorf/Desktop/00-all-whistles-2019-synth--04--18.wav"
recorded = "/Users/daniel.kohlsdorf/Desktop/chat1-2019-06-23T123304-192k.wav"
extract_all(original=original, recorded=recorded, output=output, names=names)

chat1-2019-06-23T123304-192k
STOP 10 380255 380245 %done 0.5756447507323725
STOP 704629 883552 178923 %done 1.3375552479233388
STOP 1202016 1379643 177627 %done 2.0885570231414774
STOP 1694599 1868245 173646 %done 2.8282216600228827
STOP 2179148 2358291 179143 %done 3.5700722800473295
STOP 2672597 2833848 161251 %done 4.28998889054301
STOP 3074046 3219063 145017 %done 4.873142281434309
STOP 3409625 3550703 141078 %done 5.3751917617380105
STOP 3739048 3881052 142004 %done 5.875286876226152
STOP 4048326 4203174 154848 %done 6.362927639386172
STOP 4346495 4871424 524929 %done 7.37455037853992
STOP 5193913 5374738 180825 %done 8.136486610989495
STOP 5691304 5870834 179530 %done 8.887495955401342
STOP 6183918 6359378 175460 %done 9.627072789635726
STOP 6668504 6849410 180906 %done 10.36890221591779
STOP 6866153 6978423 112270 %done 10.564207093503189
STOP 7161890 7324321 162431 %done 11.08784088658632
STOP 7564480 7710034 145554 %done 11.671748169170995
STOP 7899700 8040968 141268 %done 12.