In [None]:
!python3 -m pip install PyMySQL
!python3 -m pip install SQLAlchemy
!python3 -m pip install google-cloud-storage
!python3 -m pip install --upgrade --quiet scikit-sound
!python3 -m pip install --upgrade --quiet pygame
!sudo apt-get -y install ffmpeg
!sudo apt-get -y install python3-pymysql

In [1]:
import sqlalchemy
import numpy as np

from google.cloud import storage
from numpy.fft import fft, ifft
from sksound.sounds import Sound
from scipy.io import wavfile

from tensorflow.keras.models import load_model

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
highpass=25

class StreamSpectrogram:
    
    def __init__(self, filename, win=128):
        sound = Sound(filename)        
        self.batch = sound.rate * 60
        self.data  = sound.data
        self.fs    = sound.rate
        if len(self.data.shape) > 1:
            self.data = self.data[:, 0]    
        self.win = win
        
    def __iter__(self):
        self.i = 0
        return self

    def __next__(self):
        if (self.i + 1) * self.batch < len(self.data): 
            start = self.i       * self.batch
            stop  = (self.i + 1) * self.batch
            raw   = self.data[start:stop]
            spec  = fwd_spectrogram(raw, win=512 + 2 * highpass)[:, 0:256] 
            t,d   = spec.shape
            current = []        
            for i in range(self.win, t, self.win // 2):
                x      = np.reshape(spec[i - self.win:i], (self.win, d, 1))
                mu     = np.mean(x)
                std    = np.std(x) + 1.0
                window = (x - mu) / std
                current.append(window)
            self.i += 1
            return np.stack(current)
        else:
            raise StopIteration
     
    def snippet(self, start, stop):
        w = 512 + 2 * highpass
        if start - w > 0:
            return self.data[start - w:stop]
        else:
            return None
    
def fwd_spectrogram(audio, win=512, step=64):
    '''
    Compute the spectrogram of audio data

    audio: one channel audio
    win: window size for dft sliding window
    step: step size for dft sliding windo
    '''
    spectrogram = []
    hanning = np.hanning(win)
    for i in range(win, len(audio), step):
        start = win // 2
        dft = np.abs(fft(audio[i - win: i] * hanning))[start:win]
        spectrogram.append(dft)
    return np.array(spectrogram)

In [3]:
# PASSWORDS AND STUFF HERE

In [4]:
settings = {
   'user': db_user,
   'pass': db_password,
   'host': host,
     'db': db_name
}
url = 'mysql+pymysql://{user}:{pass}@{host}/{db}'.format(**settings)  # 5432 is the default port
db = sqlalchemy.create_engine(url)

In [5]:
def run_query(query):
    with db.connect() as conn:
        rows = []
        for row in conn.execute(query).fetchall():
            rows.append(dict(row.items()))
        return rows

In [6]:
files = run_query("""
    SELECT 
        x.encoding, y.year, x.filename 
    FROM 
        wdp_ds.audio x 
    JOIN wdp_ds.encoding y ON x.encoding = y.encoding;
""")

In [7]:
paths = ["audio_files/{}/{}".format(file['year'], file['filename']) for file in files]

In [8]:
max_id = run_query("""
    SELECT 
        max(id)
    FROM 
        wdp_ds.not_silent
    """)
if max_id[0]['max(id)'] is None:
    max_id = 0
else:
    max_id = max_id[0]['max(id)']
print(max_id)

0


In [9]:
noise_classifier = load_model('../models/lstm_v4/v4.1/sil.h5')
client = storage.Client.from_service_account_json('../secret.json')
bucket = client.get_bucket('wdp-data')

In [10]:
id = max_id + 1
skip = 0
c = 0
for path, file_dict in zip(paths, files):
    if c > skip:
        print(path, file_dict, c)
        with open("/tmp/audio.m4a", "wb") as file_obj:
            blob = bucket.blob(path)
            blob.download_to_file(file_obj)
        stream = StreamSpectrogram("/tmp/audio.m4a")
        not_noise = []
        for x in stream:
            y = noise_classifier.predict(x).flatten()
            not_noise.extend([int(np.round(sample)) == 0 for sample in y])

        regions = []
        for i in range(0, len(not_noise)):
            if not_noise[i]:
                #win: 32, step: 256
                #start = i * 16 * 256
                #stop  = (i + 1) * 16 * 256 
                # win: 128, step: 64
                start = i * 64 * 64
                stop  = (i + 1) * 64 * 64 

                if len(regions) > 0: 
                    last  = regions[-1]
                    if start - last[1] < 48000 * 0.1:
                        start       = regions[-1][0]
                        regions[-1] = (start, stop)
                    else:
                        regions.append((start, stop))
                else:
                    regions.append((start, stop))
        regions = [(start, stop) for start, stop in regions if stop - start > (64 * 64)]
        if len(regions) > 0:
            #for start, stop in regions:
            #    audio = stream.snippet(start, stop)
            #   if audio is not None:
            #       wavfile.write('../data/silence/{}_{}_{}.wav'.format(file_dict['encoding'], start,stop), stream.fs, audio)

            with db.connect() as conn:
                for start, stop in regions:                
                    conn.execute("INSERT INTO wdp_ds.not_silent VALUES ({}, {}, '{}', {}, {})".format(id, file_dict['encoding'], file_dict['filename'], start, stop))  
                    id += 1
        print(file_dict['encoding'], ": ", regions)
    c += 1

5131101 :  [(679936, 729088), (737280, 753664), (831488, 843776), (1720320, 1736704), (1810432, 1839104), (1859584, 1884160), (2027520, 2043904), (2068480, 2076672), (2142208, 2154496), (2183168, 2240512), (2306048, 2318336), (2428928, 2437120), (2531328, 2576384), (2822144, 2830336), (2887680, 2899968), (2936832, 2949120), (2985984, 2994176), (3117056, 3133440), (3166208, 3178496), (3244032, 3252224), (3301376, 3313664), (3567616, 3579904), (3694592, 3702784), (3796992, 3809280), (4255744, 4280320), (4288512, 4321280), (4358144, 4407296), (4448256, 4464640), (4698112, 4734976), (4747264, 4763648), (4771840, 4780032), (4792320, 4800512), (5001216, 5009408), (5021696, 5033984), (5046272, 5054464), (5107712, 5115904), (5124096, 5152768), (5160960, 5173248), (5251072, 5271552), (5304320, 5316608), (5488640, 5517312), (5529600, 5574656), (5623808, 5636096), (5758976, 5767168), (5898240, 5914624), (5955584, 5976064), (6000640, 6033408), (6770688, 6799360), (6868992, 6877184), (6885376, 6893

KeyboardInterrupt: 