In [1]:
from urllib.parse import quote_plus

import numpy as np
import sqlalchemy

from google.cloud import storage
from numpy.fft import fft, ifft
from sksound.sounds import Sound

from tensorflow.keras.models import load_model

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
def fwd_spectrogram(audio, win=512, step=256):
    '''
    Compute the magnitude spectrogram of audio data.

    A Hann-weighted window of `win` samples is slid over `audio` in hops
    of `step` samples. For every window the magnitudes of the DFT bins
    win // 2 .. win - 1 are kept.

    audio: one channel audio
    win: window size for dft sliding window
    step: step size for dft sliding window

    returns: array of shape (n_frames, win - win // 2). When the audio is
             too short for a single frame the result has shape
             (0, win - win // 2), so callers can always unpack two
             dimensions.

    Note: windows end at sample indices win, win + step, ... strictly
    below len(audio), so a window ending exactly on the last sample is
    not emitted.
    '''
    first_bin = win // 2  # loop invariant: first DFT bin that is kept
    hanning = np.hanning(win)
    spectrogram = [
        np.abs(fft(audio[end - win:end] * hanning))[first_bin:win]
        for end in range(win, len(audio), step)
    ]
    if not spectrogram:
        # np.array([]) would be 1-D with shape (0,); keep the result 2-D.
        return np.empty((0, win - first_bin))
    return np.array(spectrogram)

def spectrogram_from_file(filename, win=512, step=256, filter=25):
    '''
    Read an audio file and compute its magnitude spectrogram.

    filename: path to the file
    win: window size forwarded to fwd_spectrogram
    step: step size forwarded to fwd_spectrogram
    filter: number of leading spectrogram rows that are set to zero

    returns: the array produced by fwd_spectrogram with its first
             `filter` rows zeroed. No normalization is applied here.
    '''
    samples = Sound(filename).data
    if len(samples.shape) > 1:
        # More than one dimension: keep column 0 (presumably the first channel).
        samples = samples[:, 0]
    # win/step have to be forwarded explicitly; otherwise fwd_spectrogram
    # silently falls back to its own defaults.
    spec = fwd_spectrogram(samples, win=win, step=step)
    # NOTE(review): this zeroes the first `filter` rows (axis 0, one row per
    # window position), not spectrogram columns - confirm that is intended.
    spec[:filter] = 0.0
    return spec

def data_gen(file, win):
    '''
    Yield normalized spectrogram windows for one audio file.

    file: path handed to spectrogram_from_file
    win: number of consecutive spectrogram rows per window; the window
         advances by win // 2 rows each time

    yields: arrays of shape (win, d, 1), where d is the second dimension
            of the spectrogram, shifted by the window mean and divided by
            (window standard deviation + 1.0).
    '''
    spectrogram = spectrogram_from_file(file)
    n_rows, n_cols = spectrogram.shape
    hop = win // 2
    for end in range(win, n_rows, hop):
        patch = spectrogram[end - win:end].reshape((win, n_cols, 1))
        centered = patch - np.mean(patch)
        # the +1.0 keeps the divisor away from zero for constant windows
        yield centered / (np.std(patch) + 1.0)


In [4]:
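# Define db_user, db_password, host and db_name in this cell (values are not
# included here; load them from environment variables or a secrets manager
# rather than hard-coding them).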

In [5]:
# Connection settings for the MySQL database (accessed through PyMySQL).
settings = {
    'user': db_user,
    'pass': db_password,
    'host': host,
    'db': db_name,
}
# Percent-encode the credentials so that characters such as '@', ':' or '/'
# in the user name or password cannot corrupt the URL. 'pass' is a Python
# keyword, hence the dict unpacking instead of keyword arguments.
# No port is given, so the driver default applies (3306 for MySQL).
url = 'mysql+pymysql://{user}:{pass}@{host}/{db}'.format(**{
    **settings,
    'user': quote_plus(settings['user']),
    'pass': quote_plus(settings['pass']),
})
db = sqlalchemy.create_engine(url)

In [6]:
def run_query(query):
    """
    Execute `query` on a connection obtained from `db` and return all
    result rows as a list of dicts (one dict per row, built from
    row.items()).
    """
    with db.connect() as connection:
        result = connection.execute(query)
        return [dict(record.items()) for record in result.fetchall()]

In [7]:
# Fetch encoding, year and filename for the rows of wdp_ds.audio that have a
# matching row in wdp_ds.encoding; `files` becomes a list of dicts.
# NOTE(review): there is no ORDER BY, so the row order is whatever the
# database returns. The resume logic in the final cell walks these rows in
# order - confirm the order is stable between runs.
files = run_query("""
    SELECT 
        x.encoding, y.year, x.filename 
    FROM 
        wdp_ds.audio x 
    JOIN wdp_ds.encoding y ON x.encoding = y.encoding;
""")

In [8]:
# Object path for every record, in the same order as `files`:
# audio_files/<year>/<filename>
paths = [
    "audio_files/{}/{}".format(record['year'], record['filename'])
    for record in files
]

In [18]:
# Largest id currently stored in wdp_ds.not_silent. run_query returns a list
# of dicts keyed by column label; the single aggregate row is read through
# the label 'max(id)'.
max_id = run_query("""
    SELECT 
        max(id)
    FROM 
        wdp_ds.not_silent
    """)[0]['max(id)']

In [None]:
# --- configuration ---------------------------------------------------------
WINDOW_FRAMES = 32              # spectrogram rows handed to the classifier per window
HOP_SAMPLES = 16 * 256          # window hop (WINDOW_FRAMES // 2 rows) x 256-sample dft step
MAX_GAP_SAMPLES = 48000 * 0.75  # presumably 0.75 s at 48 kHz - TODO confirm sample rate
TMP_AUDIO_PATH = "/tmp/audio.m4a"


def _merge_regions(flags, hop, max_gap):
    """Turn per-window flags into merged (start, stop) sample regions.

    Window i covers [i * hop, (i + 1) * hop). A flagged window whose start is
    closer than `max_gap` to the end of the previous region extends that
    region; otherwise it opens a new one. Regions that are not longer than a
    single hop (i.e. isolated windows) are dropped.
    """
    merged = []
    for idx, active in enumerate(flags):
        if not active:
            continue
        start, stop = idx * hop, (idx + 1) * hop
        if merged and start - merged[-1][1] < max_gap:
            merged[-1] = (merged[-1][0], stop)
        else:
            merged.append((start, stop))
    return [(start, stop) for start, stop in merged if stop - start > hop]


noise_classifier = load_model('sil.h5')
client = storage.Client.from_service_account_json('secret.json')
bucket = client.get_bucket('wdp-data')

# Continue numbering AFTER the largest stored id (starting at max_id itself
# would re-use an existing id). max_id is None when the table is empty.
# The counter is not called `id` so the builtin of that name stays usable.
next_id = 1 if max_id is None else max_id + 1

# Resume point: files are skipped until this encoding is reached (that file
# itself is processed). Set to None to process everything.
skip_until = 5281102
process = False
for path, file_dict in zip(paths, files):
    if skip_until is None or file_dict['encoding'] == skip_until:
        process = True
    if not process:
        continue

    blob = bucket.blob(path)
    with open(TMP_AUDIO_PATH, "wb") as file_obj:
        blob.download_to_file(file_obj)

    windows = list(data_gen(TMP_AUDIO_PATH, WINDOW_FRAMES))
    if windows:
        x = np.stack(windows)
        y = noise_classifier.predict(x).flatten()
        # NOTE(review): exact comparison of the prediction with 0 - if the
        # model emits probabilities this is rarely true; confirm whether a
        # threshold was intended.
        not_noise = [sample == 0 for sample in y]
        regions = _merge_regions(not_noise, HOP_SAMPLES, MAX_GAP_SAMPLES)
    else:
        # File too short to yield a single window; np.stack([]) would raise.
        regions = []

    if len(regions) > 0:
        with db.connect() as conn:
            for start, stop in regions:
                conn.execute("INSERT INTO wdp_ds.not_silent VALUES ({}, {}, {}, {})".format(next_id, file_dict['encoding'], start, stop))
                next_id += 1
    print(file_dict['encoding'], ": ", regions)

Infile converted from .m4a to ".wav"
data read in!
