In [1]:
!python3 -m pip install PyMySQL
!python3 -m pip install SQLAlchemy
!python3 -m pip install google-cloud-storage
!python3 -m pip install --upgrade --quiet scikit-sound
!python3 -m pip install --upgrade --quiet pygame
!sudo apt-get -y install ffmpeg
!sudo apt-get -y install python3-pymysql

Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:3.2.14-1~deb9u1).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
python3-pymysql is already the newest version (0.7.10-1).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.


In [2]:
import sqlalchemy
import numpy as np

from google.cloud import storage
from numpy.fft import fft, ifft
from sksound.sounds import Sound

from tensorflow.keras.models import load_model

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
class StreamSpectrogram:
    '''
    Iterate over an audio file in fixed-size batches of normalized
    spectrogram windows.

    Every iteration step returns an array of shape (n_windows, win, d, 1),
    where d is the number of frequency bins produced by fwd_spectrogram.
    Consecutive windows are win // 2 spectrogram frames apart.
    '''

    def __init__(self, filename, win=32):
        '''
        filename: path of the audio file, opened with sksound's Sound
        win: number of spectrogram frames per window
        '''
        sound = Sound(filename)
        # sound.rate * 60 samples per batch (one minute when rate is in Hz).
        self.batch = sound.rate * 60
        self.data  = sound.data
        # Multi-channel data: keep the first channel only.
        if len(self.data.shape) > 1:
            self.data = self.data[:, 0]
        self.win = win
        # Batch counter; also reset by __iter__.
        self.i = 0

    def __iter__(self):
        '''Restart the iteration at the first batch.'''
        self.i = 0
        return self

    def __next__(self):
        '''
        Return the normalized windows of the next complete batch.

        Raises StopIteration once fewer than self.batch samples remain;
        a trailing partial batch is not processed.
        '''
        start = self.i * self.batch
        stop  = start + self.batch
        # A batch that ends exactly on the last sample is still complete,
        # so only stop when the batch would run past the end of the data.
        if stop > len(self.data):
            raise StopIteration

        spec = fwd_spectrogram(self.data[start:stop])
        n_frames, d = spec.shape
        hop = self.win // 2
        windows = []
        for end in range(self.win, n_frames, hop):
            x = np.reshape(spec[end - self.win:end], (self.win, d, 1))
            # The + 1.0 keeps the divisor away from zero for flat windows.
            windows.append((x - np.mean(x)) / (np.std(x) + 1.0))
        self.i += 1
        return np.stack(windows)
        
def fwd_spectrogram(audio, win=512, step=256):
    '''
    Compute the magnitude spectrogram of audio data

    audio: one channel audio
    win: window size for dft sliding window
    step: step size for dft sliding window

    Returns a 2-D array with one row per window. Each row holds the
    magnitudes of DFT bins win // 2 .. win - 1 of the Hanning-weighted
    window. Windows end at win, win + step, ... and must end before
    len(audio). If no window fits, the result has shape
    (0, win - win // 2) so that callers can still unpack its shape.
    '''
    first_bin = win // 2
    hanning = np.hanning(win)
    spectrogram = [
        np.abs(fft(audio[end - win:end] * hanning))[first_bin:win]
        for end in range(win, len(audio), step)
    ]
    if not spectrogram:
        # np.array([]) would be 1-D; keep the result 2-D for short audio.
        return np.empty((0, win - first_bin))
    return np.array(spectrogram)

In [6]:
# Credentials go here (redacted): define db_user, db_password, host and db_name.

In [7]:
from urllib.parse import quote_plus

# Connection settings; the values come from the credentials cell.
settings = {
    'user': db_user,
    'pass': db_password,
    'host': host,
    'db': db_name
}
# User name and password are percent-encoded so that characters such as
# '@', ':' or '/' cannot corrupt the URL. No port is given, so the MySQL
# default (3306) is used.
url = 'mysql+pymysql://{}:{}@{}/{}'.format(
    quote_plus(settings['user']),
    quote_plus(settings['pass']),
    settings['host'],
    settings['db'],
)
db = sqlalchemy.create_engine(url)

In [8]:
def run_query(query):
    '''
    Execute a query on the module-level engine `db` and return all rows.

    query: SQL string, or an already constructed SQLAlchemy statement
    returns: list of dicts, one per row, keyed by column label

    Plain strings are wrapped in sqlalchemy.text(), so a ':name' token in
    the SQL is treated as a bind parameter.
    '''
    # SQLAlchemy 2.x no longer executes plain strings; text() works on
    # every version.
    statement = sqlalchemy.text(query) if isinstance(query, str) else query
    with db.connect() as conn:
        rows = []
        for row in conn.execute(statement).fetchall():
            # Newer rows expose their dict-like view as ._mapping; older
            # row objects are dict-like themselves.
            mapping = getattr(row, '_mapping', row)
            rows.append(dict(mapping.items()))
        return rows

In [9]:
# Fetch every audio file together with the year of its encoding.
# Each entry of `files` is a dict with the keys 'encoding', 'year' and
# 'filename'.
files = run_query("""
    SELECT 
        x.encoding, y.year, x.filename 
    FROM 
        wdp_ds.audio x 
    JOIN wdp_ds.encoding y ON x.encoding = y.encoding;
""")

In [10]:
# Object path of every audio file, in the same order as `files`:
# audio_files/<year>/<filename>
paths = [
    "audio_files/{}/{}".format(record['year'], record['filename'])
    for record in files
]

In [11]:
# Highest id currently stored in wdp_ds.not_silent. The query result is
# None when there is no id yet, in which case counting starts from 0.
max_id = run_query("""
    SELECT 
        max(id)
    FROM 
        wdp_ds.not_silent
    """)[0]['max(id)']
max_id = 0 if max_id is None else max_id
print(max_id)

0


In [12]:
# Keras model loaded from 'sil.h5'; the processing loop treats a prediction
# that rounds to 0 as "not noise".
noise_classifier = load_model('sil.h5')
# Cloud Storage client authenticated with the service-account key file
# 'secret.json'; the audio files are read from the 'wdp-data' bucket.
client = storage.Client.from_service_account_json('secret.json')
bucket = client.get_bucket('wdp-data')

In [13]:
# Samples between the starts of two consecutive classifier windows:
# StreamSpectrogram advances 16 spectrogram frames per window (win // 2 for
# the default win=32) and fwd_spectrogram advances 256 samples per frame.
HOP = 16 * 256
# Two regions are merged when the gap between them is shorter than this
# many samples (0.1 s at 48 kHz).
# NOTE(review): 48000 is hard-coded; confirm all files use that sample rate.
MAX_GAP = 48000 * 0.1
# Bound parameters instead of string formatting: a value such as a file
# name containing a quote can neither break nor inject into the statement.
insert_region = sqlalchemy.text(
    "INSERT INTO wdp_ds.not_silent VALUES (:id, :encoding, :filename, :start, :stop)"
)

id = max_id + 1
for path, file_dict in zip(paths, files):
    print(path, file_dict)
    # Every file is downloaded to the same temporary path and overwritten.
    with open("/tmp/audio.m4a", "wb") as file_obj:
        bucket.blob(path).download_to_file(file_obj)
    stream = StreamSpectrogram("/tmp/audio.m4a")

    # One flag per window: True when the prediction rounds to 0.
    not_noise = []
    for x in stream:
        y = noise_classifier.predict(x).flatten()
        not_noise.extend([int(np.round(sample)) == 0 for sample in y])

    # NOTE(review): window indices run on across batches, but one batch
    # yields fewer windows than stream.batch / HOP, so offsets computed as
    # i * HOP fall increasingly early in later batches. Left unchanged to
    # stay consistent with rows already stored - confirm and fix together.
    regions = []
    for i, keep in enumerate(not_noise):
        if not keep:
            continue
        start = i * HOP
        stop = start + HOP
        if regions and start - regions[-1][1] < MAX_GAP:
            # Close enough to the previous region: extend it.
            regions[-1] = (regions[-1][0], stop)
        else:
            regions.append((start, stop))
    # Regions made of a single window are discarded.
    regions = [(start, stop) for start, stop in regions if stop - start > HOP]

    if regions:
        rows = [
            {
                'id': id + offset,
                'encoding': file_dict['encoding'],
                'filename': file_dict['filename'],
                'start': start,
                'stop': stop,
            }
            for offset, (start, stop) in enumerate(regions)
        ]
        # begin() commits on success and rolls back on error, so the regions
        # of one file are stored all-or-nothing on every SQLAlchemy version.
        with db.begin() as conn:
            conn.execute(insert_region, rows)
        id += len(regions)
    print(file_dict['encoding'], ": ", regions)

6111101 :  [(552960, 565248), (573440, 585728), (1699840, 1708032), (2383872, 2392064), (2433024, 2441216), (2473984, 2494464), (2695168, 2707456), (3137536, 3145728), (4956160, 4964352), (4972544, 4980736), (5038080, 5046272), (5054464, 5062656), (5357568, 5369856), (5586944, 5599232), (6762496, 6774784), (7774208, 7782400), (7798784, 7811072), (8105984, 8118272), (8224768, 8232960), (8241152, 8253440), (8441856, 8450048), (8507392, 8544256), (8568832, 8577024), (8736768, 8749056), (8982528, 8990720), (8998912, 9023488), (9097216, 9109504), (9129984, 9162752), (9486336, 9498624), (9662464, 9674752), (9797632, 9805824), (9969664, 9977856), (11100160, 11108352), (11145216, 11161600), (11206656, 11231232), (11264000, 11292672), (11354112, 11378688), (11390976, 11411456), (11419648, 11436032), (11702272, 11710464), (12062720, 12070912), (13361152, 13377536), (13459456, 13471744), (13549568, 13557760), (13590528, 13602816), (13668352, 13680640), (14053376, 14065664), (14168064, 14176256), 