In [1]:
!python3 -m pip install PyMySQL
!python3 -m pip install SQLAlchemy
!python3 -m pip install google-cloud-storage
!python3 -m pip install --upgrade --quiet scikit-sound
!python3 -m pip install --upgrade --quiet pygame
!sudo apt-get -y install ffmpeg
!sudo apt-get -y install python3-pymysql

Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:3.2.14-1~deb9u1).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
python3-pymysql is already the newest version (0.7.10-1).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.


In [1]:
import sqlalchemy
import numpy as np

from google.cloud import storage
from numpy.fft import fft, ifft
from sksound.sounds import Sound
from scipy.io import wavfile

from tensorflow.keras.models import load_model

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [12]:
highpass=25

class StreamSpectrogram:
    
    def __init__(self, filename, win=128):
        sound = Sound(filename)        
        self.batch = sound.rate * 60
        self.data  = sound.data
        self.fs    = sound.rate
        if len(self.data.shape) > 1:
            self.data = self.data[:, 0]    
        self.win = win
        
    def __iter__(self):
        self.i = 0
        return self

    def __next__(self):
        if (self.i + 1) * self.batch < len(self.data): 
            start = self.i       * self.batch
            stop  = (self.i + 1) * self.batch
            raw   = self.data[start:stop]
            spec  = fwd_spectrogram(raw, win=512 + 2 * highpass)[:, 0:256] 
            t,d   = spec.shape
            current = []        
            for i in range(self.win, t, self.win // 2):
                x      = np.reshape(spec[i - self.win:i], (self.win, d, 1))
                mu     = np.mean(x)
                std    = np.std(x) + 1.0
                window = (x - mu) / std
                current.append(window)
            self.i += 1
            return np.stack(current)
        else:
            raise StopIteration
     
    def snippet(self, start, stop):
        w = 512 + 2 * highpass
        if start - w > 0:
            return self.data[start - w:stop]
        else:
            return None
    
def fwd_spectrogram(audio, win=512, step=64):
    '''
    Compute the spectrogram of audio data

    audio: one channel audio
    win: window size for dft sliding window
    step: step size for dft sliding windo
    '''
    spectrogram = []
    hanning = np.hanning(win)
    for i in range(win, len(audio), step):
        start = win // 2
        dft = np.abs(fft(audio[i - win: i] * hanning))[start:win]
        spectrogram.append(dft)
    return np.array(spectrogram)

In [9]:
# PASSWORDS AND STUFF HERE

In [13]:
settings = {
   'user': db_user,
   'pass': db_password,
   'host': host,
     'db': db_name
}
url = 'mysql+pymysql://{user}:{pass}@{host}/{db}'.format(**settings)  # 5432 is the default port
db = sqlalchemy.create_engine(url)

In [14]:
def run_query(query):
    with db.connect() as conn:
        rows = []
        for row in conn.execute(query).fetchall():
            rows.append(dict(row.items()))
        return rows

In [15]:
files = run_query("""
    SELECT 
        x.encoding, y.year, x.filename 
    FROM 
        wdp_ds.audio x 
    JOIN wdp_ds.encoding y ON x.encoding = y.encoding;
""")

In [16]:
paths = ["audio_files/{}/{}".format(file['year'], file['filename']) for file in files]

In [19]:
max_id = run_query("""
    SELECT 
        max(id)
    FROM 
        wdp_ds.not_silent
    """)
if max_id[0]['max(id)'] is None:
    max_id = 0
else:
    max_id = max_id[0]['max(id)']
print(max_id)

0


In [20]:
noise_classifier = load_model('../models/lstm_v4/v4.1/sil.h5')
client = storage.Client.from_service_account_json('../secret.json')
bucket = client.get_bucket('wdp-data')

In [21]:
id = max_id + 1
skip = 0
c = 0
for path, file_dict in zip(paths, files):
    if c > skip:
        print(path, file_dict, c)
        with open("/tmp/audio.m4a", "wb") as file_obj:
            blob = bucket.blob(path)
            blob.download_to_file(file_obj)
        stream = StreamSpectrogram("/tmp/audio.m4a")
        not_noise = []
        for x in stream:
            y = noise_classifier.predict(x).flatten()
            not_noise.extend([int(np.round(sample)) == 0 for sample in y])

        regions = []
        for i in range(0, len(not_noise)):
            if not_noise[i]:
                #win: 32, step: 256
                #start = i * 16 * 256
                #stop  = (i + 1) * 16 * 256 
                # win: 128, step: 64
                start = i * 64 * 64
                stop  = (i + 1) * 64 * 64 

                if len(regions) > 0: 
                    last  = regions[-1]
                    if start - last[1] < 48000 * 0.1:
                        start       = regions[-1][0]
                        regions[-1] = (start, stop)
                    else:
                        regions.append((start, stop))
                else:
                    regions.append((start, stop))
        regions = [(start, stop) for start, stop in regions if stop - start > (64 * 64)]
        if len(regions) > 0:
            #for start, stop in regions:
            #    audio = stream.snippet(start, stop)
            #   if audio is not None:
            #       wavfile.write('../data/silence/{}_{}_{}.wav'.format(file_dict['encoding'], start,stop), stream.fs, audio)

            with db.connect() as conn:
                for start, stop in regions:                
                    conn.execute("INSERT INTO wdp_ds.not_silent VALUES ({}, {}, '{}', {}, {})".format(id, file_dict['encoding'], file_dict['filename'], start, stop))  
                    id += 1
        print(file_dict['encoding'], ": ", regions)
    c += 1

6121101 :  [(626688, 671744), (770048, 778240), (851968, 970752), (1064960, 1073152), (1155072, 1167360), (1175552, 1183744), (1191936, 1212416), (1298432, 1314816), (1445888, 1495040), (1503232, 1536000), (1576960, 1675264), (1683456, 1724416), (1740800, 1748992), (1826816, 1839104), (2252800, 2260992), (2760704, 2768896), (2969600, 2985984), (3346432, 3395584), (3604480, 3612672), (3784704, 3792896), (3813376, 3833856), (4878336, 4886528), (4894720, 4907008), (4923392, 4935680), (5472256, 5488640), (5545984, 5648384), (5738496, 5758976), (5787648, 5799936), (5816320, 5840896), (5849088, 5865472), (5881856, 5898240), (5906432, 5926912), (6066176, 6086656), (6135808, 6160384), (6819840, 6832128), (7548928, 7561216), (7569408, 7630848), (7798784, 7811072), (7933952, 7954432), (8130560, 8146944), (8163328, 8179712), (8249344, 8269824), (8323072, 8331264), (8724480, 8798208), (8822784, 8851456), (8962048, 8998912), (9015296, 9027584), (9035776, 9076736), (9101312, 9109504), (9121792, 9129

OSError: [Errno 12] Cannot allocate memory