In [1]:
import os
import uuid
import numpy as np
import librosa
import pylab
import io
import json

from io import BytesIO
import base64
from werkzeug.utils import secure_filename
from flask import Flask, request, jsonify, url_for, render_template
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from matplotlib import cm
from librosa import display

In [2]:
# Lower-case file extensions (without the leading dot) that allowed_file accepts.
ALLOWED_EXTENSION = {'wav'}
# Pixel dimensions for spectrogram images.
# NOTE(review): presumably the input size the model expects — confirm against the saved model.
IMAGE_SIZE = 300, 300


In [3]:
def allowed_file(filename):
    """Tell whether ``filename`` carries an extension listed in ALLOWED_EXTENSION.

    The text after the last dot is compared case-insensitively, so
    ``CLIP.WAV`` is accepted just like ``clip.wav``.

    Args:
        filename: Name of the uploaded file, as a string.

    Returns:
        True when the name contains a dot and its extension is allowed,
        False otherwise (including names without any dot).
    """
    _, dot, extension = filename.rpartition('.')
    # `dot` is empty when the name has no '.', in which case there is no extension to check.
    return bool(dot) and extension.lower() in ALLOWED_EXTENSION

In [None]:
# Flask application object; the route decorators in this cell register handlers on it.
app = Flask(__name__)

# The network is loaded once, when this cell runs, and shared by every request.
# compile=False is passed because the model is only used for inference here.
MODEL_PATH = './saved_models/test_model.h5'
model = load_model(MODEL_PATH, compile=False)

@app.route('/index/')
def index():
    """Serve the landing page by rendering the ``MosquitoNet.html`` template."""
    landing_page = render_template('MosquitoNet.html')
    return landing_page

def _render_spectrogram_jpeg(samples, sample_rate):
    """Render a log-mel spectrogram of ``samples`` as a borderless JPEG.

    Args:
        samples: Audio samples as returned by ``librosa.load``.
        sample_rate: Sampling rate of ``samples`` in Hz.

    Returns:
        A ``BytesIO`` rewound to offset 0 that holds the JPEG bytes.
    """
    n_fft = 2048
    hop_length = 512
    n_mels = 128
    # librosa forwards `window` to scipy.signal.get_window, where a bare
    # number selects a Kaiser window with that beta.
    # NOTE(review): presumably this mirrors the preprocessing the model was
    # trained with — confirm before changing it.
    window = 30

    mel_power = librosa.feature.melspectrogram(
        y=samples, sr=sample_rate, n_mels=n_mels, n_fft=n_fft,
        window=window, hop_length=hop_length,
    )
    log_power = librosa.power_to_db(mel_power, ref=np.max)

    fig = pylab.figure(figsize=(3, 3))
    try:
        pylab.axis('off')
        # Axes spanning the whole canvas so the saved image has no white edge.
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])
        librosa.display.specshow(log_power, cmap=cm.jet)

        jpeg_buffer = BytesIO()
        pylab.savefig(jpeg_buffer, bbox_inches=None, pad_inches=0, format="jpg")
    finally:
        # pyplot keeps every figure it creates until it is explicitly closed;
        # without this each request would leave one more figure in memory.
        pylab.close(fig)

    jpeg_buffer.seek(0)
    return jpeg_buffer


@app.route('/api/audio', methods=['POST'])
def upload_audio():
    """Classify an uploaded WAV file and render the result page.

    Expects a multipart POST with the file under the ``audio`` field. The
    clip is decoded at 8 kHz, converted to one log-mel spectrogram image,
    and that image is passed to ``model``.

    TODO: an earlier outline for this handler planned to split the audio into
    1.92 s chunks, drop shorter chunks and average the per-chunk predictions.
    That is not implemented; the whole clip yields a single spectrogram and a
    single prediction.

    Returns:
        The rendered ``MosquitoNet.html`` template. On success it receives
        ``spectrogram`` (base64-encoded JPEG text) and ``prediction`` (JSON
        list of model outputs); otherwise ``prediction`` carries an error
        message.
    """
    if 'audio' not in request.files:
        return render_template('MosquitoNet.html', prediction='No posted audio.')
    file = request.files['audio']

    if file.filename == '':
        return render_template('MosquitoNet.html', prediction='You did not select an audio.')

    if not (file and allowed_file(file.filename)):
        return render_template('MosquitoNet.html', prediction='Invalid File')

    filename = secure_filename(file.filename)
    print("***"+filename)

    # Decode the upload straight from memory, resampled to 8 kHz.
    try:
        samples, sample_rate = librosa.load(BytesIO(file.read()), sr=8000)
    except Exception:
        # The concrete exception type depends on which decoder librosa uses,
        # so any decoding failure is reported as an invalid upload instead of
        # surfacing as an HTTP 500.
        app.logger.exception("Could not decode uploaded audio %r", filename)
        return render_template('MosquitoNet.html', prediction='Invalid File')

    if samples.size == 0:
        # Nothing to analyse: a spectrogram of zero samples is meaningless.
        return render_template('MosquitoNet.html', prediction='Invalid File')

    jpeg_buffer = _render_spectrogram_jpeg(samples, sample_rate)

    # Base64 text of the JPEG, handed to the template for display.
    encoded_img_data = base64.b64encode(jpeg_buffer.getvalue())

    # Decode the same JPEG at the model's input size and add a batch axis.
    spectrogram_image = img_to_array(load_img(jpeg_buffer, target_size=IMAGE_SIZE))
    model_input = np.expand_dims(spectrogram_image, axis=0)

    prediction = model.predict(model_input)

    # Flatten one level: one list of scores across all rows of the output.
    scores = [score for row in prediction for score in row.tolist()]
    return render_template(
        'MosquitoNet.html',
        spectrogram=encoded_img_data.decode('utf-8'),
        prediction=json.dumps(scores),
    )
    

if __name__ == '__main__':
    # NOTE(review): debug=True turns on Flask's debug mode; keep this for local use only.
    # The reloader is switched off — presumably because restarting the process
    # does not work from inside a notebook kernel; confirm.
    server_options = {'debug': True, 'use_reloader': False}
    app.run(**server_options)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [19/Mar/2023 21:03:43] "GET /index/ HTTP/1.1" 200 -
127.0.0.1 - - [19/Mar/2023 21:03:43] "GET /static/css/style.css HTTP/1.1" 304 -


***220472.wav


127.0.0.1 - - [19/Mar/2023 21:04:00] "POST /api/audio HTTP/1.1" 200 -
127.0.0.1 - - [19/Mar/2023 21:04:00] "GET /static/css/style.css HTTP/1.1" 304 -


***220435.wav


127.0.0.1 - - [19/Mar/2023 21:04:15] "POST /api/audio HTTP/1.1" 200 -
127.0.0.1 - - [19/Mar/2023 21:04:15] "GET /static/css/style.css HTTP/1.1" 304 -
