In [1]:
# Importing required libraries
import pandas as pd
from tqdm.notebook import tnrange
import librosa
import pickle
import ast
import numpy as np
import scipy
import datetime
from tqdm.notebook import tqdm
from prettytable import PrettyTable
from tensorflow.keras.layers import Input, GRU,LSTM, Dense,Masking,Concatenate,GlobalAveragePooling1D,Dropout,Conv2D,TimeDistributed,Conv1D,BatchNormalization,Activation
from tensorflow.keras.models import Model
import tensorflow as tf
from sklearn.metrics import log_loss
from keras.models import model_from_json

In [2]:
class final:
    """Inference helper for the audio species classifier.

    Loads a saved Keras model plus the annotation table, converts one
    annotated audio segment into a normalised log-mel spectrogram and prints
    the predicted species ID (optionally with the log loss against a known
    label) as a PrettyTable.
    """

    # Fixed preprocessing parameters. The spectrogram is reshaped to
    # (1, N_MELS, N_FRAMES) before it is fed to the model, so these values
    # must stay consistent with each other and with the saved model.
    SAMPLE_RATE = 48000          # sample rate used for slicing and for the mel spectrogram
    MAX_SAMPLES = 570776         # every signal is zero-padded to this many samples
    N_MELS = 64                  # number of mel bands
    N_FRAMES = 1115              # spectrogram frames expected by the reshape
    N_SPECIES = 24               # length of the one-hot label vector
    SPEC_MEAN = -69.59436665357703
    SPEC_STD = 19.596147689935908

    def load_files(self):
        """Load the model architecture, its weights and the annotation CSV.

        Sets ``self.model`` and ``self.data``. Calling it again reloads
        everything from disk.
        """
        # https://machinelearningmastery.com/save-load-keras-deep-learning-models/
        # The context manager closes the file even if reading fails.
        with open("/content/model_1.json", 'r') as json_file:
            loaded_model_json = json_file.read()
        self.model = model_from_json(loaded_model_json)
        # load weights into model
        self.model.load_weights("/content/model_1.h5")
        self.data = pd.read_csv('train_tp_1.csv')

    def _ensure_loaded(self):
        """Call ``load_files`` only if the model or the table is not loaded yet."""
        if not (hasattr(self, 'model') and hasattr(self, 'data')):
            self.load_files()

    def spectrogram(self, pad_signal):
        """Convert a raw audio signal into a log-scaled mel spectrogram.

        Returns the mel power spectrogram (``N_MELS`` bands) converted to dB
        relative to its own maximum.
        """
        mel = librosa.feature.melspectrogram(
            y=pad_signal, sr=self.SAMPLE_RATE, n_mels=self.N_MELS)
        return librosa.power_to_db(S=mel, ref=np.max)

    def pad(self, data, max_length=None):
        """Zero-pad a 1-D signal at the end up to ``max_length`` samples.

        Parameters
        ----------
        data : sequence or 1-D array of samples.
        max_length : target length; defaults to ``MAX_SAMPLES``.

        Returns
        -------
        A new float64 ``numpy.ndarray``. A signal that is already
        ``max_length`` samples or longer is returned unpadded and untruncated.
        """
        if max_length is None:
            max_length = self.MAX_SAMPLES
        # np.array always copies, so the caller's buffer is never aliased.
        signal = np.array(data, dtype=np.float64)
        shortfall = max_length - signal.shape[0]
        if shortfall > 0:
            # Vectorised padding instead of extending a Python list element by element.
            signal = np.pad(signal, (0, shortfall), mode='constant')
        return signal

    def load_audio(self, id):
        """Load the annotated segment of one recording.

        ``id`` is the row label in ``self.data``. The file
        ``<recording_id>.flac`` is read at its native sample rate and sliced
        from ``t_min`` to ``t_max`` (inclusive of the end sample).

        Returns ``(signal, duration)`` where duration is ``t_max - t_min``.
        """
        path = '{}.flac'.format(self.data['recording_id'][id])
        # Audio files are already encoded with a sample rate of 48000,
        # so we don't need to resample them (sr=None keeps the native rate).
        audio_signal, _sample_rate = librosa.load(path, sr=None)
        t_min = self.data['t_min'][id]
        t_max = self.data['t_max'][id]
        first = int(t_min * self.SAMPLE_RATE)
        last = int(t_max * self.SAMPLE_RATE) + 1
        return (audio_signal[first:last], t_max - t_min)

    def normalise(self, spec):
        """Standardise a spectrogram with the fixed ``SPEC_MEAN`` / ``SPEC_STD``."""
        return (spec - self.SPEC_MEAN) / self.SPEC_STD

    def one_hot_encoding(self, true_species):
        """Return a list of ``N_SPECIES`` zeros with a 1 at index ``true_species``."""
        one_hot = [0] * self.N_SPECIES
        one_hot[true_species] = 1
        return one_hot

    def logloss(self, actual_species, pred_array):
        """Compute the log loss of one prediction against the true species ID."""
        one_hot = self.one_hot_encoding(actual_species)
        return log_loss([one_hot], [pred_array])

    def _predict(self, id):
        """Run load -> pad -> spectrogram -> normalise -> model for one row.

        Returns ``(pred_array, pred_label, duration)``.

        Raises ``ValueError`` when the spectrogram does not have the shape
        the model input is reshaped to (e.g. the segment is longer than
        ``MAX_SAMPLES``).
        """
        raw_signal, duration = self.load_audio(id)    # Load audio segment
        pad_data = self.pad(raw_signal)               # Pad the data
        spec = self.spectrogram(pad_data)             # Convert raw data to spectrogram
        expected_shape = (self.N_MELS, self.N_FRAMES)
        if spec.shape != expected_shape:
            raise ValueError(
                'spectrogram has shape {} but the model expects {}'.format(
                    spec.shape, expected_shape))
        spec = spec.reshape(1, self.N_MELS, self.N_FRAMES)  # Add the batch dimension
        spec = self.normalise(spec)                   # Normalise the data
        pred_array = self.model.predict(spec)[0]      # Probabilities for each species
        pred_label = int(np.argmax(pred_array))       # Index of the first maximum
        return pred_array, pred_label, duration

    def final_fun_1(self, id):
        """Predict the species for row ``id`` and print a one-row summary table.

        Nothing is returned; the table lists recording ID, segment duration,
        predicted species ID and the time taken (measured after file loading).
        """
        self._ensure_loaded()  # Loads files on first use only
        start = datetime.datetime.now()
        _, pred_label, duration = self._predict(id)

        x = PrettyTable(["Recording ID","Duration", "Predicted Species ID",'Time Taken'])
        row = [self.data['recording_id'][id], np.round(duration, 3), pred_label,
               datetime.datetime.now() - start]
        x.add_row(row)
        print(x)

    def final_fun_2(self, set):
        """Predict several rows and print predictions together with log loss.

        ``set`` is an iterable of ``(row_index, actual_species_id)`` tuples,
        where ``row_index`` is the row label in the annotation table.
        Nothing is returned; one table row is printed per tuple.
        """
        self._ensure_loaded()  # Loads files on first use only
        x = PrettyTable(["Recording ID","Duration", "Predicted Species ID",'Actual Species ID','Log Loss','Time Taken'])
        for value in set:
            start = datetime.datetime.now()
            pred_array, pred_label, duration = self._predict(value[0])
            actual_species = value[1]
            log = self.logloss(actual_species, list(pred_array))  # Computes log loss
            row = [self.data['recording_id'][value[0]], np.round(duration, 3), pred_label,
                   actual_species, log, datetime.datetime.now() - start]
            x.add_row(row)
        print(x)

In [3]:
# Create the inference helper; the model and CSV are read when a prediction function is called.
test_object = final()

In [21]:
# Predict the species for annotation row 432 and print the summary table.
test_object.final_fun_1(id=432)

+--------------+----------+----------------------+----------------+
| Recording ID | Duration | Predicted Species ID |   Time Taken   |
+--------------+----------+----------------------+----------------+
|  5857d6f6d   |  0.896   |          1           | 0:00:00.678392 |
+--------------+----------+----------------------+----------------+


In [22]:
# Predict the species for annotation row 762 and print the summary table.
test_object.final_fun_1(id=762)

+--------------+----------+----------------------+----------------+
| Recording ID | Duration | Predicted Species ID |   Time Taken   |
+--------------+----------+----------------------+----------------+
|  9f8ebbfc3   |  3.344   |          15          | 0:00:00.727522 |
+--------------+----------+----------------------+----------------+


In [23]:
# Predict the species for annotation row 1045 and print the summary table.
test_object.final_fun_1(id=1045)

+--------------+----------+----------------------+----------------+
| Recording ID | Duration | Predicted Species ID |   Time Taken   |
+--------------+----------+----------------------+----------------+
|  d96242159   |  0.624   |          13          | 0:00:00.706530 |
+--------------+----------+----------------------+----------------+


In [4]:
# Read the annotation table to look up the true species of each sample row,
# then print prediction and log loss for every (row index, true species) pair.
data = pd.read_csv('train_tp_1.csv')
sample_rows = [0, 63, 442]
labelled_rows = [(row, data['species_id'][row]) for row in sample_rows]
test_object.final_fun_2(labelled_rows)

+--------------+----------+----------------------+-------------------+------------------------+----------------+
| Recording ID | Duration | Predicted Species ID | Actual Species ID |        Log Loss        |   Time Taken   |
+--------------+----------+----------------------+-------------------+------------------------+----------------+
|  003bec244   |  0.587   |          14          |         14        |  0.005025476682931185  | 0:00:00.786134 |
|  0e799da44   |  0.693   |          18          |         18        | 4.965189873473719e-05  | 0:00:00.250484 |
|  59ce91d00   |  1.835   |          16          |         16        | 2.3841887468734058e-06 | 0:00:00.274340 |
+--------------+----------+----------------------+-------------------+------------------------+----------------+
