In [172]:
import json
import os
import re
import warnings
import wave
from pathlib import Path

import IPython.display as ipd
import librosa.display
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import soundfile as sf

## Desired inspected length 
### Must be a multiple of 10 seconds, i.e. (end_time - start_time) modulo 10 must be 0


In [173]:
# Inspected window, in seconds; both values are later used to slice the
# probability series, and (end_time - start_time) should be a multiple of 10.
start_time, end_time = 331, 411
# Decision threshold: probabilities below it are drawn as 0, the rest as 1.
thresh = 0.21
# Number of probability values (seconds) shown per spectrogram panel.
seg_length = 20

In [174]:
def check_sanity(start_time, end_time, seg_length):
    """Make the inspected window and the segment length mutually consistent.

    The window ``end_time - start_time`` is shortened (by moving ``end_time``
    back) to the nearest multiple of 10. If ``seg_length`` then does not
    divide the window evenly, it is replaced by the largest multiple of 10
    that is not greater than the requested value and does divide the window;
    when no such value exists, 10 is used. A message is printed for every
    value that gets changed.

    Parameters
    ----------
    start_time : int
        Start of the window, in seconds.
    end_time : int
        End of the window, in seconds.
    seg_length : int
        Requested segment length, in seconds.

    Returns
    -------
    tuple
        ``(seg_length, end_time)`` after adjustment. Note the order: it is
        not the order of the parameters.
    """
    duration = end_time - start_time
    remainder = duration % 10
    if remainder != 0:
        end_time = end_time - remainder
        duration = end_time - start_time
        print("Non legal signal length! - end_time changed to {}".format(end_time))

    # A non-positive seg_length can never tile the window (and 0 would raise
    # ZeroDivisionError below), so it is treated as illegal as well.
    if seg_length <= 0 or not (duration / seg_length).is_integer():
        # Rounding down to a multiple of 10 is not enough on its own
        # (e.g. 30 does not divide 80), so keep stepping down until the
        # candidate divides the window. 10 always does, because the window
        # itself is a multiple of 10 at this point.
        candidate = int(seg_length - (seg_length % 10))
        while candidate > 10 and duration % candidate != 0:
            candidate -= 10
        seg_length = max(candidate, 10)
        print("Non legal seg_length! - seg_length changed to {}".format(seg_length))
    return seg_length, end_time

In [175]:
# Validate the configured window; the returned (possibly adjusted) values
# replace seg_length and end_time for the rest of the notebook.
seg_length, end_time = check_sanity(
    start_time=start_time,
    end_time=end_time,
    seg_length=seg_length,
)

## Annotation DB

### define paths for annotations and audio files

In [176]:
# CSV with the model's inference results, resolved relative to the working directory.
annotation_csvfile_path = Path('Inference_results-2021-05-04_21-57-47-best-180914_150127.csv')
# Directory that holds the recordings. Set the RECORDINGS_DIR environment
# variable to run the notebook on another machine; the default keeps the
# original location.
recordings_dir = Path(os.environ.get('RECORDINGS_DIR', '/home/z30g0d/PycharmProjects/data'))
recording_path = recordings_dir / '180914_150127.wav'

### load csv file to pandas df

In [177]:
# Read the inference results into a DataFrame.
results = pd.read_csv(filepath_or_buffer=annotation_csvfile_path)

In [178]:
# Display the last five rows as a quick check of the loaded table.
results.tail(n=5)

Unnamed: 0,class0_prob,class1_prob
483,0.927648,0.072352
484,0.127762,0.872238
485,0.582435,0.417565
486,0.605498,0.394502
487,0.548806,0.451194


In [179]:
# Class-1 probability for every row of the inference results.
prob = results.loc[:, 'class1_prob']
# One integer tick per probability row (0, 1, ..., len(prob) - 1);
# `time` and `xticks` are two names for the same array.
time = xticks = np.arange(len(prob))

## load audio and spectrogram

### define functions

In [180]:
def play_audio_plot_stft(prob, df_series, audio_path, thresh, seg_length):
    """Play and plot a recording segment by segment next to its call probability.

    The probability values are cut into ``int(len(prob) / seg_length)``
    consecutive segments of ``seg_length`` values each. For every segment the
    matching stretch of audio is read from ``audio_path``, shown as a playable
    widget, and drawn as a log-frequency spectrogram with the probability
    curve and the thresholded 0/1 decision overlaid on a twin axis.

    Parameters
    ----------
    prob : array-like
        Probabilities of the inspected window. Trailing values that do not
        fill a whole segment are ignored.
    df_series : array-like
        Time ticks of the inspected window. Only the first tick is used: it
        is the offset at which the window starts inside the recording.
        NOTE(review): this assumes one probability value per second and that
        tick ``k`` corresponds to second ``k`` of the recording - confirm.
    audio_path : str or pathlib.Path
        WAV file to read the audio from.
    thresh : float
        Probabilities below this value give decision 0, all others 1.
    seg_length : int
        Number of probability values (seconds) per plotted segment.

    Returns
    -------
    audio_data_dump : list
        The audio samples of every segment, in plotting order.
    decision : list of int
        The 0/1 decisions of the last segment only (unchanged from the
        original behaviour, which callers may rely on).

    Raises
    ------
    ValueError
        If ``prob`` holds fewer than ``seg_length`` values, i.e. there is no
        complete segment to plot.
    """
    def get_sr(wav_path):
        """Return the frame rate stored in the header of the WAV file."""
        with wave.open(str(wav_path), "rb") as wave_file:
            return wave_file.getframerate()

    def plot_stft(prob_values, audio_file_path, sr, num_of_segments, seg_length, offset):
        """Draw one spectrogram panel per segment and collect audio/decisions."""
        fig = plt.figure(figsize=(15, 30))
        # squeeze=False keeps an array of axes even for a single segment, so
        # indexing by segment number works for any segment count.
        panels = fig.subplots(num_of_segments, sharex=True, squeeze=False)[:, 0]
        fig.suptitle('Visualization for: ' + str(audio_file_path))

        audio_data_dump = []
        decision = []
        for seg_idx in range(num_of_segments):
            seg_start = seg_idx * seg_length
            seg_end = (seg_idx + 1) * seg_length

            # The window offset is added so the audio lines up with the
            # probabilities of the inspected window instead of always
            # starting at the beginning of the recording. The final second
            # of each segment is not read, as in the original code.
            audio_data, _ = sf.read(
                audio_file_path,
                start=int((offset + seg_start) * sr),
                stop=int((offset + seg_end) * sr - sr),
            )
            ipd.display(ipd.Audio(audio_data, rate=sr))
            audio_data_dump.append(audio_data)

            spectrogram_db = librosa.amplitude_to_db(
                np.abs(librosa.stft(audio_data)), ref=np.max
            )
            librosa.display.specshow(
                spectrogram_db, y_axis='log', x_axis='time', sr=sr, fmax=10000,
                ax=panels[seg_idx],
            )

            # Probability and decision share the panel's x-axis but get
            # their own (hidden) y-axis.
            overlay = panels[seg_idx].twinx()
            seg_prob = prob_values[seg_start:seg_end]
            decision = [0 if p < thresh else 1 for p in seg_prob]
            overlay.plot(seg_prob, label='Call Probability')
            overlay.step(np.arange(seg_length), np.array(decision), color='lime', label='Decision')
            overlay.set_yticks([])
            overlay.autoscale(enable=True, axis='x', tight=True)
            overlay.legend()

        return audio_data_dump, decision

    # Work on plain positional arrays so label-based indexes of a sliced
    # pandas Series cannot interfere with the slicing below.
    prob_values = np.asarray(prob)
    num_of_segments = int(prob_values.shape[0] / seg_length)
    if num_of_segments < 1:
        raise ValueError(
            "prob holds {} values, fewer than seg_length={}; nothing to plot".format(
                prob_values.shape[0], seg_length
            )
        )
    prob_values = prob_values[:num_of_segments * seg_length]

    ticks = np.atleast_1d(np.asarray(df_series))
    offset = ticks[0] if ticks.size else 0

    sr = get_sr(audio_path)
    return plot_stft(prob_values, audio_path, sr, num_of_segments, seg_length, offset)

# This can take a while for long recordings. Wait for the visualization to finish rendering!

In [181]:
# Restrict the ticks and the probabilities to the inspected window
# [start_time, end_time).
xticks_sliced, prob_sliced = xticks[start_time:end_time], prob[start_time:end_time]

## Use '%matplotlib notebook' instead of '%matplotlib qt' in order to plot inside the python notebook

In [182]:
# Silence every warning from here on (this is global for the kernel, not
# limited to this cell).
warnings.filterwarnings('ignore')
# Select the Qt backend for matplotlib figures; the heading above this cell
# names '%matplotlib notebook' as the alternative for plotting inside the notebook.
%matplotlib qt
# Play and plot the inspected window; returns the audio of every segment and
# the decision list produced by the function.
audio_data_dump, decision = play_audio_plot_stft(prob_sliced, xticks_sliced, recording_path, thresh, seg_length)