<a href="https://colab.research.google.com/github/compi1234/pyspch/blob/master/test/spectrogram_test_ly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Interactive Spectrogram 

In this demo we view speech & audio in the Time-Frequency Domain.

Possible spectral representations are:
- classical spectrogram
- mel spectrogram

You may need to create the iSpectrogram with different parameters that better suit your display:
- size is a percentage of the max display size that is possible with your current notebook setup
- the dpi parameter controls the granularity of the plot and to some extent the size of the plot vs. the controls as well

Suggested Files to choose from (within the default root directory 'https://homes.esat.kuleuven.be/~spchlab/data/'):
- misc/friendly.wav  ... a 1 second speech fragment
- misc/train.wav     ... a train whistle
- timit/si1027.wav   ... an example sentence from the TIMIT corpus

You can set another URL root with the parameter `root`.


In [1]:
# Do the imports #
##################
#
%matplotlib inline
import os,sys 
import numpy as np
import pandas as pd
from IPython.display import display, Audio, HTML, clear_output
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Layout
#
# Detect whether the notebook runs inside Google Colab: the ``google.colab``
# package can only be imported there.  Only ImportError is caught (it also
# covers ModuleNotFoundError) so that unrelated failures and
# KeyboardInterrupt are not silently swallowed.
try:
    import google.colab  # noqa: F401  (imported only to probe availability)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

import pyspch.spectrogram as specg
import pyspch.audio as audio
import pyspch.utils as spchu
                    
import matplotlib.pyplot as plt
import pyspch.display as spchd

In [6]:
def box_layout():
    """Return a new ``Layout`` used to frame a group of controls.

    The layout draws a 1px solid black border, leaves a 10px margin on the
    right and bottom sides, and pads the content by 5px on every side.
    A fresh object is built on every call.
    """
    frame_options = {
        'border': 'solid 1px black',
        'margin': '0px 10px 10px 0px',
        'padding': '5px 5px 5px 5px',
    }
    return Layout(**frame_options)
# Widget ``style`` dictionaries: the numeric suffix is the share of the widget
# width reserved for the description label, in tens of percent.
dw_5 = dict(description_width='50%')
dw_4 = dict(description_width='40%')
dw_3 = dict(description_width='30%')
dw_2 = dict(description_width='20%')
dw_0 = dict(description_width='0%')

class iSpectrogram(VBox):
    """Interactive spectrogram viewer built from ipywidgets.

    The widget stacks, from top to bottom:

    * the waveform + spectrogram figure (``self.out``),
    * the analysis controls (frame shift, frame length, preemphasis,
      mel scale on/off and number of mel bands),
    * a selection panel (selector on/off, autoplay, centre and width of the
      selection, small waveform overview) next to a file panel (file name
      and audio player),
    * a log area (``self.logscr``).

    All times (``shift``, ``length``, ``sel_time``, ``sel_width``, ``times``,
    ``seltimes``, ``wavtimes``) are expressed in seconds.

    Parameters
    ----------
    shift : float
        Initial frame shift in seconds.
    sample_rate : int
        Initial sample rate; it is overwritten by the rate returned by
        ``audio.load`` as soon as the first file is loaded.
    dpi : int
        Resolution passed on to the plotting routines.
    figwidth : float
        Width of the spectrogram figure; its height is half of this value.
    size : str
        Assigned to ``self.layout.width`` (e.g. ``'100%'``).
    root : str
        Prefix that is prepended to ``fname`` to form the location to load.
    fname : str
        Name of the audio file to load initially.
    """

    def __init__(self,shift=0.01,sample_rate=16000,dpi=72,figwidth=10.,size='100%',
                root='https://homes.esat.kuleuven.be/~spchlab/data/',
                fname='misc/friendly.wav'):
        super().__init__()
        # While True, update() is a no-op.  wav_update() sets it so that the
        # observers triggered by its own widget changes do not re-render on
        # half-updated state.
        self._loading = False

        # analysis parameters
        self.sample_rate = sample_rate
        self.shift = shift
        self.length = 0.03
        self.preemp = 0.97
        self.nmels = 40
        self.melfb = False

        # data and file location
        self.wavdata = None
        self.root = root
        self.fname = fname

        # time ranges: full waveform, displayed range, selected range
        self.wavtimes = [0., 1.]
        self.times = [0., 1.]
        self.seltimes = [0., 1.]
        self.frames = [int(self.times[0]/self.shift), int(self.times[1]/self.shift)]

        # results of the last analysis / rendering
        self.spg = None
        self.nparam = 0
        self.nfr = 0
        self.fig = None

        # selection and playback state
        self.autoplay = False
        self.selection = False
        self.sel_width = 0.2
        self.sel_time = 0.5

        # display settings
        self.dpi = dpi
        self.figwidth = figwidth
        self.layout.width = size

        # --- analysis controls -------------------------------------------
        # The slider values are in seconds (they are used directly as
        # f_shift / f_length), so the labels say "sec"; three decimals are
        # needed in the readout to show the 0.005 steps.
        self.wg_fshift = widgets.FloatSlider(value=self.shift,min=0.005,max=0.050,step=0.005,
                                             description="Shift(sec)",readout_format='.3f',style=dw_3)
        self.wg_flength = widgets.FloatSlider(value=self.length,min=0.0,max=0.100,step=0.005,
                                              description="Length(sec)",readout_format='.3f',style=dw_3)
        self.wg_preemp = widgets.FloatSlider(value=self.preemp,min=0.0,max=1.0,step=0.01,
                                             description="Preemphasis",style=dw_3)
        self.wg_melfb = widgets.Checkbox(value=self.melfb,description='Mel Scale',
                                         indent=True,style=dw_0)
        self.wg_nmels = widgets.IntSlider(value=self.nmels,min=10,max=200,step=1,
                                          description="#bands",style=dw_2)

        self.wg_fshift.observe(self.fshift_observe,'value')
        self.wg_flength.observe(self.flength_observe,'value')
        self.wg_preemp.observe(self.preemp_observe,'value')
        self.wg_nmels.observe(self.nmels_observe,'value')
        self.wg_melfb.observe(self.melfb_observe,'value')

        # --- selection and file controls ---------------------------------
        self.wg_selection = widgets.Checkbox(value=self.selection,description='Selector',style=dw_0)
        self.wg_autoplay = widgets.Checkbox(value=self.autoplay,description='AutoPlay',style=dw_0)

        # continuous update of these sliders is visually nice but conflicts with the AutoPlay option
        self.wg_seltime = widgets.FloatSlider(value=self.sel_time,min=self.wavtimes[0],max=self.wavtimes[1],
                                              step=self.shift,description='Center',
                                              continuous_update=False,style=dw_2)
        self.wg_selwidth = widgets.FloatSlider(value=self.sel_width,min=0.02,max=self.wavtimes[1],
                                               step=self.shift,description='Width',
                                               continuous_update=False,style=dw_2)

        self.wg_fname = widgets.Text(value=self.fname,description="File: ",style=dw_4,
                                     continuous_update=False)

        self.wg_fname.observe(self.fname_observe,'value')
        self.wg_selection.observe(self.selection_observe,'value')
        self.wg_selwidth.observe(self.selwidth_observe,'value')
        self.wg_seltime.observe(self.seltime_observe,'value')
        self.wg_autoplay.observe(self.autoplay_observe,'value')

        # --- output areas and overall layout -----------------------------
        self.wavout = widgets.Output()
        self.audio = widgets.Output()

        self.controls = VBox( [ HBox([ self.wg_fshift,self.wg_flength,self.wg_preemp ]),
                                HBox([ self.wg_melfb, self.wg_nmels ],layout=Layout(width='40%'))
                              ], layout=box_layout() )
        self.file_controls = VBox( [ self.wg_fname, HBox([ self.audio ]) ],
                                   layout=box_layout() )
        self.file_controls.layout.width = '40%'
        self.range_slider = VBox([
            HBox([ VBox([ self.wg_selection, self.wg_autoplay ],layout=Layout(width='50%')),
                   VBox([ self.wg_seltime, self.wg_selwidth ]) ]),
            self.wavout ],
            layout=box_layout() )
        self.range_slider.layout.width = '60%'

        self.out = widgets.Output( layout=Layout(width='100%') )
        self.logscr = widgets.Output()

        self.children = [ self.out, self.controls,
                          HBox([ self.range_slider, self.file_controls ]), self.logscr ]

        self.wav_update()
        self.update()

    @staticmethod
    def _set_slider_range(slider, lo, hi):
        """Assign ``slider.min``/``slider.max`` without a transient min > max."""
        if lo > slider.max:
            # widen the upper bound first, otherwise the new min is rejected
            slider.max = hi
            slider.min = lo
        else:
            slider.min = lo
            slider.max = hi

    def _show_waveform(self, markers=None):
        """Plot the waveform overview into ``self.wavout``.

        When ``markers`` is given, red vertical lines are drawn at those
        times to show the selected segment.
        """
        self.wavfig = spchd.PlotWaveform(self.wavdata,sample_rate=self.sample_rate,
                                         ylabel=None,xlabel=None,figsize=(10.,1.0),dpi=self.dpi)
        if markers is not None:
            self.wavfig.axes[0].vlines(markers,-1.,1.,'r')
            self.wavfig.axes[0].set_xlabel(None)
        with self.wavout:
            clear_output(wait=True)
            display(self.wavfig)

    def wav_update(self):
        """Load ``root + fname`` and reset selection state and slider ranges.

        The selector is switched off, the displayed and selected ranges are
        reset to the full waveform, the selection sliders are rescaled to
        the duration of the new file and the waveform overview is redrawn.
        The spectrogram itself is not recomputed here; call ``update()``
        afterwards.  Exceptions raised by ``audio.load`` propagate.
        """
        self.wavdata, self.sample_rate = audio.load(self.root+self.fname)
        self.wavtimes = [0., len(self.wavdata)*(1./self.sample_rate)]

        # The widget assignments below fire the observers; suppress their
        # update() calls until the state is consistent again.
        self._loading = True
        try:
            self.selection = False
            self.wg_selection.value = self.selection
            # separate copies, so the three ranges never alias each other
            self.times = list(self.wavtimes)
            self.seltimes = list(self.wavtimes)
            self.frames = None

            self.wg_selwidth.max = self.wavtimes[1]/1.1
            self.sel_width = min(self.sel_width,self.wg_selwidth.max)
            self.wg_selwidth.value = self.sel_width

            # Set the range BEFORE the value: otherwise the new centre is
            # clamped to the range that was valid for the previous file.
            self._set_slider_range(self.wg_seltime,
                                   self.wavtimes[0] + self.sel_width/2.,
                                   self.wavtimes[1] - self.sel_width/2.)
            self.wg_seltime.value = self.wavtimes[1]/2.
            # keep the attribute in sync even if the observer did not fire
            self.sel_time = self.wg_seltime.value
        finally:
            self._loading = False

        self._show_waveform()

    def update(self):
        """Recompute the spectrogram and refresh figure and audio player.

        The frame length is kept at 2 ms or more and the frame shift is kept
        no larger than the frame length.  With the selector on, the figure
        shows the range ``sel_time +/- sel_width`` (limited to the waveform)
        with red markers at ``sel_time +/- sel_width/2`` and the audio player
        holds the marked segment; with the selector off everything refers to
        the full waveform.
        """
        if self._loading:
            return

        if self.length < 0.002: self.length = 0.002
        if self.shift > self.length: self.shift = self.length
        self.n_shift = int(self.shift*self.sample_rate)
        nmels = self.nmels if self.melfb else None

        self.spg = specg.spectrogram(self.wavdata,sample_rate=self.sample_rate,f_shift=self.shift,
                                     f_length=self.length,preemp=self.preemp,n_mels=nmels)
        (self.nparam,self.nfr) = self.spg.shape

        if self.selection:
            self.seltimes = [ max(self.wavtimes[0],self.sel_time - self.sel_width/2.0),
                              min(self.wavtimes[1],self.sel_time + self.sel_width/2.0) ]
            self.times = [ max(self.wavtimes[0],self.sel_time - self.sel_width),
                           min(self.wavtimes[1],self.sel_time + self.sel_width) ]
            self.frames = [int(self.times[0]/self.shift), int(self.times[1]/self.shift)]
            self._show_waveform(markers=self.seltimes)
        else:
            # No selection: fall back to the full waveform so that the audio
            # player does not keep playing a previously selected segment.
            self.seltimes = list(self.wavtimes)
            self.times = list(self.wavtimes)
            self.frames = None

        self.fig = spchd.PlotSpg(spgdata=self.spg,wavdata=self.wavdata,sample_rate=self.sample_rate,
                                 shift=self.shift,frames=self.frames,
                                 figsize=(self.figwidth,0.5*self.figwidth),dpi=self.dpi)
        if self.selection:
            self.fig.axes[0].vlines(self.seltimes,-1.,1.,'r')
            self.fig.axes[1].vlines(self.seltimes,0,self.nparam,'r')

        with self.out:
            clear_output(wait=True)
            display(self.fig)
        with self.audio:
            clear_output(wait=True)
            first = int(self.seltimes[0]*self.sample_rate)
            last = int(self.seltimes[1]*self.sample_rate)
            display(Audio(data=self.wavdata[first:last],rate=self.sample_rate,autoplay=self.autoplay))

    def fname_observe(self,change):
        """Load the newly entered file; report a failure in the log area."""
        previous_fname = self.fname
        previous_data = self.wavdata
        self.fname = change.new
        with self.logscr:
            print(self.fname)
        try:
            self.wav_update()
        except Exception as err:
            # UI callback boundary: the exception types raised by audio.load
            # are not known here, so report the problem instead of raising
            # inside the widget event handler.
            if self.wavdata is previous_data:
                # nothing was loaded, so keep naming the file that is shown
                self.fname = previous_fname
            with self.logscr:
                print(f"Could not load '{self.root}{change.new}': {err}")
            return
        self.update()

    def selection_observe(self,change):
        """Switch the selector on or off."""
        self.selection = change.new
        if not self.selection and not self._loading:
            # remove the red selection markers from the waveform overview
            self._show_waveform()
        self.update()

    def seltime_observe(self,change):
        """Move the centre of the selection."""
        self.sel_time = change.new
        self.update()

    def selwidth_observe(self,change):
        """Change the width of the selection."""
        self.sel_width = change.new
        self.update()

    def autoplay_observe(self,change):
        """Store the autoplay flag; it is applied at the next ``update()``."""
        self.autoplay = change.new

    def fshift_observe(self,change):
        """Change the frame shift and use it as step of the selection sliders."""
        self.shift = change.new
        self.wg_seltime.step = self.shift
        self.wg_selwidth.step = self.shift
        self.update()

    def flength_observe(self,change):
        """Change the frame length."""
        self.length = change.new
        self.update()

    def preemp_observe(self,change):
        """Change the preemphasis coefficient."""
        self.preemp = change.new
        self.update()

    def nmels_observe(self,change):
        """Change the number of mel bands."""
        self.nmels = change.new
        self.update()

    def melfb_observe(self,obj):
        """Switch between the classical and the mel spectrogram."""
        # Take the checkbox state instead of toggling, so that the attribute
        # can never get out of step with the widget.
        self.melfb = obj.new
        self.update()

In [7]:
# Build the interactive viewer at 100 dpi and full notebook width; the bare
# name on the last line makes the notebook render the widget as cell output.
iSpg = iSpectrogram(size='100%', dpi=100)
iSpg

iSpectrogram(children=(Output(layout=Layout(width='100%')), VBox(children=(HBox(children=(FloatSlider(value=0.…