##### This script creates spectrogram matrices from wav files

It successfully converted almost all of the .wav files except for a few corrupted files. It was also not able to convert the files from M01 Session 1 headMic, due to some type of end of file issue.

Leveraged the [following code](https://github.com/kykiefer/depression-detect/blob/master/src/features/spectrograms.py), and made edits where needed.

In [1]:
# Import packages

import numpy as np
from numpy.lib import stride_tricks
import os
from PIL import Image
import scipy.io.wavfile as wav

In [2]:
# Location of source .wav files to convert. Each session has two recordings from two different microphones.

dir_names = ['data/TORGO/F01/Session1',
            'data/TORGO/F03/Session1','data/TORGO/F03/Session2','data/TORGO/F03/Session3',
            'data/TORGO/F04/Session1','data/TORGO/F04/Session2',
            'data/TORGO/M01/Session1','data/TORGO/M01/Session2_3',
            'data/TORGO/M02/Session1','data/TORGO/M02/Session2',
            'data/TORGO/M03/Session2',
            'data/TORGO/M04/Session1','data/TORGO/M04/Session2',
            'data/TORGO/M05/Session1','data/TORGO/M05/Session2']
mics = ['wav_headMic','wav_arrayMic']

In [3]:
# Functions for processing

def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """
    Short-time Fourier transform of audio signal.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize).astype(int))
    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    samples = np.append(np.zeros(np.floor(frameSize/2.0).astype(int)), sig)
    # cols for windowing
    cols = np.ceil((len(samples) - frameSize) / float(hopSize)).astype(int) + 1
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    frames = stride_tricks.as_strided(samples, shape=(cols, frameSize),
                                      strides=(samples.strides[0]*hopSize,
                                      samples.strides[0])).copy()
    frames *= win

    return np.fft.rfft(frames)


def logscale_spec(spec, sr=44100, factor=20.):
    """
    Scale frequency axis logarithmically.
    """
    timebins, freqbins = np.shape(spec)
    
    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins-1)/max(scale)
    scale = np.unique(np.round(scale)).astype(int)
  
    # create spectrogram with new freq bins
    newspec = np.complex128(np.zeros([timebins, len(scale)]))
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            newspec[:, i] = np.sum(spec[:, scale[i]:], axis=1)
        else:
            newspec[:, i] = np.sum(spec[:, scale[i]:scale[i+1]], axis=1)

    # list center freq of bins
    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
    freqs = []
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            freqs += [np.mean(allfreqs[scale[i]:])]
        else:
            freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]

    return newspec, freqs


def stft_matrix(audiopath, binsize=2**10, png_name='tmp.png',
                save_png=False, offset=0):
    """
    A function that converts a wav file into a spectrogram represented by a \
    matrix where rows represent frequency bins, columns represent time, and \
    the values of the matrix represent the decibel intensity. A matrix of \
    this form can be passed as input to the CNN after undergoing normalization.
    """
    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)

    sshow, freq = logscale_spec(s, factor=1, sr=samplerate)
    ims = 20.*np.log10(np.abs(sshow)/10e-6)  # amplitude to decibel
    timebins, freqbins = np.shape(ims)

    ims = np.transpose(ims)
    ims = np.flipud(ims)

    if save_png:
        create_png(ims, png_name)

    return ims


def create_png(im_matrix, png_name):
    """
    Save grayscale png of spectrogram.
    """
    image = Image.fromarray(im_matrix)
    image = image.convert('L')  # convert to grayscale
    image.save(png_name)

In [4]:
# Convert files

for d in dir_names:
    for m in mics:
        dir_name = d + '/' + m
        print('Processing directory' + dir_name + '...')
        if not os.path.exists(dir_name + '_spect/'):
            os.makedirs(dir_name + '_spect/')
        for subdir, dirs, files in os.walk(dir_name):
            for file in files:
                if file.endswith('.wav'):
                    wav_file = os.path.join(subdir, file)
                    png_name = subdir + '_spect/' + file[:-4] + '.png'
                    try:
                        stft_matrix(wav_file, png_name=png_name, save_png=True)
                    except:
                        print('WARNING: Unable to convert ' + file)
                        pass

print("Spectrogram Conversion Complete!")

Processing directorydata/TORGO/F01/Session1/wav_headMic...
Processing directorydata/TORGO/F01/Session1/wav_arrayMic...
Processing directorydata/TORGO/F03/Session1/wav_headMic...
Processing directorydata/TORGO/F03/Session1/wav_arrayMic...
Processing directorydata/TORGO/F03/Session2/wav_headMic...


  samplerate, samples = wav.read(audiopath)


Processing directorydata/TORGO/F03/Session2/wav_arrayMic...
Processing directorydata/TORGO/F03/Session3/wav_headMic...


  samplerate, samples = wav.read(audiopath)


Processing directorydata/TORGO/F03/Session3/wav_arrayMic...
Processing directorydata/TORGO/F04/Session1/wav_headMic...
Processing directorydata/TORGO/F04/Session1/wav_arrayMic...
Processing directorydata/TORGO/F04/Session2/wav_headMic...
Processing directorydata/TORGO/F04/Session2/wav_arrayMic...
Processing directorydata/TORGO/M01/Session1/wav_headMic...


  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read

Processing directorydata/TORGO/M01/Session1/wav_arrayMic...
Processing directorydata/TORGO/M01/Session2_3/wav_headMic...
Processing directorydata/TORGO/M01/Session2_3/wav_arrayMic...
Processing directorydata/TORGO/M02/Session1/wav_headMic...
Processing directorydata/TORGO/M02/Session1/wav_arrayMic...
Processing directorydata/TORGO/M02/Session2/wav_headMic...


  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)


Processing directorydata/TORGO/M02/Session2/wav_arrayMic...
Processing directorydata/TORGO/M03/Session2/wav_headMic...


  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)
  samplerate, samples = wav.read(audiopath)


Processing directorydata/TORGO/M03/Session2/wav_arrayMic...
Processing directorydata/TORGO/M04/Session1/wav_headMic...
Processing directorydata/TORGO/M04/Session1/wav_arrayMic...
Processing directorydata/TORGO/M04/Session2/wav_headMic...
Processing directorydata/TORGO/M04/Session2/wav_arrayMic...
Processing directorydata/TORGO/M05/Session1/wav_headMic...


  samplerate, samples = wav.read(audiopath)


Processing directorydata/TORGO/M05/Session1/wav_arrayMic...
Processing directorydata/TORGO/M05/Session2/wav_headMic...


  ims = 20.*np.log10(np.abs(sshow)/10e-6)  # amplitude to decibel


Processing directorydata/TORGO/M05/Session2/wav_arrayMic...
Spectrogram Conversion Complete!


  samplerate, samples = wav.read(audiopath)
