# Process Sampled Data

This notebook shows how to process the "left-side" and "right-side" WAV files.

Specifically, I will take each one-minute clip and build a dataset of 120 rows (one second each).

The resulting dataset could be further processed, restructured, etc., depending on the features extracted for a neural network.

In [None]:
import argparse
import wave
import struct
import wav2vec
import matplotlib.pyplot as plt
import csv
import numpy as np

import scipy
from scipy import signal
from scipy.io.wavfile import read
from scipy.io import wavfile

import librosa
import librosa.display

from matplotlib import cm


In [None]:
def getWav(audio_wav):
    """Load a WAV file and reduce it to a single channel.

    Parameters
    ----------
    audio_wav : str or file-like
        Anything accepted by ``scipy.io.wavfile.read``.

    Returns
    -------
    sample : numpy.ndarray
        Copy of the audio data; only the first channel when the file has
        two or more dimensions.
    sampleWav : numpy.ndarray
        The data as returned by ``read``; only the first channel when the
        file has two or more dimensions.
    rate : int
        Sample rate reported by ``read``.
    """
    print("getting wav file specs ... ")
    # load the .wav file (sampled from Windows side and copied over to guest side)
    rate, wav_data = read(audio_wav)

    # work on an independent copy so the array returned by read() is left alone
    sample = np.array(wav_data)

    # single-channel data is already in the shape we want
    if sample.ndim < 2:
        return sample, wav_data, rate

    # multi-channel data: keep only the first channel of both arrays
    return sample[:, 0], wav_data[:, 0], rate


In [None]:
def plotData(SAMPLE, sample, sample_mfcc):
    """Show a three-panel figure for one recording.

    Parameters
    ----------
    SAMPLE : array-like
        Values whose absolute value is plotted in the top panel
        (presumably the FFT of ``sample`` -- confirm against the caller).
    sample : array-like
        Values plotted as-is in the middle panel.
    sample_mfcc : array-like
        Matrix rendered with ``librosa.display.specshow`` in the bottom panel.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Subplots: one row per view of the recording
    fig, axs = plt.subplots(3, 1)

    axs[0].plot(np.abs(SAMPLE))
    axs[0].set_title("Magnitude spectrum")

    axs[1].plot(sample)
    axs[1].set_title("Waveform")

    # Target the third panel explicitly instead of relying on pyplot's
    # implicit "current axes" state.
    librosa.display.specshow(sample_mfcc, x_axis='time', ax=axs[2])
    axs[2].set_title("MFCC")

    fig.tight_layout()
    plt.show()


In [None]:
def main(argv=None, show_plots=False):
    """Run the pipeline: load two WAV files, extract features, train, predict.

    Parameters
    ----------
    argv : list of str, optional
        Command-line style arguments, e.g.
        ``['-t', 'left.wav', '-p', 'right.wav']``. When ``None`` (the
        default) argparse reads ``sys.argv[1:]``. Passing the list
        explicitly allows calling ``main`` from a notebook cell, where
        ``sys.argv`` holds the kernel's own arguments.
    show_plots : bool, optional
        When True, call ``plotData`` for both recordings. Defaults to
        False, i.e. no figures are shown.

    Notes
    -----
    NOTE(review): ``getFFT``, ``getMFCC``, ``generateTrainingSet``,
    ``addClassifiers``, ``buildTrainingData``, ``buildLSTM``, ``trainLSTM``
    and ``predictLSTM`` are not defined in the cells shown alongside this
    function -- they must be defined before ``main`` is called.
    """
    parser = argparse.ArgumentParser()
    # Both files are mandatory; without them the WAV loader would be handed
    # None and fail with a much less helpful error.
    parser.add_argument('--train_file', '-t', type=str, required=True)
    parser.add_argument('--predict_file', '-p', type=str, required=True)
    args = parser.parse_args(argv)

    train_filename = args.train_file      # get recorded wave
    predict_filename = args.predict_file

    # Only the single-channel copy is used below; the second array and the
    # sample rate returned by getWav are not needed here.
    train, _, _ = getWav(train_filename)
    predict, _, _ = getWav(predict_filename)

    # for plotting
    TRAIN = getFFT(train)
    PREDICT = getFFT(predict)

    # features for network
    train_mfcc = getMFCC(train_filename)
    predict_mfcc = getMFCC(predict_filename)

    if show_plots:
        plotData(TRAIN, train, train_mfcc)
        plotData(PREDICT, predict, predict_mfcc)

    post_train = generateTrainingSet(train_mfcc)
    post_predict = generateTrainingSet(predict_mfcc)

    # Rows from the train file are tagged 1, rows from the predict file 0.
    classified_train = addClassifiers(post_train, 1)
    classified_predict = addClassifiers(post_predict, 0)

    training_data, raw, classifiers = buildTrainingData(classified_train, classified_predict)

    print(training_data.shape)
    print(raw.shape)
    print(classifiers.shape)

    model = buildLSTM()
    model = trainLSTM(model, raw, classifiers)

    print("CLASSIFIED TRAIN:")
    print(classified_train.shape)
    print(classified_train)

    # TODO(review): 1130 is a hard-coded row limit whose origin is not
    # evident from this function -- confirm and promote to a named constant.
    max_predict_rows = 1130
    # Separate name so the training tensor `raw` above is not shadowed.
    predict_input = classified_train[:max_predict_rows]
    print("RAW:")
    print(predict_input.shape)
    print(predict_input)

    predictLSTM(model, predict_input)


In [None]:
# Entry point: run the pipeline only when this code is executed directly
# rather than imported. main() reads its input files from the command line.
if __name__ == '__main__':
    main()
              