# Meowlib Preprocessing Example
This notebook is an example template showing how to use the structured code (`meowlib`) to preprocess your data for modeling.

# Setup

## Settings

In [None]:
# Directory containing the raw training audio; it is handed to
# utils.discover_data further down to locate the individual files.
data_dir = "../../data/raw_data/zenodo.4008297/"

In [None]:
# Seed for the notebook's random state (turned into a RandomState by
# check_random_state in the "Parse Settings" section).
seed = 15_324

## Imports

In [None]:
import os
import sys

In [None]:
import numpy as np
import pandas as pd
import pydub
from sklearn.pipeline import Pipeline
from sklearn.utils import check_random_state
from torch import Tensor
import torchaudio

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's 'whitegrid' style for the figures drawn in this notebook.
sns.set_style('whitegrid')

In [None]:
# Local files
# The meowlib directory is appended to sys.path so that its modules
# (utils, data_handling) can be imported as top-level names.
sys.path.append('../../meowlib/')
import utils, data_handling

## Parse Settings

In [None]:
# Turn the integer seed into a random-state object so that every random draw
# in the notebook is reproducible.
rng = check_random_state(seed=seed)

# Exploratory Analysis
Here are examples of how `meowlib` makes preprocessing easy.

## Load Data

### Training Data

In [None]:
# Locate the training recordings under data_dir.
# The "extension" keyword is optional; here the search is limited to 'wav'.
# Each entry of data_fps is later passed to pydub.AudioSegment.from_file, so
# the entries are presumably file paths — verify against utils.discover_data.
wav_extension = 'wav'
data_fps = utils.discover_data(data_dir, extension=wav_extension)

In [None]:
# All files: decode every discovered recording with pydub, keeping the
# results in the same order as data_fps.
audio_segments = [pydub.AudioSegment.from_file(fp) for fp in data_fps]

In [None]:
# Random single file: draw an index from the seeded generator and keep both
# the index (reused below to look up the same file) and the segment itself.
n_segments = len(audio_segments)
i_example = rng.randint(n_segments)
audio_example = audio_segments[i_example]

In [None]:
# Formatted as an array: one row per recording, right-padded with zeros so
# that every row has the length of the longest recording.
samples_list = data_handling.WavLoader().fit_transform(data_fps)

# Element 0 of each loader entry is the waveform array
# (presumably element 1 is the sample rate — TODO confirm against WavLoader).
waveforms = [entry[0] for entry in samples_list]
width = np.max([waveform.size for waveform in waveforms])

samples = np.zeros((len(waveforms), width))
for row, waveform in zip(samples, waveforms):
    # `row` is a view into `samples`, so this writes into the padded array.
    row[:waveform.size] = waveform

### User Sample

In [None]:
# A user-supplied recording (m4a rather than wav), decoded with pydub just
# like the training files.
user_data_fp = "../../data/raw_data/zachs_cats/pip_and_chell_wet_food.m4a"
user_audio = pydub.AudioSegment.from_file(user_data_fp)

In [None]:
# User signal: integer PCM samples rescaled by the largest value of their
# integer dtype, then wrapped in a torch Tensor.
# NOTE(review): this treats the sample array as a single channel; pydub
# interleaves channels for multi-channel audio — confirm the recording is mono.
raw_user_samples = np.array(user_audio.get_array_of_samples())
full_scale = np.iinfo(raw_user_samples.dtype).max
user_arr = raw_user_samples / full_scale
user_sig = Tensor(user_arr)

## Consistency Between Formats

In [None]:
# Load the training files again through meowlib's WavLoader so its output can
# be compared with the other decoders below.
wav_loader = data_handling.WavLoader()
wavs = wav_loader.fit_transform(data_fps)

In [None]:
# pydub's view of the example file: the raw (integer) sample values.
example_samples = audio_example.get_array_of_samples()
raw_audio_example = np.array(example_samples)

In [None]:
# torchaudio's view of the same example file.
example_fp = data_fps[i_example]
torch_waveform, torch_sample_rate = torchaudio.load(example_fp)
# Keep only index 0 of the loaded tensor (the first channel) as a NumPy array.
torch_audio = np.array(torch_waveform[0])

In [None]:
# Overlay the same recording as decoded by torchaudio, WavLoader and pydub.
# Line widths decrease from the first to the last trace so that all three
# stay visible when the curves coincide.
fig, ax = plt.subplots()

wav_example = wavs[i_example][0]
# pydub yields integer samples; rescale by the dtype maximum so the trace is
# on the same scale as the other two.
pydub_example = raw_audio_example / np.iinfo(raw_audio_example.dtype).max

traces = [
    (torch_audio, {'linewidth': 10, 'label': 'torchaudio'}),
    (wav_example, {'linewidth': 5, 'label': 'WavLoader'}),
    (pydub_example, {'label': 'pydub'}),
]
for signal, line_kwargs in traces:
    ax.plot(np.arange(signal.size), signal, alpha=0.5, **line_kwargs)

ax.legend()

## Extract One-Value-Per-File Entries

In [None]:
# Names of AudioSegment attributes that give one value per recording.
# Each name becomes a column of the summary DataFrame built in the next cell.
attrs = [
    # duration and loudness
    'duration_seconds', 'max_dBFS', 'dBFS',
    # amplitude
    'max_possible_amplitude', 'max',
    # encoding
    'frame_rate', 'frame_width',
    # energy
    'rms',
]

In [None]:
# Summary table: one row per training recording, one column per attribute.
# Building the dict from `attrs` (rather than growing it inside a nested loop)
# guarantees that every column exists, in `attrs` order, even when
# audio_segments is empty.
data = {
    attr: [getattr(segment, attr) for segment in audio_segments]
    for attr in attrs
}
df = pd.DataFrame(data)

In [None]:
# Overall stats: pairwise view of duration and loudness over the training set,
# with histograms on the diagonal and scatter plots elsewhere.
pair_vars = ['duration_seconds', 'max_dBFS', 'dBFS']
g = sns.PairGrid(df, vars=pair_vars)
g.map_diag(sns.histplot)
g.map_offdiag(sns.scatterplot)

fig = plt.gcf()

# Show user audio on every panel. Panels are identified through their axis
# labels, which name the AudioSegment attributes plotted on them.
for panel in fig.axes:
    x_name = panel.get_xlabel()
    y_name = panel.get_ylabel()

    # Histogram panels (y label 'Count'): mark the user's value on the x axis.
    if y_name == 'Count':
        panel.axvline(getattr(user_audio, x_name))
        continue

    # Panels missing either label cannot be mapped to attributes; skip them.
    if x_name == '' or y_name == '':
        continue

    # Scatter panels: add the user's (x, y) point.
    panel.scatter(
        getattr(user_audio, x_name),
        getattr(user_audio, y_name),
    )

## Get Rolling Sample from User Sample
This section works on the raw audio waveform (no spectrogram), purely as an illustration of rolling windows.

In [None]:
# First, bring the user signal to the frame rate of the training example.
# Note that user_sig is overwritten in place, so re-running this cell would
# resample an already resampled signal.
user_rate = user_audio.frame_rate
training_rate = audio_example.frame_rate
resampler = torchaudio.transforms.Resample(orig_freq=user_rate, new_freq=training_rate)
user_sig = resampler(user_sig)

In [None]:
# Window length: chosen in seconds, converted to a whole number of samples at
# the training example's frame rate.
window_size_seconds = 4.0
samples_per_window = window_size_seconds * audio_example.frame_rate
window_size = int(samples_per_window)

In [None]:
# Distance between the centers of consecutive windows: chosen in seconds,
# converted to a whole number of samples.
window_spacing_seconds = 1.0
samples_per_step = window_spacing_seconds * audio_example.frame_rate
window_spacing = int(samples_per_step)

In [None]:
# Centers of the rolling windows, placed so that every window lies entirely
# inside the signal. The stop value is exclusive, so a window that would end
# exactly on the last sample is not generated.
sig_size = user_sig.shape[0]
half_window = window_size // 2
first_center = half_window
stop_center = sig_size - half_window
window_centers = np.arange(first_center, stop_center, window_spacing)

In [None]:
# Get the windows: slice half a window on either side of each center and
# stack the slices into a 2-D array (one row per window).
half_window = window_size // 2
user_arr_resampled = np.array(user_sig)
windows = [
    user_arr_resampled[center - half_window:center + half_window]
    for center in window_centers
]
X = np.array(windows)

In [None]:
# Compare the peak amplitude of each user window with the peak amplitude of
# each training recording (both on a log10 scale).
fig, ax = plt.subplots()

# Shared histogram settings so that the two distributions are comparable.
hist_kwargs = dict(bins=16, density=True, alpha=0.6)

ax.hist(
    np.log10(np.abs(X).max(axis=1)),
    label='user windows',
    **hist_kwargs,
)

ax.hist(
    np.log10(np.abs(samples).max(axis=1)),
    label='training recordings',
    **hist_kwargs,
)

# Without a legend the two overlaid histograms cannot be told apart.
ax.legend()
ax.set_xlabel('log10(max)')

In [None]:
# Compare the spread of each user window with the spread of each training
# recording (both on a log10 scale).
# NOTE(review): the standard deviation is taken of the absolute values, not of
# the signal itself, although the axis is labelled 'log10(std)' — confirm this
# is intended. `samples` is also zero-padded, which affects its per-row std.
fig, ax = plt.subplots()

# Shared histogram settings so that the two distributions are comparable.
hist_kwargs = dict(bins=16, density=True, alpha=0.6)

ax.hist(
    np.log10(np.abs(X).std(axis=1)),
    label='user windows',
    **hist_kwargs,
)

ax.hist(
    np.log10(np.abs(samples).std(axis=1)),
    label='training recordings',
    **hist_kwargs,
)

# Without a legend the two overlaid histograms cannot be told apart.
ax.legend()
ax.set_xlabel('log10(std)')

## Preprocessing Consistency

In [None]:
# The two meowlib steps whose output is checked for consistency below:
# a loader and a spectrogram transformer, both with default settings.
specgram_transformer = data_handling.SpecgramTransformer()
loader = data_handling.FFMPEGLoader()

In [None]:
# Training files -> loader output -> one spectrogram per file.
data_and_rates = loader.fit_transform(X=data_fps)
specgrams = specgram_transformer.fit_transform(X=data_and_rates)

In [None]:
# Same two steps for the user recording, wrapped in a one-element list.
# NOTE(review): fit_transform re-fits both objects on the user file; if these
# transformers learn any state from the training data, transform() would be
# the consistent call here — confirm against meowlib.
user_files = [user_data_fp]
user_data_and_rates = loader.fit_transform(user_files)
user_specgrams = specgram_transformer.fit_transform(user_data_and_rates)

In [None]:
# Display the spectrogram of the user recording on a wide canvas.
fig, ax = plt.subplots(figsize=(20, 10))
user_specgram = user_specgrams[0]
ax.imshow(user_specgram)

In [None]:
# Check the sample rate of the specgrams: size of axis 1 of each spectrogram
# divided by the duration of the matching recording
# (presumably axis 1 is the time axis — TODO confirm against SpecgramTransformer).
spec_rate = np.array([
    specgrams[i].shape[1] / segment.duration_seconds
    for i, segment in enumerate(audio_segments)
])

In [None]:
# The spectrogram rate should be essentially the same for every training file.
spec_rate_spread = np.std(spec_rate)
assert spec_rate_spread < 0.2, 'Variation in the sample rate is high.'

In [None]:
# The user spectrogram should have the same rate as the training average,
# within an absolute tolerance of 0.5.
user_spec_rate = user_specgrams[0].shape[1] / user_audio.duration_seconds
mean_spec_rate = np.mean(spec_rate)
assert np.isclose(user_spec_rate, mean_spec_rate, atol=0.5), (
    'Difference between user and training is high'
)

## Full Data Preprocessing Pipeline

In [None]:
# Training pipeline: load the files, convert them to spectrograms, then pad.
# The step names are the keys under which sklearn exposes each stage.
training_steps = [
    ('load', data_handling.FFMPEGLoader()),
    ('specgram', data_handling.SpecgramTransformer()),
    ('pad', data_handling.PadTransformer()),
]
preprocessing_pipeline = Pipeline(steps=training_steps)
preprocessing_pipeline

In [None]:
# Run the full pipeline on the training files.
# Note: this rebinds X, which previously held the rolling windows of the
# user signal built in the "Get Rolling Sample" section.
X = preprocessing_pipeline.fit_transform(data_fps)

In [None]:
# Display the shape of the preprocessed training data.
X.shape

## Full User Data Preprocessing Pipeline

In [None]:
# User pipeline: the same stages as the training pipeline, plus a 'split'
# stage (RollingWindowSplitter) inserted between 'specgram' and 'pad'.
user_steps = [
    ('load', data_handling.FFMPEGLoader()),
    ('specgram', data_handling.SpecgramTransformer()),
    ('split', data_handling.RollingWindowSplitter()),
    ('pad', data_handling.PadTransformer()),
]
user_preprocessing_pipeline = Pipeline(steps=user_steps)
user_preprocessing_pipeline

In [None]:
# Run the user pipeline on the single user recording (wrapped in a list).
user_X = user_preprocessing_pipeline.fit_transform([user_data_fp, ])

In [None]:
# Display the shape of the preprocessed user data.
user_X.shape

In [None]:
# Apart from the first axis, user data must have the same shape as the
# training data.
training_item_shape = X.shape[1:]
user_item_shape = user_X.shape[1:]
assert training_item_shape == user_item_shape, 'Inconsistent shapes between user and training data.'

In [None]:
import pydub

In [None]:
# Scratch cell: evaluating this attribute makes the notebook display the
# CouldntEncodeError class; the value is not used by any other cell shown here.
pydub.exceptions.CouldntEncodeError