# Construct audio from accelerometer data

In [1]:
from tensorflow.keras import layers, losses
from tensorflow.python.keras.models import Model
%config IPCompleter.greed=True

import tensorflow as tf
import matplotlib.pyplot as plt
import util
import numpy as np

from util import *

  exec(code_obj, self.user_global_ns, self.user_ns)


## Load the dataset

Load accelerometer data

In [2]:
# Directory holding the recordings, relative to the notebook's working directory.
RECORDINGS_DIR = 'data/recordings'

# read_radio_file is presumably provided by the star-import from `util`.
# It returns two values: the raw recordings and their labels.
raw, label = read_radio_file(RECORDINGS_DIR)

## Preprocess dataset

Make every recording the same length, then apply the Fourier transform.

In [3]:
# Fourier-transform settings (presumably in samples and Hz — TODO confirm units).
WINDOW_SIZE, SAMPLE_RATE, SAMPLE_NUM = 256, 8000, 5120
OVERLAP = WINDOW_SIZE // 2  # consecutive windows overlap by half a window

# NOTE(review): this rebinds `util` — previously the imported module — to a
# Util instance, so module-level attributes of `util` are unreachable by that
# name from here on.
util = Util(WINDOW_SIZE, SAMPLE_RATE, SAMPLE_NUM, OVERLAP)

# Cut the raw recordings, transform them, then drop the first two entries
# along axis 1 and the first and last entries along axis 2.
wave = util.cut(raw)
spec = util.ft(wave)[:, 2:, 1:-1, :]

# The trimmed array is used directly as training data; its trailing three
# dimensions give the per-sample shape passed to the model.
X_train = spec
n_data, spec_length, time_length, channel_num = X_train.shape
input_shape = (spec_length, time_length, channel_num)

## Build and train the model

We use a convolutional autoencoder to reconstruct the audio.

In [4]:
class Denoise(tf.keras.Model):
    """Convolutional autoencoder.

    The encoder stacks three 3x3 ``Conv2D`` layers (64 -> 32 -> 16 filters).
    The decoder mirrors it with three 3x3 ``Conv2DTranspose`` layers
    (16 -> 32 -> 64 filters) followed by a 3x3 ``Conv2D`` that maps back to
    the input's channel count. Every layer uses ``padding='same'`` and ReLU.

    The base class is the public ``tf.keras.Model`` so that it comes from the
    same Keras implementation as ``tf.keras.Sequential`` and the layers used
    below, rather than from the private ``tensorflow.python.keras`` package.

    Args:
        input_shape: Shape of a single sample without the batch axis. The
            last entry is used as the number of channels to reconstruct.
    """

    def __init__(self, input_shape):
        super().__init__()

        # Reconstruct as many channels as the input has, so the output shape
        # matches the target when the model is trained on (x, x) pairs.
        out_channels = input_shape[-1]

        self.encoder = tf.keras.Sequential([
            layers.Input(shape=input_shape),
            layers.Conv2D(64, (3,3), activation='relu', padding='same'),
            layers.Conv2D(32, (3,3), activation='relu', padding='same'),
            layers.Conv2D(16, (3,3), activation='relu', padding='same'),
        ])

        # No Input layer here: the decoder is built on its first call.
        self.decoder = tf.keras.Sequential([
            layers.Conv2DTranspose(16, 3, activation='relu', padding='same'),
            layers.Conv2DTranspose(32, 3, activation='relu', padding='same'),
            layers.Conv2DTranspose(64, 3, activation='relu', padding='same'),
            layers.Conv2D(out_channels, (3,3), activation='relu', padding='same'),
        ])

    def call(self, x):
        """Encode ``x`` and return the decoder's reconstruction of it."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
        
# Build the model for the preprocessed sample shape and list the encoder's
# layers. The encoder starts with an Input layer, so it can be summarized
# before any data has been passed through it.
autoencoder = Denoise(input_shape=input_shape)
autoencoder.encoder.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 127, 39, 64)       640       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 127, 39, 32)       18464     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 127, 39, 16)       4624      
Total params: 23,728
Trainable params: 23,728
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Optimize with Adam against the mean squared error between the
# reconstruction and the target.
autoencoder.compile(
    loss=losses.MeanSquaredError(),
    optimizer='adam',
)

In [None]:
# The input doubles as the target: the network is trained to reproduce
# X_train. A fraction of 0.2 of the samples is held out for validation.
autoencoder.fit(
    x=X_train,
    y=X_train,
    epochs=5,
    shuffle=True,
    validation_split=0.2,
)

Epoch 1/5
Epoch 2/5

In [None]:
# List the decoder's layers. The decoder is declared without an Input layer,
# so this presumably needs the model to have been run at least once (the fit
# call does that) — run this cell after training.
autoencoder.decoder.summary()

In [None]:
# Run the whole dataset through the encoder and then the decoder.
# predict() works through the data in mini-batches and returns NumPy arrays,
# whereas calling the models directly would push all of X_train through as
# one single batch.
encoded_imgs = autoencoder.encoder.predict(X_train, verbose=0)
decoded_imgs = autoencoder.decoder.predict(encoded_imgs, verbose=0)

n = 2  # number of examples to display

# Top row: inputs; bottom row: reconstructions.
# squeeze=False keeps `axes` two-dimensional even when n == 1.
fig, axes = plt.subplots(2, n, figsize=(20, 4), squeeze=False)
for i in range(n):

    # display original + noise
    # NOTE(review): no noise is added anywhere in the visible code, so this
    # title may be a leftover — confirm the intended label.
    ax = axes[0, i]
    ax.set_title("original + noise")
    ax.imshow(X_train[i, :, :, 0])  # first channel only
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # display reconstruction
    bx = axes[1, i]
    bx.set_title("reconstructed")
    bx.imshow(decoded_imgs[i, :, :, 0])  # first channel only
    bx.get_xaxis().set_visible(False)
    bx.get_yaxis().set_visible(False)
plt.show()