# Experiment about signal preprocessing for transformer model

- Should I resize the data to the same shape, i.e. `(n_timesteps, n_features)`?

- The Transformer should accept this data as input without problems, for both:

    - training and prediction (`.fit`)
    - inference (`model(x)`)

# Prepare ENV/Load libraries

## Mount Google Drive

In [None]:
import os
from google.colab import drive

# Mount Google Drive into the Colab filesystem so the project files are reachable.
drive.mount('/content/gdrive')
google_drive_path = "/content/gdrive/MyDrive/"

# Change the working directory to the project folder so that relative paths
# used later in the notebook (e.g. "./meta_data/...") resolve against it.

project_path = os.path.join(google_drive_path, "Colab Notebooks/SER/")
os.chdir(project_path)
# List the project directory as a quick check that the chdir landed in the right place.
os.listdir("./")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


['utils',
 'fonts',
 'meta_data',
 'dataset',
 '.git',
 '.env',
 'config.yml',
 'libs',
 'README.md',
 'models',
 'Notebook Template.ipynb',
 '.gitignore',
 'Preprocess Json to CSV.ipynb',
 'logs',
 'HyperParams Tuning Train Transformer.ipynb',
 'Tensorboard vis.ipynb',
 'HyperParams Tuning Train LSTM.ipynb',
 'Untitled0.ipynb',
 'Multi-Layer Transformer.ipynb',
 'SER Model idea.drawio',
 'Experiment Preprocessing for Transformer.ipynb']

In [None]:
# !rm -rf tmp && git clone https://github.com/chuan-khuna/my-python-utils.git tmp && cp -R tmp/utils ./ && cp -R tmp/fonts ./ && rm -rf tmp

In [None]:
# Print the Python version of the runtime.
!python3 --version

Python 3.8.16


In [None]:
# Install a specific cuDNN build (8.1.0.77 for CUDA 11.2), remove the preinstalled
# TensorFlow packages, then install tensorflow_datasets, tensorflow-text and tensorflow.
# NOTE(review): the pip installs are unpinned, and the recorded outputs of this notebook
# report dependency conflicts afterwards — consider pinning exact versions.
!apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2
!pip uninstall -y -q tensorflow keras tensorflow-estimator tensorflow-text
!pip install -q tensorflow_datasets
!pip install -q -U tensorflow-text tensorflow

Reading package lists... Done
Building dependency tree       
Reading state information... Done
libcudnn8 is already the newest version (8.1.0.77-1+cuda11.2).
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tf2onnx 1.13.0 requires flatbuffers<3.0,>=1.12, but you have flatbuffers 22.12.6 which is incompatible.
onnx 1.13.0 requires protobuf<4,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.[0m


In [None]:
# Extra dependencies used by this notebook: plotting (matplotlib, seaborn), pythainlp,
# keras-tuner, PyYAML, and tensorflow-io / tf2onnx / onnxruntime.
# NOTE(review): the recorded output of this cell reports tensorflow 2.9.0 being installed
# while tensorflow-text 2.11.0 requires tensorflow>=2.11 — presumably one of these
# upgrades changes the tensorflow version; confirm and pin compatible versions.
!pip install matplotlib seaborn -Uq
!pip install pythainlp -q
!pip install keras-tuner -q
!pip install pyYAML -q
!pip install tensorflow-io[tensorflow] tf2onnx onnxruntime -Uq

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-text 2.11.0 requires tensorflow<2.12,>=2.11.0; platform_machine != "arm64" or platform_system != "Darwin", but you have tensorflow 2.9.0 which is incompatible.[0m


In [None]:
# Load the TensorBoard notebook extension so the `%tensorboard` magic is available.
%load_ext tensorboard

# Import Libraries

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib
from matplotlib import patheffects
import seaborn as sns

import json
import yaml
import re
from tqdm.notebook import trange, tqdm

import datetime

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import keras_tuner
from tensorflow.keras.layers import *
from tensorflow import keras

# Seed the TensorFlow and NumPy global random generators with one shared value
# so that random operations are repeatable across runs.
seed_ = 20200218
tf.random.set_seed(seed_)
np.random.seed(seed_)

from sklearn.metrics import confusion_matrix, classification_report

In [None]:
from utils.tf_layers.transformer_arch.model import Encoder
from utils.tf_layers.transformer_arch.transformer import TransformerEncoderBlock as TransformerEncoder
from utils.tf_layers.transformer_arch.embedding import FixedPositionalEncoding

## Ensure that matplotlib can use subplot mosaic

In [None]:
def check_version(version_str, major, minor):
    """Print a version string and assert it is at least ``major.minor``.

    The (major, minor) pair is compared as a tuple, so a newer major release with
    a smaller minor number (e.g. ``4.0`` against a required ``3.6``) is accepted.
    Only the leading digits of each component are used, which lets suffixed
    versions such as ``3.7.0rc1`` or ``0.13.dev0`` be parsed.

    Args:
        version_str: Dotted version string, e.g. ``"3.6.2"``.
        major: Minimum required major version.
        minor: Minimum required minor version (applies when the major versions are equal).

    Raises:
        AssertionError: If the parsed version is older than ``major.minor``.
    """
    print(version_str)

    def _leading_int(component):
        # Keep only the leading ASCII digits so "0rc1" -> 0 and "12" -> 12.
        digits = ""
        for ch in component:
            if ch not in "0123456789":
                break
            digits += ch
        return int(digits) if digits else 0

    parts = version_str.split('.')
    found_major = _leading_int(parts[0])
    # A missing minor component ("3") is treated as minor version 0.
    found_minor = _leading_int(parts[1]) if len(parts) > 1 else 0

    if (found_major, found_minor) < (major, minor):
        raise AssertionError(
            f"version {version_str} is older than the required {major}.{minor}"
        )

# Check the installed matplotlib against 3.6 and seaborn against 0.12.
check_version(matplotlib.__version__, 3, 6)
check_version(sns.__version__, 0, 12)

# The helper is not needed after this cell, so remove it from the notebook namespace.
del check_version

# Display the library versions (including TensorFlow) in the cell output.
matplotlib.__version__, sns.__version__, tf.__version__

3.6.2
0.12.1


('3.6.2', '0.12.1', '2.9.0')

In [None]:
import tf2onnx
import onnx
import onnxruntime as rt

In [None]:
import librosa
from librosa.display import specshow
import tensorflow_io as tfio
from IPython.display import Audio
import tensorflow_hub as hub

In [None]:
from utils.vis_utils import *
from libs.audio_preprocessing import AudioPreprocessor

# Directory (on Google Drive) holding custom fonts, passed to `mpl_import_fonts`.
# NOTE(review): `mpl_import_fonts` presumably comes from the `utils.vis_utils` star import — confirm.
# NOTE(review): `google_drive_path` already ends with "/", so this path contains "//" — harmless, but worth tidying.
font_dir = [f"{google_drive_path}/code_assets/fonts/"]
mpl_import_fonts(font_dir)

In [None]:
def get_timestamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMM``."""
    return datetime.datetime.now().strftime("%Y%m%d-%H%M")

get_timestamp()

'20221223-0700'

## View hardware spec

In [None]:
# Show the GPU attached to this runtime, with its driver/CUDA versions and memory usage.
!nvidia-smi

Fri Dec 23 07:00:16 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   60C    P0    30W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# NOTE(review): the result of this call is discarded — it is not the last
# expression of the cell, so nothing is displayed.
tf.config.list_physical_devices('GPU')

# Enable memory growth on every visible GPU so TensorFlow allocates GPU memory
# as needed instead of reserving it all up front.
for device in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(device, True)

## Config Tensorflow precision

In [None]:
# use mixed precision

# policy = tf.keras.mixed_precision.Policy('mixed_float16')
# tf.keras.mixed_precision.set_global_policy(policy)

# print('Compute dtype: %s' % policy.compute_dtype)
# print('Variable dtype: %s' % policy.variable_dtype)

# Load Dataframe

In [None]:
# Load the dataset metadata table; the path is relative to the current working directory.
df = pd.read_csv("./meta_data/dataset.csv")

In [None]:
# Drop rows whose majority emotion label is the string 'None' or 'other',
# and keep only rows whose `agreement` value is at least 0.6.
df = df[df['majority_emo'] != 'None']
df = df[df['majority_emo'] != 'other']
df = df[df['agreement'] >= 0.6]

# Randomly keep 50% of the remaining rows (`frac=0.5`); the sampled rows come back in random order.
# NOTE(review): a pure shuffle would use `frac=1` — confirm the subsampling is intended.
df = df.sample(frac=0.5)

In [None]:
# Name of the dataframe column used as the classification label.
label_col = "majority_emo"

In [None]:
label_col = "majority_emo"
# One-hot encode the label column and append one indicator column per class.
# Any indicator columns left over from a previous run of this cell are dropped
# first, which keeps the cell idempotent: a plain `df.join(...)` raises on
# overlapping column names when the cell is executed a second time.
label_dummies = pd.get_dummies(df[label_col])
df = df.drop(columns=label_dummies.columns, errors="ignore").join(label_dummies)

In [None]:
# Inspect the filtered frame, now including the one-hot label columns.
df

Unnamed: 0,path,file,assigned_emo,majority_emo,agreement,Angry,Frustrated,Happy,Neutral,Sad
13355,./dataset/studio11-20/studio014/con/s014_con_a...,s014_con_actor027_script1_2_4a.flac,Sad,Sad,1.000000,0,0,0,0,1
18316,./dataset/zoom1-10/zoom005/mic/z005_mic_actor0...,z005_mic_actor046_impro4_18.flac,Frustrated,Frustrated,0.857143,0,1,0,0,0
14723,./dataset/studio11-20/studio016/con/s016_con_a...,s016_con_actor031_script1_1_5b.flac,Frustrated,Frustrated,0.833333,0,1,0,0,0
2862,./dataset/zoom11-20/zoom019/mic/z019_mic_actor...,z019_mic_actor074_script2_1_4a.flac,Sad,Neutral,0.625000,0,0,0,1,0
13443,./dataset/studio11-20/studio014/con/s014_con_a...,s014_con_actor028_impro2_17.flac,Happy,Happy,0.714286,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...
20960,./dataset/studio31-40/studio038/con/s038_con_a...,s038_con_actor115_impro7_12.flac,Sad,Sad,1.000000,0,0,0,0,1
32507,./dataset/studio21-30/studio021/con/s021_con_a...,s021_con_actor078_impro8_5.flac,Neutral,Neutral,1.000000,0,0,0,1,0
19050,./dataset/zoom1-10/zoom010/mic/z010_mic_actor0...,z010_mic_actor056_impro3_12.flac,Neutral,Neutral,0.800000,0,0,0,1,0
11056,./dataset/studio11-20/studio011/con/s011_con_a...,s011_con_actor021_impro4_7.flac,Angry,Frustrated,0.750000,0,1,0,0,0


In [None]:
# Class names are the column labels produced by one-hot encoding the label column.
classes = pd.get_dummies(df[label_col]).columns
num_classes = len(classes)
# Show the class names as a plain list.
list(classes)

['Angry', 'Frustrated', 'Happy', 'Neutral', 'Sad']

In [None]:
# Map each class index (its position in `classes`) to a weight that is inversely
# proportional to the number of rows carrying that label, rounded to 3 decimals.
# NOTE(review): the fixed 2.0 divisor does not depend on the number of classes —
# confirm it is the intended scaling for this label set.
class_weight = {
    idx: np.round(1 / (df[label_col] == name).sum() * len(df) / 2.0, 3)
    for idx, name in enumerate(list(classes))
}

class_weight

{0: 3.359, 1: 1.785, 2: 2.59, 3: 1.846, 4: 4.666}

In [None]:
# No class_weight

# class_weight = None

In [None]:
def show_tensor(tensor, take_n=3):
    """Materialise the first ``take_n`` elements of a dataset as a list.

    Args:
        tensor: Object exposing ``take(n)`` whose result exposes
            ``as_numpy_iterator()``.
        take_n: Number of leading elements to collect.

    Returns:
        A list with the collected elements.
    """
    head = tensor.take(take_n)
    return [element for element in head.as_numpy_iterator()]

In [None]:
def plot_spectrogram(tensor, transpose=True):
    """Draw a tensor as a seaborn heatmap on a new 9x3 inch figure.

    Args:
        tensor: Object exposing ``.numpy()``; size-1 axes are squeezed away
            before plotting (assumes the result is 2-D — TODO confirm).
        transpose: When true, plot the transposed array.
    """
    plt.figure(figsize=(9, 3), dpi=100)
    values = tensor.numpy().squeeze()
    sns.heatmap(values.T if transpose else values)
    plt.show()

# Load Tensorflow dataset from df

In [None]:
# Load the YAMNet model from TensorFlow Hub.
yamnet = hub.load('https://tfhub.dev/google/yamnet/1')

In [None]:
def preprocess_audio(file_name, target_rate=16000):
    """Load an audio file, resample it, and run YAMNet on its first channel.

    Args:
        file_name: Path of the audio file to read (decoded as int16).
        target_rate: Sample rate to resample the audio to.
            NOTE(review): YAMNet presumably expects 16 kHz input — confirm
            before passing any other rate.

    Returns:
        A tuple ``(waveform, spectrogram, shape)``: the first channel scaled by
        the int16 maximum, the third output of the ``yamnet`` call, and
        ``tf.shape`` of the waveform.
    """
    audio_io = tfio.audio.AudioIOTensor(file_name, dtype=tf.int16)
    source_rate = tf.cast(audio_io.rate, dtype=tf.int64)

    resampled = tfio.audio.resample(audio_io.to_tensor(), source_rate, target_rate)
    # Keep only channel 0 and scale the int16 samples by the int16 maximum.
    waveform = resampled[:, 0] / tf.int16.max

    # Only the third model output is used; the first two are discarded.
    _, _, spectrogram = yamnet(waveform)

    return waveform, spectrogram, tf.shape(waveform)

In [None]:
# Five randomly sampled file paths, as a NumPy array.
files = df['path'].sample(5).values

In [None]:
# Target sample rate passed to `preprocess_audio` as `target_rate`.
RATE = 16000

In [None]:
# Dataset of file paths, plus two derived datasets that each keep one output of
# `preprocess_audio`: index 0 (the waveform) and index 1 (the spectrogram).
# NOTE(review): both maps call `preprocess_audio` in full, so every file is
# processed once per derived dataset.
path_ds = tf.data.Dataset.from_tensor_slices(df['path'])
wave_ds = path_ds.map(lambda f: preprocess_audio(f, target_rate=RATE)[0])
spec_ds = path_ds.map(lambda f: preprocess_audio(f, target_rate=RATE)[1])



In [None]:
# Accumulators for the per-file waveform lengths and spectrogram lengths.
lengths = []
spec_lengths = []

In [None]:
# Size of the first axis of every waveform and of every spectrogram.
wave_length_ds = wave_ds.map(lambda w: tf.shape(w)[0])
spec_length_ds = spec_ds.map(lambda s: tf.shape(s)[0])

In [None]:
# Materialise every per-file length into a fresh list. Rebuilding the lists here,
# rather than appending to lists created in another cell, keeps this cell
# idempotent: re-running it no longer duplicates the entries. Iterating the
# dataset directly is equivalent to `take(-1)`, which selects all elements.
spec_lengths = list(spec_length_ds.as_numpy_iterator())

lengths = list(wave_length_ds.as_numpy_iterator())

In [None]:
# Summary statistics of the collected spectrogram lengths.
pd.Series(spec_lengths).describe()

In [None]:
# Convert sample counts to seconds *before* summarising, so the `count` row stays
# the number of entries; dividing the `describe()` output by RATE would scale
# the `count` row as well.
(pd.Series(lengths) / RATE).describe()

In [None]:
# Collect the minimum value of every spectrogram.
spec_mins = []

# `take(-1)` keeps all elements, so this loop walks the entire dataset.
for s in spec_ds.take(-1).as_numpy_iterator():
    spec_mins.append(tf.reduce_min(s))