# Prepare ENV/Load libraries

In [1]:
# !rm -rf tmp && git clone https://github.com/chuan-khuna/my-python-utils.git tmp && cp -R tmp/utils ./ && cp -R tmp/fonts ./ && rm -rf tmp

In [2]:
!python3 --version

Python 3.8.16


In [3]:
# Upgrade the plotting stack (-U) quietly (-q); a later cell asserts on the matplotlib/seaborn versions.
!pip install matplotlib seaborn -Uq
# Remaining dependencies, installed quietly. None of these installs is version-pinned,
# so the resolved versions can differ between runs.
!pip install pythainlp -q
!pip install keras-tuner -q
!pip install pyYAML -q

## Mount Google Drive

In [4]:
import os
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/gdrive.
drive.mount('/content/gdrive')
# Root of the user's Drive; later cells build the project and font paths from it.
google_drive_path = "/content/gdrive/MyDrive/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [5]:
# change directory to the project path
# After this chdir, every relative path used later in the notebook
# ("config.yml", "./meta_data/...", "./dataset/") resolves against the project folder on Drive.

project_path = os.path.join(google_drive_path, "Colab Notebooks/SER ME/")
os.chdir(project_path)
# Last expression of the cell: list the folder contents as a quick check of the working directory.
os.listdir("./")

['utils',
 'fonts',
 'Notebook Template.ipynb',
 'meta_data',
 'Preprocess Json to CSV.ipynb',
 'dataset',
 '.git',
 'config.yml',
 'README.md',
 '.gitignore']

# Import Libraries

In [6]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib
from matplotlib import patheffects
import seaborn as sns

import json
import yaml
import re
from tqdm.notebook import trange, tqdm

import datetime

In [7]:
import tensorflow as tf
import tensorflow_datasets as tfds
import keras_tuner

# Seed TensorFlow's and NumPy's global random generators with the same fixed value.
seed_ = 20200218
tf.random.set_seed(seed_)
np.random.seed(seed_)

from sklearn.metrics import confusion_matrix, classification_report

In [8]:
from utils.vis_utils import *

# Directory on Drive holding the custom font files.
# google_drive_path already ends with "/", so formatting it into an f-string produced a
# doubled slash; os.path.join builds the path the same way project_path is built.
font_dir = [os.path.join(google_drive_path, "code_assets/fonts/")]
# mpl_import_fonts comes from utils.vis_utils (star import above); presumably it registers
# the fonts found in these directories with matplotlib -- confirm against the helper.
mpl_import_fonts(font_dir)

## Ensure that matplotlib can use subplot mosaic

In [9]:
def check_version(version_str, major, minor):
    """Print ``version_str`` and assert that it is at least ``major.minor``.

    The parsed (major, minor) pair is compared as a tuple, so a newer major
    release passes regardless of its minor number (e.g. "4.0.1" satisfies a
    3.6 requirement). Only the leading digits of the first two dot-separated
    components are used, so suffixes such as "rc1" or "+cu118" do not break
    parsing; a missing or non-numeric component counts as 0.

    Args:
        version_str: Version string such as ``"3.6.2"``.
        major: Minimum required major version.
        minor: Minimum required minor version (within ``major``).

    Raises:
        AssertionError: If the parsed version is older than ``major.minor``.
    """
    import re  # local import keeps the helper self-contained

    print(version_str)

    found = []
    for component in version_str.split('.')[:2]:
        leading_digits = re.match(r'\d+', component)
        found.append(int(leading_digits.group()) if leading_digits else 0)
    # Pad so that a bare "3" is treated as 3.0.
    found += [0] * (2 - len(found))

    assert tuple(found) >= (major, minor), (
        f"version {version_str} is older than the required {major}.{minor}"
    )

# Assert the minimum versions this notebook expects: matplotlib 3.6 and seaborn 0.12.
check_version(matplotlib.__version__, 3, 6)
check_version(sns.__version__, 0, 12)

# The helper is only needed for this one-off check, so remove it from the namespace.
del check_version

# Last expression of the cell: display the installed versions. TensorFlow is shown
# for reference only; no minimum is asserted for it.
matplotlib.__version__, sns.__version__, tf.__version__

3.6.2
0.12.1


('3.6.2', '0.12.1', '2.9.2')

## View hardware spec

In [10]:
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [11]:
# Query the visible GPUs once and reuse the result; an empty list means no GPU is available.
gpus = tf.config.list_physical_devices('GPU')

# Enable memory growth so TensorFlow allocates GPU memory as needed rather than
# claiming it all at start-up. This must run before the GPUs are initialised.
for device in gpus:
    tf.config.experimental.set_memory_growth(device, True)

# Last expression of the cell, so the detected devices are actually displayed.
gpus

# Preprocess JSON to CSV

In [12]:
# Load the project configuration; the relative path resolves against the project
# directory entered with os.chdir earlier in the notebook.
# NOTE(review): yaml.Loader can construct arbitrary Python objects from tagged YAML.
# If config.yml only holds plain data, yaml.safe_load(f) would be the safer choice -- confirm.
with open("config.yml") as f:
    config = yaml.load(f, yaml.Loader)

In [13]:
# Read the label JSON with its top-level keys as the row index.
labels_nested = pd.read_json("./meta_data/emotion_label.json", orient='index')

# Column 0 holds the nested record for each key; expand it into flat columns.
labels_flat = pd.json_normalize(labels_nested[0])

# Move the keys into a regular column, attach the flattened fields by row position,
# discard the nested original and the 'annotated' field, and name the key column 'file'.
raw_df = (
    labels_nested
    .reset_index()
    .join(labels_flat)
    .drop(columns=[0, 'annotated'])
    .rename(columns={'index': 'file'})
)

In [14]:
# NOTE(review): this rebinds raw_df, discarding the frame built from emotion_label.json
# in the previous cell. Nothing visible in this notebook writes emotion_label.csv --
# confirm which source is intended and drop the other.
raw_df = pd.read_csv("./meta_data/emotion_label.csv")

In [15]:
raw_df

Unnamed: 0,file,assigned_emo,majority_emo,agreement
0,s001_con_actor001_impro1_1.flac,Neutral,Neutral,1.000000
1,s001_con_actor001_impro1_10.flac,Neutral,Neutral,1.000000
2,s001_con_actor001_impro1_11.flac,Neutral,Neutral,0.857143
3,s001_con_actor001_impro1_12.flac,Neutral,Neutral,1.000000
4,s001_con_actor001_impro1_13.flac,Neutral,Neutral,0.875000
...,...,...,...,...
27849,z020_mic_actor076_script3_2_3b.flac,Happy,Happy,0.600000
27850,z020_mic_actor076_script3_2_4a.flac,Sad,Frustrated,0.750000
27851,z020_mic_actor076_script3_2_4b.flac,Sad,Frustrated,0.500000
27852,z020_mic_actor076_script3_2_5a.flac,Frustrated,,0.000000


# Search for available files

In [16]:
# Recursively collect the path of every ".flac" file found under ./dataset/.
audio_files = [
    os.path.join(dir_path, file_name)
    for dir_path, _, file_names in os.walk("./dataset/")
    for file_name in file_names
    if file_name.endswith(".flac")
]

# Merge available files with corresponding labels

In [17]:
df = pd.DataFrame({'path': audio_files})

In [18]:
# Bare file name of each audio path; used as the join key against the label table.
# The paths were built with os.path.join, so split them with os.path.basename rather
# than a hard-coded '/' separator.
df['file'] = df['path'].map(os.path.basename)

In [19]:
# Left-join the labels onto the discovered files by file name; files without a
# matching label row keep NaN in the label columns.
# Selecting the two base columns first keeps the cell re-runnable: joining onto an
# already-joined df would raise "columns overlap but no suffix specified".
df = df[['path', 'file']].join(raw_df.set_index('file'), on='file')

In [20]:
df

Unnamed: 0,path,file,assigned_emo,majority_emo,agreement
0,./dataset/zoom1-10/zoom003/mic/z003_mic_actor0...,z003_mic_actor041_impro2_6.flac,Happy,Happy,1.00
1,./dataset/zoom1-10/zoom003/mic/z003_mic_actor0...,z003_mic_actor042_impro4_4.flac,Frustrated,Frustrated,0.75
2,./dataset/zoom1-10/zoom005/mic/z005_mic_actor0...,z005_mic_actor045_impro2_38.flac,Happy,Happy,1.00
3,./dataset/zoom1-10/zoom002/mic/z002_mic_actor0...,z002_mic_actor040_impro5_3.flac,Sad,Frustrated,0.60
4,./dataset/zoom1-10/zoom002/mic/z002_mic_actor0...,z002_mic_actor040_impro3_6.flac,Neutral,Neutral,0.50
...,...,...,...,...,...
1295,./dataset/zoom1-10/zoom006/mic/z006_mic_actor0...,z006_mic_actor047_impro1_8.flac,Neutral,Neutral,1.00
1296,./dataset/zoom1-10/zoom006/mic/z006_mic_actor0...,z006_mic_actor048_script3_1_4a.flac,Sad,Sad,1.00
1297,./dataset/zoom1-10/zoom006/mic/z006_mic_actor0...,z006_mic_actor048_script3_2_5a.flac,Frustrated,Frustrated,0.75
1298,./dataset/zoom1-10/zoom006/mic/z006_mic_actor0...,z006_mic_actor048_impro2_11.flac,Happy,Happy,0.90


In [21]:
# Persist the file/label table for later use; index=False leaves the DataFrame index out of the CSV.
df.to_csv("./meta_data/dataset.csv", index=False)