### Imports

In [1]:
import sys
import os
# Make the parent directory importable so that `src.*` resolves from this notebook.
# os.pardir ("..") is understood on every platform, unlike the Windows-only "..\\";
# the membership check keeps sys.path free of duplicates when the cell is re-run.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)
from src.features import mfcc

In [2]:
# Framing parameters for the audio analysis.
sample_rate = 16000   # samples per second
window_size = 0.025   # analysis window duration, in seconds (25 milliseconds)
hop_size = 0.010      # step between consecutive windows, in seconds (10 milliseconds)

# Window length expressed in samples instead of seconds
win_length = int(sample_rate * window_size)

In [12]:
# Essential directories, built with os.path.join so the separators match the
# host OS (the hard-coded "\\" form only resolves on Windows). The final ''
# component keeps the trailing separator that the original literals carried.
root_directory = os.path.join(os.pardir, 'data', 'raw', 'audio_classified', '')
target_dir = os.path.join(os.pardir, 'data', 'processed', '')

# Class name -> integer index mapping handed to mfcc.get_dataloader.
label_to_index = {'Fluent': 0, 'Nonfluent': 1}


## Audio feature extraction

### Load data

In [4]:
# Load the raw audio and generate MFCC features: the data loader is built from
# the raw-audio directory and the class-to-index mapping. The implementation
# lives in src.features.mfcc and is not shown in this notebook.
dataloader = mfcc.get_dataloader(root_directory, label_to_index)



#### Save the processed dataset to a pickle file

In [5]:
# Make sure the output directory exists. exist_ok=True turns this into a single
# call that is safe to re-run and avoids the check-then-create race of the
# separate exists()/makedirs() pair.
os.makedirs(target_dir, exist_ok=True)

# Hand the loader and the output directory to the project helper that saves the features.
mfcc.save_mfcc_features(dataloader, target_dir)

### Load processed dataset

In [6]:
# Reload the processed features and labels through the project helper.
features, labels = mfcc.load_mfcc_features(target_dir)

# Summarise the dataset. Adjacent f-strings are concatenated by the parser, so
# the message no longer carries the source indentation that the backslash line
# continuations used to embed in the printed text.
print(
    f"Features shape: {features.shape},\n"
    f"Labels shape: {labels.shape},\n"
    f"Number of classes: {len(set(labels))},\n"
    f"Number of samples: {len(features)}"
)

Features shape: (350, 1, 250, 400),
       Labels shape: (350,),
       Number of classes: {0, 1},
       Number of samples: 350


### Train-test split

In [8]:
# Imports
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from src.features.triplets import *

seed = 42  # random_state used by the train_test_split calls in the split cell below

In [9]:
# Encode class labels as integers
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split sample *indices* into training (80%), validation (10%) and test (10%)
# sets. Working on indices lets the normalisation statistics below be computed
# from the training samples only. For a fixed random_state the shuffle depends
# only on the number of samples, so each split contains the same samples as
# when the feature array itself is split.
sample_indices = np.arange(len(features))
train_idx, temp_idx, Y_train, y_temp = train_test_split(
    sample_indices, labels_encoded, test_size=0.2, random_state=seed
)
val_idx, test_idx, Y_val, Y_test = train_test_split(
    temp_idx, y_temp, test_size=0.5, random_state=seed
)

# Normalize the feature matrix with statistics of the training split only, so
# that nothing from the validation / test samples leaks into preprocessing.
train_features = features[train_idx]
mean = np.mean(train_features, axis=0)
std = np.std(train_features, axis=0)
del train_features  # temporary copy, no longer needed
normalized_features = (features - mean) / (std + 1e-8)  # small epsilon prevents division by zero

X_train = normalized_features[train_idx]
x_temp = normalized_features[temp_idx]  # validation + test pool, kept under its original name
X_val = normalized_features[val_idx]
X_test = normalized_features[test_idx]

# Report the shape of every split, one per line.
split_shapes = {
    "Train set": X_train.shape,
    "Train label": Y_train.shape,
    "Validation set": X_val.shape,
    "Validation label": Y_val.shape,
    "Test set": X_test.shape,
    "Test label": Y_test.shape,
}
for split_name, split_shape in split_shapes.items():
    print(f"{split_name}: {split_shape}")

       Train set: (280, 1, 250, 400) 
       Train label: (280,) 
       Validation set: (35, 1, 250, 400) 
       Validation label: (35,) 
       Test set: (35, 1, 250, 400) 
       Test label: (35,) 
       


### Generate Triplets

In [10]:
# Generate triplets for training. generate_triplets comes from the star import
# of src.features.triplets; each call is unpacked into a (triplets, labels) pair.
train_triplets, train_labels = generate_triplets(X_train, Y_train)

# NOTE: Size ~ 1.1GB (presumably the training triplets — TODO confirm)

# Same call for the validation & test sets | flagged by the author as probably unnecessary
val_triplets, val_labels = generate_triplets(X_val, Y_val)
test_triplets, test_labels = generate_triplets(X_test, Y_test)

#### Save Triplets

In [None]:
# Bundle the triplets and labels of every split, ordered as
# (train, val, test) x (triplets, labels), and pass the bundle to
# save_triplets together with the output directory.
# NOTE: Modify functions if I figure out val and test triplets are unnecessary.
triplet_bundle = (
    train_triplets, train_labels,
    val_triplets, val_labels,
    test_triplets, test_labels,
)
save_triplets(triplet_bundle, target_dir)

#### Load Triplets

In [None]:
# Reload the six saved objects; presumably they come back in the order in which
# they were handed to save_triplets — TODO confirm against src.features.triplets.
# NOTE: Looks clumsy with too many variables to unpack
# NOTE(review): load_triplets() is called without a directory, while
# save_triplets(...) is given target_dir — confirm it reads from the same location.
train_triplets, train_labels, val_triplets, val_labels, test_triplets, test_labels = load_triplets()

## Initializing Deep Learning network

In [None]:
# imports
from dataclasses import dataclass
import torch


In [None]:
@dataclass
class TrainingConfig:
    """
    Describes configuration of the training process.

    Every field has a default, so ``TrainingConfig()`` is valid and individual
    values can be overridden by keyword.

    NOTE(review): several defaults (``num_classes``, ``img_size``, ``data_root``
    and ``save_model_name``) look carried over from a different, image-based
    project, while this notebook maps two classes (Fluent / Nonfluent). They are
    left unchanged here; confirm or override them before training.
    """

    num_classes: int = 3
    batch_size: int = 16
    # Annotated with the builtin ``tuple``: the previous ``Tuple`` annotation was
    # never imported, so merely defining the class raised NameError.
    img_size: tuple = (224, 224)
    epochs_count: int = 300
    init_learning_rate: float = 0.001  # Initial learning rate
    data_root: str = r"/kaggle/input/opencv-pytorch-project-1-classification/dataset"
    num_workers: int = 2
    device: str = "cuda"

    # Decay rate
    decay_rate: float = 0.1

    # For tensorboard logging and saving checkpoints
    save_model_name: str = "cat_dog_panda_classifier.pt"
    root_log_dir: str = os.path.join("Logs_Checkpoints", "Model_logs")
    root_checkpoint_dir: str = os.path.join("Logs_Checkpoints", "Model_checkpoints")

    # Current log and checkpoint directory.
    log_dir: str = "version_0"
    checkpoint_dir: str = "version_0"

In [None]:
def setup_system(system_config: "SystemConfig") -> None:
    """
    Seed PyTorch and apply the cuDNN settings carried by ``system_config``.

    Args:
        system_config: object exposing ``seed``, ``cudnn_benchmark_enabled`` and
            ``cudnn_deterministic``. The annotation is written as a string
            because ``SystemConfig`` is defined in a later cell; an unquoted
            name would be evaluated when this function is defined and raise
            NameError.

    Returns:
        None. The function only changes global PyTorch state.
    """
    torch.manual_seed(system_config.seed)
    if torch.cuda.is_available():
        # The cuDNN auto-tuner flag is ``torch.backends.cudnn.benchmark``. The
        # previous assignment to ``torch.backends.cudnn_benchmark_enabled`` only
        # created an unused attribute, so the setting never took effect.
        torch.backends.cudnn.benchmark = system_config.cudnn_benchmark_enabled
        torch.backends.cudnn.deterministic = system_config.cudnn_deterministic

In [None]:
@dataclass
class SystemConfig:
    """
    Common system settings needed for reproducible training.
    """

    # Seed number used to set the state of the random number generators
    seed: int = 21

    # Enable the CuDNN benchmark for the sake of performance
    cudnn_benchmark_enabled: bool = True

    # Make cudnn deterministic (reproducible training)
    cudnn_deterministic: bool = True


In [None]:
# imports
from src.models import train, validate, test

In [None]:
# Run training. NOTE(review): called with no arguments — none of the cells shown
# instantiate TrainingConfig / SystemConfig or pass the prepared data; confirm
# that src.models.train obtains its settings and inputs elsewhere.
train()

In [None]:
# Run validation (imported from src.models; called with no arguments — TODO confirm inputs).
validate()

In [None]:
# Run the test step (imported from src.models; called with no arguments — TODO confirm inputs).
test()