In [25]:
import time
import os
os.environ["OMP_NUM_THREADS"] = "1" # export OMP_NUM_THREADS=4
os.environ["OPENBLAS_NUM_THREADS"] = "1" # export OPENBLAS_NUM_THREADS=4 
os.environ["MKL_NUM_THREADS"] = "1" # export MKL_NUM_THREADS=6
os.environ["VECLIB_MAXIMUM_THREADS"] = "1" # export VECLIB_MAXIMUM_THREADS=4
os.environ["NUMEXPR_NUM_THREADS"] = "1" # export NUMEXPR_NUM_THREADS=6

from numpy import empty
from numpy import load
#import tensorflow as tf

import wandb
from src.training_setup import kfold_cv, train_holdout

# Set CPU as available physical device
import itertools
import numpy as np
import pandas as pd
import itertools
import scipy.fftpack
import tqdm

Set random seed

In [2]:
import sys
sys.path.insert(1, '.')
my_seed = 19951008
import random
random.seed(my_seed)
from numpy.random import seed
seed(my_seed)
from tensorflow import random
random.set_seed(my_seed)

Load data:

In [3]:
%%time
data = np.load('/media/hdd1/khaled/npz_files/final_version/numpy_train_obj_unbalanced.npz', allow_pickle=True)

vectors = []
for x in list(data.keys()):
    vectors.append(data[x])
X, y, X_spk_labels, X_spk_labels_aug, X_aug, y_aug = vectors
X_spk_labels_aug.shape
data.close()
data = None
vectors = None
del data, vectors

X_mel = empty(X.shape, dtype='object')
for i in range(X.shape[0]):
    X_mel[i] = scipy.fftpack.idct(X[i])
X_aug_mel = empty(X_aug.shape, dtype='object')
for i in range(X_aug.shape[0]):
    X_aug_mel[i] = scipy.fftpack.idct(X_aug[i])

CPU times: user 43.9 s, sys: 18.9 s, total: 1min 2s
Wall time: 1min 4s


Load pandas dataframes

In [4]:
train_metadata = pd.read_csv('age-train.txt')
test_metadata = pd.read_csv('age-test.txt')
title_only_metadata = pd.read_csv('age-title_only.txt')

Load test set:

In [5]:
%%time
data = np.load('/media/hdd1/khaled/npz_files/final_version/test_data.npz', allow_pickle=True)

vectors = []
for x in list(data.keys()):
    vectors.append(data[x])
X_test, y_test, X_spk_labels_test = vectors
data.close()
data = None
vectors = None
del data, vectors


X_test_mel = empty(X.shape, dtype='object')
for i in range(X_test.shape[0]):
    X_test_mel[i] = scipy.fftpack.idct(X_test[i])

CPU times: user 5.75 s, sys: 1.88 s, total: 7.63 s
Wall time: 7.79 s


In [6]:
def get_correct_recordings_index(spk_labels):
    print('get_correct_recordings_index >>>')
    spk_labels_dict = {i:spk_labels.count(i) for i in set(spk_labels)}
    least_freq_spk = min(list(spk_labels_dict.values()))
    print(least_freq_spk)
    speaker_indexes = []
    frequency_spk_labels_dict = {}
    for x in set(spk_labels):
        frequency_spk_labels_dict[x] = 0
    for index, spk_id in enumerate(spk_labels):
        frequency_spk_labels_dict[spk_id] += 1
        if frequency_spk_labels_dict[spk_id] > least_freq_spk:
            next
        else:
            speaker_indexes.append(index)
    print('get_correct_recordings_index <<<')
    return speaker_indexes

In the test set, currently, there are all labeled pairs person-yt videos, however we need to balance them so that each speaker has the same weight. The first step is to identify the ids of interest:

In [7]:
%%time
X_spk_video_labels_test=X_spk_labels_test
X_spk_labels_test = [''.join(x.split('-')[1:]) for x in X_spk_video_labels_test]
test_ids_balanced = get_correct_recordings_index(X_spk_labels_test)
len(test_ids_balanced)

get_correct_recordings_index >>>
1
get_correct_recordings_index <<<
CPU times: user 333 ms, sys: 370 µs, total: 333 ms
Wall time: 333 ms


958

This means that we'll have 958 test records!

In [8]:
X_test = X_test[test_ids_balanced]
y_test = y_test[test_ids_balanced]
X_test_mel = X_test_mel[test_ids_balanced]

In [9]:
X_test.shape, y_test.shape, X_test_mel.shape

((958,), (958,), (958,))

# Model train
## CNN 1D : Multi input - Multi output

In [10]:
# Params
norm_strat_to_evaluate = ['sub_mean_dataloader']
y_strategy = ['']
l_reg = [0.0]
filter_n = [30]
kernel_size = [3]
pool_size = [(2)]
dense_n = [256]
batch_size = [128]
lr = [0.01]
optimizer = ['adam']
second_dense_n = [128]
data_augmentation = [True]
selective_data_aug = [False]
loss = ['mse_plus_cross']
block_list = [[1, 1, 1]]
global_avg = [True]
train_combinations = list(itertools.product(['cnn_resnet_1d'],
                                            norm_strat_to_evaluate,
                                            y_strategy,
                                            l_reg,
                                            filter_n,
                                            kernel_size,
                                            pool_size,
                                            dense_n,
                                            batch_size,
                                            lr,
                                            optimizer,
                                            second_dense_n,
                                            data_augmentation,
                                            selective_data_aug,
                                            loss,
                                            block_list,
                                            global_avg
                                            ))
print("Number of training combinations that will now be evaluated:", len(train_combinations))

Number of training combinations that will now be evaluated: 1


In [None]:
model = None
for model, strategy, y_strat, l_reg_value, n_filt, n_kern, n_pool, n_dense, n_batch, lr, optim, neuron_2nd_dense, data_aug, sel_data_aug, loss_type, n_blocks, g_avg in train_combinations:
    timestr = time.strftime("%Y%m%d-%H%M%S")

    config = {
        'batch_size': n_batch,
        'patience': 50,
        'epochs': 300,
        'lr': lr,
        'seed': my_seed,
        'l_reg': 0,
        'log_interval': 1,
        'model_name': model,
        'feature_norm': strategy,
        'y_strategy': y_strat,
        'dropout': True,
        'dataset': 'age',
        'embedding': 'mel_spect_kaldi',
        'folder_fn': 'mfcc/age/',
        'mfcc_shape': (200, X[0].shape[1]),
        'data_augmentation': data_aug,
        'selective_data_aug': sel_data_aug,
        'kernel_initializer': 'glorot_normal',
        'loss': loss_type,
        'random_pick_mfcc': True,
        'generator on both train and test': True,
        'timestamp': timestr,
        'shuffle_temporal': None,
        'block_list': n_blocks,
        'lr_plateau': True,
        'lr_plateau_factor': 0.1,
        'lr_plateau_patience': 15,
        'relu_type': 'relu',
        'batch_norm': True,
        'global_average': g_avg,
        'reduce_mel': False,
        'n_categories': 8,
        'multi_output': True,
        'sampling_strategy': None,
        'without_initial_batch_norm': True,
        'cooldown': 5,
        'class_weights': None,
        'min_lr': 0.00001,
        'include_title_only_obs': True,
        'unbalanced': True,
        'unbalanced_include_title_only_obs': True

    }
    config['filter_n'] = n_filt
    config['kernel_size'] = n_kern
    config['pool_size'] = n_pool
    config['dense_n'] = n_dense
    config['optimizer'] = optim
    config['2nd_dense_n'] = neuron_2nd_dense
    config['strides'] = 1
    wandb.init(
        project='voxceleb_enrichment',
        name='_'.join([model, config['embedding'], strategy]),
        config=config
    )
    print(config)
    model = train_holdout(X_mel, y, X_test_mel, y_test, X_aug_mel, y_aug, strategy, config['model_name'], config)


[34m[1mwandb[0m: Currently logged in as: [33mhechmik[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.23 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'batch_size': 128, 'patience': 50, 'epochs': 300, 'lr': 0.01, 'seed': 19951008, 'l_reg': 0, 'log_interval': 1, 'model_name': 'cnn_resnet_1d', 'feature_norm': 'sub_mean_dataloader', 'y_strategy': '', 'dropout': True, 'dataset': 'age', 'embedding': 'mel_spect_kaldi', 'folder_fn': 'mfcc/age/', 'mfcc_shape': (200, 30), 'data_augmentation': True, 'selective_data_aug': False, 'kernel_initializer': 'glorot_normal', 'loss': 'mse_plus_cross', 'random_pick_mfcc': True, 'generator on both train and test': True, 'timestamp': '20210328-223050', 'shuffle_temporal': None, 'block_list': [1, 1, 1], 'lr_plateau': True, 'lr_plateau_factor': 0.1, 'lr_plateau_patience': 15, 'relu_type': 'relu', 'batch_norm': True, 'global_average': True, 'reduce_mel': False, 'n_categories': 8, 'multi_output': True, 'sampling_strategy': None, 'without_initial_batch_norm': True, 'cooldown': 5, 'class_weights': None, 'min_lr': 1e-05, 'include_title_only_obs': True, 'unbalanced': True, 'unbalanced_include_title_only_obs': Tru

  return f(**kwargs)


Train (92955,) Test (958, 200, 30) Resnet mode None
Epoch 1/300
17143576
Epoch 2/300
17150676
Epoch 3/300
17150676
Epoch 4/300
17150676
Epoch 5/300
17150676
Epoch 6/300
17150676
Epoch 7/300
17152424
Epoch 8/300
17155932
Epoch 9/300
17155932
Epoch 10/300
17155932
Epoch 11/300
17155932
Epoch 12/300
17155932
Epoch 13/300
17155932
Epoch 14/300
17155932
Epoch 15/300
17155932
Epoch 16/300
17155932
Epoch 17/300
17155932
Epoch 18/300
17155932
Epoch 19/300
17155932
Epoch 20/300
17155932
Epoch 21/300
17155932
Epoch 22/300
17155932
Epoch 23/300
17155932
Epoch 24/300
17155932
Epoch 25/300
17155932
Epoch 26/300
17155932
Epoch 27/300
17155932
Epoch 28/300
17155932
Epoch 29/300
17155932
Epoch 30/300
17155932
Epoch 31/300
17155932
Epoch 32/300
17155932
Epoch 33/300
17155932
Epoch 34/300
17155932
Epoch 35/300
17155932
Epoch 36/300

In [None]:
model.save("model-{}".format(config['timestamp']))
wandb.run.finish()

In [None]:
wandb.run.finish()

In [14]:
! ls | grep model*

grep: model-20210328-002928: Is a directory
grep: model-20210328-102247: Is a directory
grep: model-20210328-145849: Is a directory
grep: model-20210328-185850: Is a directory
grep: model-20210328-223050: Is a directory


## CNN 1D: Single input - single output

In [15]:
# Params
norm_strat_to_evaluate = ['sub_mean_dataloader']
y_strategy = ['']
l_reg = [0.0]
filter_n = [30]
kernel_size = [3]
pool_size = [(2)]
dense_n = [256]
batch_size = [128]
lr = [0.01]
optimizer = ['adam']
second_dense_n = [128]
data_augmentation = [True]
selective_data_aug = [False]
loss = ['mse']
block_list = [[1, 1, 1]]
global_avg = [True]
train_combinations = list(itertools.product(['cnn_resnet_1d'],
                                            norm_strat_to_evaluate,
                                            y_strategy,
                                            l_reg,
                                            filter_n,
                                            kernel_size,
                                            pool_size,
                                            dense_n,
                                            batch_size,
                                            lr,
                                            optimizer,
                                            second_dense_n,
                                            data_augmentation,
                                            selective_data_aug,
                                            loss,
                                            block_list,
                                            global_avg
                                            ))
print("Number of training combinations that will now be evaluated:", len(train_combinations))

Number of training combinations that will now be evaluated: 1


In [17]:
model = None
for model, strategy, y_strat, l_reg_value, n_filt, n_kern, n_pool, n_dense, n_batch, lr, optim, neuron_2nd_dense, data_aug, sel_data_aug, loss_type, n_blocks, g_avg in train_combinations:
    timestr = time.strftime("%Y%m%d-%H%M%S")

    config = {
        'batch_size': n_batch,
        'patience': 25,
        'epochs': 300,
        'lr': lr,
        'seed': my_seed,
        'l_reg': 0,
        'log_interval': 1,
        'model_name': model,
        'feature_norm': strategy,
        'y_strategy': y_strat,
        'dropout': True,
        'dataset': 'age',
        'embedding': 'mel_spect_kaldi',
        'folder_fn': 'mfcc/age/',
        'mfcc_shape': (200, X[0].shape[1]),
        'data_augmentation': data_aug,
        'selective_data_aug': sel_data_aug,
        'kernel_initializer': 'glorot_normal',
        'loss': loss_type,
        'random_pick_mfcc': True,
        'generator on both train and test': True,
        'timestamp': timestr,
        'shuffle_temporal': None,
        'block_list': n_blocks,
        'lr_plateau': True,
        'lr_plateau_factor': 0.1,
        'lr_plateau_patience': 20,
        'relu_type': 'relu',
        'batch_norm': True,
        'global_average': g_avg,
        'reduce_mel': False,
        'n_categories': 0,
        'multi_output': None,
        'sampling_strategy': None,
        'without_initial_batch_norm': True,
        'cooldown': 10,
        'class_weights': None,
        'min_lr': 0.00001,
        'include_title_only_obs': True,
        'unbalanced': True,
        'unbalanced_include_title_only_obs': True

    }
    config['filter_n'] = n_filt
    config['kernel_size'] = n_kern
    config['pool_size'] = n_pool
    config['dense_n'] = n_dense
    config['optimizer'] = optim
    config['2nd_dense_n'] = neuron_2nd_dense
    config['strides'] = 1
    wandb.init(
        project='voxceleb_enrichment',
        name='_'.join([model, config['embedding'], strategy]),
        config=config
    )
    print(config)
    model = train_holdout(X_mel, y, X_test_mel, y_test, X_aug, y_aug, strategy, config['model_name'], config)


[34m[1mwandb[0m: wandb version 0.10.23 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'batch_size': 128, 'patience': 25, 'epochs': 300, 'lr': 0.01, 'seed': 19951008, 'l_reg': 0, 'log_interval': 1, 'model_name': 'cnn_resnet_1d', 'feature_norm': 'sub_mean_dataloader', 'y_strategy': '', 'dropout': True, 'dataset': 'age', 'embedding': 'mel_spect_kaldi', 'folder_fn': 'mfcc/age/', 'mfcc_shape': (200, 30), 'data_augmentation': True, 'selective_data_aug': False, 'kernel_initializer': 'glorot_normal', 'loss': 'mse', 'random_pick_mfcc': True, 'generator on both train and test': True, 'timestamp': '20210329-155302', 'shuffle_temporal': None, 'block_list': [1, 1, 1], 'lr_plateau': True, 'lr_plateau_factor': 0.1, 'lr_plateau_patience': 20, 'relu_type': 'relu', 'batch_norm': True, 'global_average': True, 'reduce_mel': False, 'n_categories': 0, 'multi_output': None, 'sampling_strategy': None, 'without_initial_batch_norm': True, 'cooldown': 10, 'class_weights': None, 'min_lr': 1e-05, 'include_title_only_obs': True, 'unbalanced': True, 'unbalanced_include_title_only_obs': True, 'filter

Train (92955,) Test (958, 200, 30) Resnet mode None
Epoch 1/300
20361288
Epoch 2/300
20365196
Epoch 3/300
20365200
Epoch 4/300
20365204
Epoch 5/300
20365204
Epoch 6/300
20365204
Epoch 7/300
20365204
Epoch 8/300
20365204
Epoch 9/300
20365204
Epoch 10/300
20365204
Epoch 11/300
20365204
Epoch 12/300
20365204
Epoch 13/300
20365204
Epoch 14/300
20365204
Epoch 15/300
20365204
Epoch 16/300
20365204
Epoch 17/300
20365204
Epoch 18/300
20365204
Epoch 19/300
20365204
Epoch 20/300
20365204
Epoch 21/300
20365204
Epoch 22/300
20365204
Epoch 23/300
20365204
Epoch 24/300
20365204
Epoch 25/300
20365204
Epoch 26/300
20365204
Epoch 27/300
20365204
Epoch 28/300
20365204
Epoch 29/300
20365204
Epoch 30/300
20365204
Epoch 31/300
20365204
Epoch 32/300
20365204
Epoch 33/300
20365204
Epoch 34/300
20365204
Epoch 35/300
20365204
Epoch 36/300
20365204
Epoch 37/300
20365204
Epoch 38/300
20365204
Epoch 39/300
20365204
Epoch 40/300
20365204
Epoch 41/300
20365204
Epoch 42/300
20365204
Epoch 43/300


20365204
Epoch 44/300
20365204
Epoch 45/300
20365204
Epoch 46/300
20365204
Epoch 47/300
20365204
Epoch 48/300
20365204
Epoch 49/300
20365204
Epoch 50/300
20365204
Epoch 51/300
20365204
Epoch 52/300
20365204
Epoch 53/300
20365204
Epoch 54/300
20365204
Epoch 55/300
20365204
Epoch 56/300
20365204


In [18]:
model.save("model-{}".format(config['timestamp']))
wandb.run.finish()

INFO:tensorflow:Assets written to: model-20210329-155302/assets


0,1
epoch,55.0
loss,118.67455
mae,8.64572
mse,118.67455
val_loss,176.21742
val_mae,10.64664
val_mse,176.21742
_step,55.0
_runtime,9094.0
_timestamp,1617031481.0


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
mae,█▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
mse,█▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▅▆▅▃▅█▁▆▂▂▇▄▂▃▃▃▆▃▁▁▁▂▁█▂▁▁▂▂▃▁▁▁▂▂▂▃▁▁▁
val_mae,▇▆▆▂▅█▂▅▃▁▇▄▁▃▂▃▆▄▂▂▂▃▂█▂▂▁▂▁▂▂▁▁▂▂▃▁▁▁▂
val_mse,▅▆▅▃▅█▁▆▂▂▇▄▂▃▃▃▆▃▁▁▁▂▁█▂▁▁▂▂▃▁▁▁▂▂▂▃▁▁▁
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███


# Evaluate models
## CNN 1-D : Single input - single output

In [20]:
from tensorflow import keras
model = keras.models.load_model('model-20210329-155302/')

In [21]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 200, 30)]    0                                            
__________________________________________________________________________________________________
conv1d_7 (Conv1D)               (None, 200, 30)      2730        input_2[0][0]                    
__________________________________________________________________________________________________
re_lu_9 (ReLU)                  (None, 200, 30)      0           conv1d_7[0][0]                   
__________________________________________________________________________________________________
batch_normalization_12 (BatchNo (None, 200, 30)      120         re_lu_9[0][0]                    
____________________________________________________________________________________________

In [22]:
%%time
data = np.load('/media/hdd1/khaled/npz_files/final_version/test_data.npz', allow_pickle=True)

vectors = []
for x in list(data.keys()):
    vectors.append(data[x])
X_test, y_test, X_spk_video_labels_test = vectors
data.close()
data = None
vectors = None
del data, vectors


X_test_mel = empty(X.shape, dtype='object')
for i in range(X_test.shape[0]):
    X_test_mel[i] = scipy.fftpack.idct(X_test[i])
X_spk_labels_test = [''.join(x.split('-')[1:]) for x in X_spk_video_labels_test]
test_ids_balanced = get_correct_recordings_index(X_spk_labels_test)
len(test_ids_balanced)
X_test = X_test[test_ids_balanced]
y_test = y_test[test_ids_balanced]
X_test_mel = X_test_mel[test_ids_balanced]

get_correct_recordings_index >>>
1
get_correct_recordings_index <<<
CPU times: user 5.87 s, sys: 1.14 s, total: 7.01 s
Wall time: 7.41 s


In [23]:
X_test.shape, y.shape

((958,), (10972,))

In [27]:
y_pred_avg = []
for i, test_track in enumerate(tqdm.tqdm(X_test)):
    n_slice_to_compute = test_track.shape[0] - 200
    current_track_pred = []
    for idx in range(0, n_slice_to_compute, 100):        
        sliced_track = test_track[idx:idx+200,:]
        #print(sliced_track.shape)
        sliced_track = sliced_track - np.mean(sliced_track,axis=0)
        #print(sliced_track.shape)
        slice_pred = model.predict(sliced_track.reshape(1, 200, 30))[0]
        current_track_pred.append(slice_pred)
    # Last prediction:
    sliced_track = test_track[-200:,:]
    sliced_track = sliced_track - np.mean(sliced_track,axis=0)
    #print(sliced_track.shape)
    slice_pred = model.predict(sliced_track.reshape(1, 200, 30))[0]
    current_track_pred.append(slice_pred)
    y_pred_avg.append(np.mean(current_track_pred))

100%|██████████| 958/958 [08:35<00:00,  1.86it/s]


In [28]:
np.mean(np.abs(np.array(y_pred_avg) - y_test))

16.770498279738774