In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

import librosa
from tqdm.notebook import tqdm
from IPython.display import Audio
from pandas.api.types import is_numeric_dtype

import sys
#sys.path.append('../')
import default_style

In [2]:
TS_DATASET_FOLDER = os.path.join('..', 'dataset')
TS_PREPROC_FOLDER = os.path.join(TS_DATASET_FOLDER, 'preprocessed_traces')
DF_PREPROC_FILE = os.path.join(TS_PREPROC_FOLDER, 'preproc_ts.df')

INTERESTING_TRACES = ['clean_trace', 'std_phon_trace', 'traces_fourier', 'syllables_fourier']
INTERESTING_TRACES += [f'syllables_{i}_ts' for i in range(7)]

In [3]:
df = pd.read_csv(DF_PREPROC_FILE).drop(columns=['Unnamed: 0'])
df.head()

Unnamed: 0,vocal_channel,emotion,emotional_intensity,statement,repetition,actor,path,sex,begin_s,end_s,fourier_coeffs,filtered_fourier_coeffs,syll_0_start_index,syll_1_start_index,syll_2_start_index,syll_3_start_index,syll_4_start_index,syll_5_start_index,syll_6_start_index,syll_7_start_index
0,speech,surprised,strong,dogs,1st,actor_01,../dataset/Audio_Speech_Actors_01-24/Actor_01/...,M,1.0075,2.498167,[1.0788441e-05 3.7551079e-02 3.6168553e-02 ......,[1.0788441e-05 3.7551045e-02 3.6168423e-02 ......,0,2872,3920,4536,5536,6480,7136,8936
1,speech,surprised,normal,kids,1st,actor_01,../dataset/Audio_Speech_Actors_01-24/Actor_01/...,M,1.024167,2.357167,[8.6426735e-06 1.0938305e-02 7.4250461e-03 ......,[8.6426735e-06 1.0938293e-02 7.4250125e-03 ......,0,1544,2528,4032,5224,5984,6712,7992
2,speech,angry,normal,dogs,1st,actor_01,../dataset/Audio_Speech_Actors_01-24/Actor_01/...,M,1.029333,2.4675,[1.1563301e-05 4.1558143e-02 2.8845785e-02 ......,[1.15633011e-05 4.15581055e-02 2.88456734e-02 ...,0,2992,4368,4912,5896,6768,7400,8624
3,speech,fearful,normal,dogs,2nd,actor_01,../dataset/Audio_Speech_Actors_01-24/Actor_01/...,M,1.0305,2.71,[2.0563602e-06 1.1770763e-02 5.6602592e-03 ......,[2.0563602e-06 1.1770754e-02 5.6602429e-03 ......,0,4576,5064,6240,7544,8248,8784,10072
4,speech,fearful,strong,kids,2nd,actor_01,../dataset/Audio_Speech_Actors_01-24/Actor_01/...,M,1.047667,3.127167,[6.9618225e-05 1.3387279e-02 8.0524459e-02 ......,[6.9618225e-05 1.3387272e-02 8.0524310e-02 ......,0,1984,2504,3648,5176,8320,10256,12472


In [4]:
traces = dict()
for t in INTERESTING_TRACES:
    traces[t] = np.load(os.path.join(TS_PREPROC_FOLDER, f'{t}.npy'), allow_pickle=True)

traces['syll_labels'] = np.repeat(np.arange(7), len(traces['syllables_fourier'])//7)
print(traces['syll_labels'].shape)

(17164,)


In [5]:
traces.keys()

dict_keys(['clean_trace', 'std_phon_trace', 'traces_fourier', 'syllables_fourier', 'syllables_0_ts', 'syllables_1_ts', 'syllables_2_ts', 'syllables_3_ts', 'syllables_4_ts', 'syllables_5_ts', 'syllables_6_ts', 'syll_labels'])

In [6]:
fourier_traces = traces['traces_fourier']
fourier_traces

array([[-4.90004350e-05,  7.73346967e-01, -1.79433335e-01, ...,
        -2.69684771e+00, -9.77675485e-01, -2.10445147e+00],
       [-3.62472570e-06,  1.34848667e-01, -2.63034843e-01, ...,
        -7.27813660e-01, -5.68124943e-01, -9.50867586e-01],
       [-6.99467997e-06, -5.24152273e-01,  2.77687184e-01, ...,
         1.94433602e+00, -1.06601257e+00, -1.31455424e+00],
       ...,
       [ 2.15095951e-05, -1.99329731e-01, -1.14561768e+00, ...,
         2.93436020e+00,  1.61758936e+00, -7.89978593e-01],
       [ 2.80587450e-04,  1.06084298e+00, -1.59030824e+00, ...,
        -1.32125137e+00, -1.03488435e+00, -1.52757650e+00],
       [-1.06901232e-04, -1.07254930e+00,  2.83278819e-01, ...,
        -2.11945009e-01, -2.80478659e-01,  6.58119640e-01]])

In [7]:
# sort by actors
df = df.sort_values('actor', ignore_index=True)

# fourier_traces must follow the same sorting
ordered_indices = df.index
fourier_traces = fourier_traces[ordered_indices]

df = df.reset_index(drop=True)

fourier_traces

array([[-4.90004350e-05,  7.73346967e-01, -1.79433335e-01, ...,
        -2.69684771e+00, -9.77675485e-01, -2.10445147e+00],
       [-3.62472570e-06,  1.34848667e-01, -2.63034843e-01, ...,
        -7.27813660e-01, -5.68124943e-01, -9.50867586e-01],
       [-6.99467997e-06, -5.24152273e-01,  2.77687184e-01, ...,
         1.94433602e+00, -1.06601257e+00, -1.31455424e+00],
       ...,
       [ 2.15095951e-05, -1.99329731e-01, -1.14561768e+00, ...,
         2.93436020e+00,  1.61758936e+00, -7.89978593e-01],
       [ 2.80587450e-04,  1.06084298e+00, -1.59030824e+00, ...,
        -1.32125137e+00, -1.03488435e+00, -1.52757650e+00],
       [-1.06901232e-04, -1.07254930e+00,  2.83278819e-01, ...,
        -2.11945009e-01, -2.80478659e-01,  6.58119640e-01]])

In [8]:
from sklearn.model_selection import train_test_split

split_index = df[df['actor'] == 'actor_19'].index[0]

X_train = fourier_traces[:split_index]
X_test = fourier_traces[split_index:]

y_train = df['vocal_channel'][:split_index].to_numpy()
y_test = df['vocal_channel'][split_index:].to_numpy()

X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=100)

X_train.shape, X_valid.shape, X_test.shape, y_train.shape, y_valid.shape, y_test.shape

((1462, 16), (366, 16), (624, 16), (1462,), (366,), (624,))

# KNN using euclidean distance

In [9]:
from sklearn.metrics import accuracy_score
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
import optuna

# objective function to be minimized
def objective_fun(trial):

    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    distance = trial.suggest_categorical('distance', ['euclidean'])
    n_jobs = trial.suggest_int('n_jobs', -1, -1)

    knn = KNeighborsTimeSeriesClassifier(
        n_neighbors=n_neighbors,
        weights=weights,
        distance=distance,
        n_jobs=n_jobs
    )

    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_valid)

    error = accuracy_score(y_valid, y_pred)

    return error


study = optuna.create_study()
study.optimize(objective_fun, n_trials=100, n_jobs=-1)

[I 2023-06-21 17:49:45,804] A new study created in memory with name: no-name-29bfd623-db7f-4ea8-af6d-c846a110e8ac
[I 2023-06-21 17:49:47,665] Trial 5 finished with value: 0.5655737704918032 and parameters: {'n_neighbors': 15, 'weights': 'distance', 'distance': 'euclidean', 'n_jobs': -1}. Best is trial 5 with value: 0.5655737704918032.
[I 2023-06-21 17:49:47,667] Trial 0 finished with value: 0.5792349726775956 and parameters: {'n_neighbors': 24, 'weights': 'distance', 'distance': 'euclidean', 'n_jobs': -1}. Best is trial 5 with value: 0.5655737704918032.
[I 2023-06-21 17:49:47,669] Trial 2 finished with value: 0.5901639344262295 and parameters: {'n_neighbors': 37, 'weights': 'distance', 'distance': 'euclidean', 'n_jobs': -1}. Best is trial 5 with value: 0.5655737704918032.
[I 2023-06-21 17:49:47,704] Trial 6 finished with value: 0.5491803278688525 and parameters: {'n_neighbors': 13, 'weights': 'distance', 'distance': 'euclidean', 'n_jobs': -1}. Best is trial 6 with value: 0.549180327868

In [10]:
best_params = study.best_params
best_params

{'n_neighbors': 2, 'weights': 'uniform', 'distance': 'euclidean', 'n_jobs': -1}

In [11]:
knn = KNeighborsTimeSeriesClassifier(**best_params)
knn.fit(X_train, y_train)

In [12]:
from sklearn.metrics import classification_report

y_pred = knn.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        song       0.43      0.66      0.52       264
      speech       0.59      0.36      0.45       360

    accuracy                           0.49       624
   macro avg       0.51      0.51      0.48       624
weighted avg       0.52      0.49      0.48       624



# KNN using DTW

In [13]:
# objective function to be minimized
def objective_fun(trial):

    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    distance = trial.suggest_categorical('distance', ['dtw'])
    n_jobs = trial.suggest_int('n_jobs', -1, -1)

    knn = KNeighborsTimeSeriesClassifier(
        n_neighbors=n_neighbors,
        weights=weights,
        distance=distance,
        n_jobs=n_jobs
    )

    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_valid)

    error = accuracy_score(y_valid, y_pred)

    return error


study = optuna.create_study()
study.optimize(objective_fun, n_trials=100, n_jobs=-1)

[I 2023-06-21 17:50:05,337] A new study created in memory with name: no-name-ac4a8da9-87f3-4139-990c-af0c6f790658
[I 2023-06-21 17:50:12,406] Trial 2 finished with value: 0.5956284153005464 and parameters: {'n_neighbors': 13, 'weights': 'distance', 'distance': 'dtw', 'n_jobs': -1}. Best is trial 2 with value: 0.5956284153005464.
[I 2023-06-21 17:50:13,031] Trial 3 finished with value: 0.5819672131147541 and parameters: {'n_neighbors': 24, 'weights': 'distance', 'distance': 'dtw', 'n_jobs': -1}. Best is trial 3 with value: 0.5819672131147541.
[I 2023-06-21 17:50:13,662] Trial 4 finished with value: 0.5792349726775956 and parameters: {'n_neighbors': 47, 'weights': 'distance', 'distance': 'dtw', 'n_jobs': -1}. Best is trial 4 with value: 0.5792349726775956.
[I 2023-06-21 17:50:13,670] Trial 7 finished with value: 0.5765027322404371 and parameters: {'n_neighbors': 10, 'weights': 'distance', 'distance': 'dtw', 'n_jobs': -1}. Best is trial 7 with value: 0.5765027322404371.
[I 2023-06-21 17:5

In [14]:
best_params = study.best_params
best_params

{'n_neighbors': 2, 'weights': 'uniform', 'distance': 'dtw', 'n_jobs': -1}

In [15]:
knn = KNeighborsTimeSeriesClassifier(**best_params)
knn.fit(X_train, y_train)

In [16]:
y_pred = knn.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        song       0.43      0.63      0.51       264
      speech       0.58      0.38      0.46       360

    accuracy                           0.48       624
   macro avg       0.50      0.50      0.48       624
weighted avg       0.52      0.48      0.48       624



# Shapelets

In [17]:
from pyts.transformation import ShapeletTransform

