In [15]:
import os
import librosa
import numpy as np
import scipy.signal
import pandas as pd
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sktime.datasets import load_arrow_head  # univariate dataset
from sktime.datasets import load_basic_motions  # multivariate dataset
from sktime.datasets import (
    load_japanese_vowels,  # multivariate dataset with unequal length
)
from sktime.transformations.panel.rocket import (
    MiniRocket,
    MiniRocketMultivariate,
    MiniRocketMultivariateVariable,
)
from scipy.io.wavfile import read
import utils as utils_lib


In [16]:
# import os
# import sys
# module_path = os.path.abspath(os.path.join('.'))
# if module_path not in sys.path:
#     sys.path.append(module_path)
all_files = utils_lib.get_files('/Users/yizhar/Downloads/archive')
all_files = [item for item in all_files if item.endswith('.wav')]

In [17]:
train_wav_files = [item for item in all_files if 'train' in item]
test_wav_files = [item for item in all_files if 'test' in item]
train_wav_files.sort()
test_wav_files.sort()
len_train_wav_files = len(train_wav_files)
len_test_wav_files = len(test_wav_files)
print(len_test_wav_files)
print(len_train_wav_files)

300
1140


In [18]:
longest_file, size = utils_lib.get_longest_duration_file(all_files)
print(longest_file)
series, sr = librosa.load(longest_file)
max_series_size = series.shape[0]
print(max_series_size) 

/Users/yizhar/Downloads/archive/audio_speech_actors_01-24/Actor_19/03-01-07-02-01-02-19.wav
116247


In [19]:
train_series = np.zeros((len_train_wav_files, max_series_size), dtype=np.float32)
y_train = np.zeros(len_train_wav_files, dtype=np.int16)
for file_no, file in enumerate(train_wav_files):
    file_name = os.path.basename(file)
    emotion_int = int(file_name.split('-')[2])
    y_train[file_no] += emotion_int
    sample_series, _ = librosa.load(file)
    train_series[file_no][:len(sample_series)] += sample_series

print(train_series.shape)
print(y_train.shape)
    

(1140, 116247)
(1140,)


In [31]:
kernels_num = 128
features_num = 9996

In [33]:
X_transformed_features_train = np.empty((len_train_wav_files, features_num), dtype=np.float32)
minirocket = MiniRocket()  # by default, MiniRocket uses ~10_000 kernels
minirocket.fit(train_series[0])
for idx, input_series in enumerate(train_series):
    if idx<5:
        print(f'start transforming {idx}')
    X_transformed_features_train[idx] = minirocket.transform(input_series)

# X_transformed_features_train = [minirocket.fit_transform(input_series) for input_series in train_series]

start transforming 0
start transforming 1
start transforming 2
start transforming 3
start transforming 4


In [34]:
scaler = StandardScaler(with_mean=False)
X_std_features_train = scaler.fit_transform(X_transformed_features_train)

In [35]:
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
classifier.fit(X_std_features_train, y_train)

In [36]:
test_series = np.zeros((len_test_wav_files, max_series_size), dtype=np.float32)
y_test = np.zeros(len_test_wav_files, dtype=np.int16)
for file_no, file in enumerate(test_wav_files):
    file_name = os.path.basename(file)
    emotion_int = int(file_name.split('-')[2])
    y_test[file_no] += emotion_int
    sample_series, _ = librosa.load(file)
    test_series[file_no][:len(sample_series)] += sample_series

print(test_series.shape)
print(y_test.shape)

(300, 116247)
(300,)


In [37]:
X_transformed_features_test = np.empty((len_test_wav_files, features_num), dtype=np.float32)
for idx, input_series in enumerate(test_series):
    X_transformed_features_test[idx] = minirocket.transform(input_series)

In [38]:
X_std_features_test = scaler.transform(X_transformed_features_test)
classifier.score(X_std_features_test, y_test)

0.5233333333333333

In [4]:
# Biodata = {'Name': ['John', 'Emily', 'Mike', 'Lisa'],
#         'Age': [28, 23, 35, 31],
#         'Gender': ['M', 'F', 'M', 'F']
#         }
# df = pd.DataFrame(Biodata)

# # Save the dataframe to a CSV file
# df.to_csv('Biodata.csv', index=False)

In [5]:
# from_csv_df = pd.read_csv('Biodata.csv')
# print(from_csv_df)

In [6]:
max_len = 0
i=0
num_of_experiments = 24*2*2*(7*2+1)
sampled_data = np.zeros([num_of_experiments,3*44100],dtype=np.float32)
labels = np.zeros([num_of_experiments,5],dtype=np.uint8)
for i_actor in range(1,24+1,1):
    for i_statement in range(1,2+1,1):
        for i_repetition in range(1,2+1,1):
            for i_intensity in range(1,2+1,1):
                for i_emothion in range(1,8+1,1):
                    if i_emothion==1 and i_intensity!=1:
                        continue
                    new_sample, *_ = librosa.load(f'data/ravdess/audio_speech_actors_01-24/Actor_{i_actor:02d}/03-01-{i_emothion:02d}-{i_intensity:02d}-{i_statement:02d}-{i_repetition:02d}-{i_actor:02d}.wav')
                    if new_sample.size > max_len:
                        max_len = new_sample.size
                    sampled_data[i,:new_sample.size] = new_sample.reshape(1,new_sample.size)
                    labels[i,0] = i_actor
                    labels[i,1] = i_statement
                    labels[i,2] = i_repetition
                    labels[i,3] = i_intensity
                    labels[i,4] = i_emothion
                    i += 1

display(sampled_data.shape)
display(labels.shape)

(1440, 132300)

(1440, 5)

In [7]:
num_of_files = sampled_data.shape[0]
train_files = int(4 / 5 * num_of_files)
X_train = sampled_data[:train_files]
y_train = labels[:train_files]
X_test = sampled_data[train_files:]
y_test = labels[train_files:]
display(X_train.shape)
display(y_train.shape)
display(X_test.shape)
display(y_test.shape)

(1152, 132300)

(1152, 5)

(288, 132300)

(288, 5)

In [10]:
minirocket = MiniRocket(num_kernels=256)  # by default, MiniRocket uses ~10_000 kernels
minirocket.fit(X_train)
X_train_transform = minirocket.transform(X_train)
# test shape of transformed training data -> (n_instances, 9_996)
X_train_transform.shape

(1, 66679200)

In [None]:
X_train_transform = pd.read_csv('/Users/yizhar/PycharmProjects/check_mini_rocket/transformed_data_256_kernels.csv')

In [16]:
scaler = StandardScaler(with_mean=False)
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))

X_train_scaled_transform = scaler.fit_transform(X_train_transform)


In [18]:
display(y_train)
classifier.fit(X_train_scaled_transform, y_train)

array([[ 1,  1,  1,  1,  1],
       [ 1,  1,  1,  1,  2],
       [ 1,  1,  1,  1,  3],
       ...,
       [20,  1,  1,  2,  3],
       [20,  1,  1,  2,  4],
       [20,  1,  1,  2,  5]], dtype=uint8)

ValueError: Found input variables with inconsistent numbers of samples: [1, 1152]

In [None]:
X_test_transform = minirocket.transform(X_test)
X_test_scaled_transform = scaler.transform(X_test_transform)
classifier.score(X_test_scaled_transform, y_test)

In [6]:
# def modify_values(arr):
#     modified_arr = np.copy(arr)  # Create a copy to avoid modifying the original array
#     for i in range(1, arr.shape[1]):  # Iterate through each column (starting from the second one)
#         zero_indices = np.where(arr[:, i] == 0)[0]  # Find indices where the value is 0 in the current column
#         for idx in zero_indices:
#             if arr[idx, i - 1] < 0:
#                 modified_arr[idx, i] = -np.exp(-6)
#             else:
#                 modified_arr[idx, i] = np.exp(6)
#     return modified_arr

In [7]:
print(X_train[0])

[ 5.3765351e-08 -2.1233141e-08 -6.0483654e-09 ...  0.0000000e+00
  0.0000000e+00  0.0000000e+00]


In [8]:
# X_train_modified = modify_values(X_train)
# a= np.where(X_train_modified == 0 )
 # print(a)

(array([  60,   61,   63,   64,   65,   66,   67,   69,   70,   71,   73,
         74,   75,   76,   78,   80,   81,   82,   83,   85,   86,   87,
         91,   92,   93,   94,   95,   96,   97,   98,  100,  102,  103,
        108,  109,  111,  112,  113,  114,  115,  116,  117,  118,  119,
        134,  136,  138,  150,  152,  164,  166,  167,  172,  175,  180,
        181,  182,  184,  185,  186,  187,  188,  190,  191,  192,  193,
        194,  195,  196,  198,  199,  201,  204,  206,  208,  209,  210,
        211,  213,  214,  215,  217,  218,  219,  222,  223,  224,  225,
        226,  228,  230,  232,  233,  236,  238,  239,  300,  302,  315,
        322,  335,  336,  340,  348,  351,  372,  373,  374,  395,  410,
        411,  418,  488,  500,  512,  524,  530,  533,  664,  674,  679,
        684,  687,  691,  702,  707,  716,  717,  719,  780,  781,  783,
        785,  786,  787,  788,  789,  791,  792,  793,  794,  795,  796,
        797,  799,  801,  802,  803,  804,  805,  

In [4]:
data = read('data/ravdess/audio_speech_actors_01-24/Actor_01/03-01-01-01-01-01-01.wav')
data = np.array(data[1], dtype=float)

  data = read('data/ravdess/audio_speech_actors_01-24/Actor_01/03-01-01-01-01-01-01.wav')
