In [1]:
import os
import numpy as np
from pathlib import Path
import librosa
from sklearn.model_selection import train_test_split
from tsfresh.utilities.dataframe_functions import impute
from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import EfficientFCParameters
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
RAW_DATASET_PATH = os.path.join("..", "animal_sounds")
DATASET_AUDIO_PATH = os.path.join("..", "animal_sounds_clips")
SAMPLING_RATE = 16000

In [3]:
# Get the list of audio file paths along with their corresponding labels

class_names = os.listdir(DATASET_AUDIO_PATH)
print("Our class names: {}".format(class_names,))

train_audio_paths = []
valid_audio_paths = []
train_labels = []
valid_labels = []
for label, name in enumerate(class_names):
    # print("Processing material {}".format(name,))
    dir_path = Path(DATASET_AUDIO_PATH) / name
    speaker_sample_paths = [
        os.path.join(dir_path, filepath)
        for filepath in os.listdir(dir_path)
        if filepath.lower().endswith(".wav")
    ]
    label = label // 2 # coz every dir has a _valid copy
    if name.endswith("_valid"):
        valid_audio_paths += speaker_sample_paths
        valid_labels += [label] * len(speaker_sample_paths)
    else:
        train_audio_paths += speaker_sample_paths
        train_labels += [label] * len(speaker_sample_paths)
    # print(f"Loaded {len(speaker_sample_paths)} files from class {label}.")
    
print(
    "Found {} files belonging to {} classes.".format(len(train_audio_paths) + len(valid_audio_paths), len(class_names)//2)
)

Our class names: ['kidney_animal', 'kidney_animal_valid', 'liver_animal', 'liver_animal_valid', 'muscle_animal', 'muscle_animal_valid', 'ribs_animal', 'ribs_animal_valid', 'skin_animal', 'skin_animal_valid']
Found 3048 files belonging to 5 classes.


In [4]:
def path_to_audio(path):
    y, _ = librosa.load(path, sr=SAMPLING_RATE)
    return y

def paths_and_labels_to_dataset(audio_paths, labels):
    """Constructs a dataset of audios and labels."""
    audios = [path_to_audio(path) for path in audio_paths]
    return np.array(audios), np.array(labels)

def to_dataframe(dataset):
    column_values = [i + 1 for i in range(len(dataset[0]))]
    df = pd.DataFrame(data=dataset, columns=column_values)
    df["id"] = df.index
    df = df.melt(id_vars="id", var_name="time").sort_values(["id", "time"]).reset_index(drop=True)
    return df
        

In [5]:
train_dataset, train_labels = paths_and_labels_to_dataset(train_audio_paths, train_labels)
test_dataset, test_labels = paths_and_labels_to_dataset(valid_audio_paths, valid_labels)

In [6]:
X_train = to_dataframe(train_dataset)
X_test = to_dataframe(test_dataset)

In [7]:
X_train.shape, X_test.shape

((23512000, 3), (872000, 3))

In [8]:
train_to_take = 1_000_000

X_train_features = extract_features(X_train[X_train['id'] < train_to_take], column_id="id", column_sort="time", impute_function=impute, default_fc_parameters=EfficientFCParameters())
y_train = train_labels[:train_to_take]
X_test_features = extract_features(X_test, column_id="id", column_sort="time", impute_function=impute, default_fc_parameters=EfficientFCParameters())
y_test = test_labels


Feature Extraction: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [4:29:45<00:00, 809.26s/it]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [01:33<00:00,  4.90s/it]


In [9]:
X_train_features

Unnamed: 0,value__variance_larger_than_standard_deviation,value__has_duplicate_max,value__has_duplicate_min,value__has_duplicate,value__sum_values,value__abs_energy,value__mean_abs_change,value__mean_change,value__mean_second_derivative_central,value__median,...,value__fourier_entropy__bins_5,value__fourier_entropy__bins_10,value__fourier_entropy__bins_100,value__permutation_entropy__dimension_3__tau_1,value__permutation_entropy__dimension_4__tau_1,value__permutation_entropy__dimension_5__tau_1,value__permutation_entropy__dimension_6__tau_1,value__permutation_entropy__dimension_7__tau_1,value__query_similarity_count__query_None__threshold_0.0,value__mean_n_absolute_max__number_of_maxima_7
0,0.0,0.0,0.0,1.0,5442.410156,3702.868896,0.001535,7.592197e-07,7.631302e-09,0.680450,...,0.737636,1.099126,3.065079,1.788779,3.157024,4.734408,6.449452,8.029336,0.0,0.817596
1,0.0,1.0,0.0,1.0,5436.419922,3696.550293,0.001895,-6.867313e-08,-1.907826e-09,0.679504,...,0.420998,0.690200,2.399786,1.787741,3.153458,4.731256,6.454871,8.044514,0.0,0.988098
2,0.0,1.0,0.0,1.0,5435.382812,3695.146484,0.001736,-2.289104e-07,-2.289391e-08,0.679443,...,0.420998,0.690200,2.402610,1.789662,3.164328,4.740553,6.448715,8.005685,0.0,0.988098
3,0.0,0.0,0.0,1.0,5434.589355,3691.880615,0.000541,4.044085e-07,-4.197216e-08,0.680054,...,0.280179,0.441954,1.199551,1.782486,3.146308,4.714185,6.415959,7.984807,0.0,0.696747
4,0.0,0.0,0.0,1.0,5444.305176,3705.062988,0.000413,-8.011866e-08,5.341912e-08,0.680511,...,0.635646,0.939786,2.729896,1.782404,3.144504,4.712175,6.422619,8.018074,0.0,0.690848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2934,0.0,0.0,0.0,1.0,5441.645020,3701.455322,0.001170,1.182704e-07,5.723477e-08,0.680084,...,1.266461,1.907174,3.834546,1.785307,3.143457,4.710714,6.425083,8.018189,0.0,0.695225
2935,0.0,0.0,0.0,1.0,5444.867676,3706.078613,0.002143,1.106401e-07,-1.335478e-08,0.680664,...,0.953933,1.569589,3.346926,1.781655,3.134923,4.695734,6.395577,7.976519,0.0,0.753165
2936,0.0,0.0,0.0,1.0,5445.537598,3707.047363,0.002443,1.869435e-07,5.723477e-09,0.680695,...,0.897554,1.490472,3.191544,1.784079,3.139694,4.703382,6.405064,7.985900,0.0,0.755768
2937,0.0,0.0,0.0,1.0,5446.076660,3707.793945,0.002542,0.000000e+00,-9.729911e-08,0.680786,...,0.900166,1.442432,3.264185,1.780043,3.128046,4.683497,6.378538,7.951567,0.0,0.754203


In [10]:
y_train

array([0, 0, 0, ..., 4, 4, 4])

In [11]:
relevant_features = set()

for label in np.unique(y_train):
    y_train_binary = y_train == label
    X_train_filtered = select_features(X_train_features, y_train_binary)
    print("Number of relevant features for class {}: {}/{}".format(label, X_train_filtered.shape[1], X_train_features.shape[1]))
    relevant_features = relevant_features.union(set(X_train_filtered.columns))

Number of relevant features for class 0: 270/777
Number of relevant features for class 1: 286/777
Number of relevant features for class 2: 221/777
Number of relevant features for class 3: 380/777
Number of relevant features for class 4: 158/777


In [12]:
X_train_filtered = X_train_features[list(relevant_features)]
X_test_filtered = X_test_features[list(relevant_features)]

In [13]:
X_train_filtered.columns = [i for i in range(X_train_filtered.shape[1])]
X_test_filtered.columns = [i for i in range(X_test_filtered.shape[1])]

In [14]:
clf = lgb.LGBMClassifier()
clf.fit(X_train_filtered, y_train)

In [15]:
y_pred=clf.predict(X_test_filtered)

In [16]:

accuracy=accuracy_score(y_pred, y_test)
print('LightGBM Model accuracy score: {0:0.4f}'.format(accuracy))

LightGBM Model accuracy score: 0.7156


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])

In [21]:
X_filtered = pd.concat([X_train_filtered, X_test_filtered])
y = np.concatenate((y_train, y_test))
y_pred = clf.predict(X_filtered)

In [24]:

# accuracy=accuracy_score(y_pred, y)
# print('LightGBM Model accuracy score: {0:0.4f}'.format(accuracy))

LightGBM Model accuracy score: 0.9898


In [25]:

cm = confusion_matrix(y, y_pred).astype(np.float64)
print(cm, cm.shape)
for i in range(cm.shape[0]):
  cm[i, :] /= np.sum(cm[i, :])
  
cm_matrix = pd.DataFrame(data=cm)
# cm_matrix.set_index(['kidney','liver','muscle', 'ribs', 'skin'])
cm_matrix.columns = ['kidney','liver','muscle', 'ribs', 'skin']

plt.figure(figsize=(7, 5))
sns.heatmap(cm_matrix, annot=True, fmt=".3f", square=True, cbar=False, cmap="Blues", linewidths=3, vmin=0, vmax=1)
plt.xlabel("Predicted label", labelpad=16)
plt.ylabel("True label", labelpad=12)
plt.tick_params(axis='y', rotation=0)
plt.show()

[[425.   0.   0.   0.   0.]
 [  0. 889.   0.   6.   2.]
 [  0.   2. 382.   1.   5.]
 [  0.   1.   2. 444.   5.]
 [  0.   4.   0.   3. 877.]] (5, 5)


type: "None of ['kidney', 'liver', 'muscle', 'ribs', 'skin'] are in the columns"

In [None]:
# class_names = os.listdir(DATASET_AUDIO_PATH)
# actual_class_names = [n for n in class_names if not n.endswith("_valid")]
# for idx, label in enumerate(actual_class_names):
#     confusion[idx, :] /= np.sum(confusion[idx, :])
#     print(f"{idx} - {label}")