In [219]:
# Standard imports
import numpy as np
import pandas as pd
import random
from datetime import datetime
from collections import Counter

# Sklearn imports
from sklearn.model_selection import train_test_split, GroupShuffleSplit, GroupKFold, StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, RobustScaler, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.inspection import permutation_importance

# Imbalanced-learn import
from imblearn.over_sampling import SMOTE

# TensorFlow and Keras imports
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Input, BatchNormalization, concatenate
from tensorflow.keras.optimizers import Adam, RMSprop, SGD, Adamax, AdamW
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from keras.regularizers import l1, l2, L1L2

# Optuna import
import optuna

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# to save the scaler
import joblib

In [220]:
# Set a fixed random seed for reproducibility
random.seed(5390) 
np.random.seed(5390)
tf.random.set_seed(5390)

# Load datasets
dataframe = pd.read_csv('/Users/astrid/PycharmProjects/audioset-thesis-work/audioset/vggish/embeddings/8april_looped_embeddings.csv')

dataframe.drop('mean_freq', axis=1, inplace=True)

def assign_age_group(age, age_groups):
    for group_name, age_range in age_groups.items():
        if age_range[0] <= age < age_range[1]:
            return group_name
    return 'Unknown'  # For any age that doesn't fit the defined groups

# Define age groups
age_groups = {
    'kitten': (0, 0.5),
    'adult': (0.5, 10),
    'senior': (10, 20)
}

# Create a new column for the age group
dataframe['age_group'] = dataframe['target'].apply(assign_age_group, age_groups=age_groups)

print(dataframe['age_group'].value_counts())

adult     460
senior    306
kitten    171
Name: age_group, dtype: int64


# save demo rows to external csv

In [221]:
# Select all rows corresponding to the specified cat_id values
selected_cat_ids = ['108A', '109A', '037A']
demo_samples = dataframe[dataframe['cat_id'].isin(selected_cat_ids)]

In [222]:
demo_samples

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,122,123,124,125,126,127,gender,target,cat_id,age_group
152,2832.0293,-1224.3556,-3104.671,135.267,199.70366,4972.929,-1742.1462,-1933.585,6134.867,2059.8123,...,2552.548,-474.3294,-2933.9365,3395.888,-6107.7456,-1061.9642,X,0.0,109A,kitten
201,2687.3303,-1167.7533,-2927.5361,133.91333,176.22916,4708.437,-1666.4711,-1813.673,5830.6094,1962.3767,...,2429.6272,-439.15106,-2776.0312,3213.3528,-5826.509,-1019.60443,X,0.0,109A,kitten
284,2742.9766,-1191.6954,-2980.137,152.56021,226.44548,4841.6235,-1725.6976,-1820.9745,5985.0947,2013.3196,...,2470.2888,-470.1313,-2825.377,3296.967,-5967.8623,-1079.46,X,0.0,109A,kitten
469,2869.8135,-1226.8926,-3099.9812,133.81638,207.58218,5034.443,-1772.9384,-1923.5074,6199.795,2082.9504,...,2581.1968,-515.4969,-2954.4731,3453.4902,-6172.2075,-1075.6876,X,0.0,109A,kitten
470,3137.8435,-1348.4828,-3411.08,130.62189,211.46841,5463.068,-1941.1052,-2126.2073,6714.9062,2247.8354,...,2828.8767,-578.9888,-3247.779,3778.487,-6727.5615,-1191.706,X,0.0,109A,kitten
494,3867.1643,-1669.0719,-4053.3696,241.06406,302.83722,6505.4146,-2291.9287,-2485.943,7899.703,2651.6104,...,3219.7515,-564.6864,-3733.3735,4508.9165,-8047.7256,-1539.7184,F,16.0,108A,senior
657,3251.643,-1394.386,-3463.1084,230.08203,217.76968,5463.253,-1921.9489,-2112.767,6662.2188,2243.1538,...,2744.6404,-462.9361,-3175.2788,3814.724,-6818.2993,-1289.6487,F,16.0,108A,senior
664,2879.916,-1229.953,-3122.353,141.3518,212.84225,4930.672,-1748.36,-1925.1099,6051.961,2091.002,...,2524.9436,-509.4654,-2921.8657,3454.4592,-6052.8047,-1120.5204,X,7.0,037A,adult
665,3051.1982,-1305.8528,-3279.6475,122.02283,193.3745,5139.2646,-1826.193,-2049.3958,6305.3945,2183.462,...,2658.9524,-549.6913,-3071.2742,3649.5955,-6375.66,-1163.8398,X,7.0,037A,adult
666,3367.7542,-1415.1522,-3596.3857,144.67197,243.7126,5695.8374,-2012.02,-2226.8752,6994.3135,2406.5762,...,2922.9407,-602.8836,-3367.2234,3993.3862,-7069.6455,-1335.3989,X,7.0,037A,adult


In [223]:
# Initialize and fit the label encoder
label_encoder = LabelEncoder()
dataframe['label'] = label_encoder.fit_transform(dataframe['age_group'].values)

In [224]:
dataframe.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,123,124,125,126,127,gender,target,cat_id,age_group,label
0,3253.679,-1300.0604,-3428.619,178.22336,145.87761,5530.401,-1929.7223,-2155.6733,6838.9844,2277.829,...,-530.9152,-3267.7144,3789.5164,-6954.056,-1200.455,M,2.0,006A,adult,0
1,3338.0847,-1419.996,-3464.4106,183.5827,311.41168,5724.5674,-1989.8912,-2187.1287,7025.078,2406.538,...,-515.0856,-3226.898,3920.7097,-7107.833,-1304.0648,F,5.0,000A,adult,0
2,3282.336,-1396.401,-3533.982,149.29416,207.89177,5654.894,-1989.5737,-2193.4783,6968.383,2366.7522,...,-593.87024,-3310.9148,3889.7998,-7059.003,-1274.8529,X,0.0,044A,kitten,1
3,4882.2915,-2161.83,-5307.861,168.995,255.57112,8415.017,-2979.138,-3213.3972,10388.607,3472.3523,...,-883.1938,-4949.7915,5769.624,-10496.903,-2006.7511,X,0.0,014B,kitten,1
4,3503.626,-1458.7937,-3623.8113,196.71686,237.97202,5886.227,-2068.3577,-2297.6812,7219.8496,2454.4438,...,-546.6624,-3363.108,4081.612,-7353.6616,-1369.3765,F,5.0,000A,adult,0


## save embeddings and labels from demo set to .txt

In [225]:
# Ensure the target labels in demo_samples are encoded using the same LabelEncoder
demo_samples = demo_samples.copy()  # Avoid SettingWithCopyWarning
demo_samples['label'] = label_encoder.transform(demo_samples['age_group'].values)

# Extract features and labels from demo_samples
features = demo_samples.iloc[:, :-5].values  # all columns except the last five
labels = demo_samples['label'].values

# Save each row to a separate CSV file
for i, (feature_row, label) in enumerate(zip(features, labels)):
    # Create a DataFrame for the current row
    row_df = pd.DataFrame([np.append(feature_row, label)])
    
    # Create a filename
    filename = f'demo_sample_{i}.csv'
    
    # Save to CSV file
    row_df.to_csv(filename, index=False, header=False)
    
    print(f'Saved {filename}')

Saved demo_sample_0.csv
Saved demo_sample_1.csv
Saved demo_sample_2.csv
Saved demo_sample_3.csv
Saved demo_sample_4.csv
Saved demo_sample_5.csv
Saved demo_sample_6.csv
Saved demo_sample_7.csv
Saved demo_sample_8.csv
Saved demo_sample_9.csv
Saved demo_sample_10.csv
Saved demo_sample_11.csv
Saved demo_sample_12.csv
Saved demo_sample_13.csv
Saved demo_sample_14.csv
Saved demo_sample_15.csv
Saved demo_sample_16.csv
Saved demo_sample_17.csv


In [226]:
# Ensure the target labels are encoded as 0 for kitten and 1 for senior
demo_samples = demo_samples.copy()  # Avoid SettingWithCopyWarning
demo_samples['label'] = label_encoder.transform(demo_samples['age_group'].values)

# Extract features and labels
features = demo_samples.iloc[:, :-5].values
labels = demo_samples['label'].values

# Combine features and labels into a single DataFrame
combined_data = np.hstack((features, labels.reshape(-1, 1)))
combined_df = pd.DataFrame(combined_data)

# Create a filename for the combined CSV file
combined_filename = 'combined_demo_samples.csv'

# Save the combined data to a single CSV file
combined_df.to_csv(combined_filename, index=False, header=False)

print(f'Saved {combined_filename}')

Saved combined_demo_samples.csv


In [227]:
# Count the occurrences of each cat_id
cat_id_counts = dataframe['cat_id'].value_counts().reset_index()
cat_id_counts.columns = ['cat_id', 'count']

# Merge with the age group information
age_group_info = dataframe[['cat_id', 'age_group']].drop_duplicates()
cat_id_counts_with_age_group = cat_id_counts.merge(age_group_info, on='cat_id')

pd.set_option('display.max_rows', None)

# Display the result
cat_id_counts_with_age_group.sort_values(by='count', ascending=True)

Unnamed: 0,cat_id,count,age_group
111,026B,1,adult
92,019B,1,adult
93,110A,1,kitten
94,100A,1,adult
95,090A,1,senior
96,115A,1,kitten
97,091A,1,senior
98,024A,1,senior
99,073A,1,adult
100,066A,1,adult


### samples for demo

In [228]:
# Separate features and labels for the full dataset
X = dataframe.iloc[:, :-5].values  # all columns except the last five
y = dataframe['label'].values

# Convert 'cat_id' column to numpy array to be used as groups array for GroupKFold
groups = dataframe['cat_id'].values

# Scale the features using StandardScaler
scaler_full = StandardScaler().fit(X)
X_scaled = scaler_full.transform(X)

# Encode the labels using one-hot encoding
y_encoded = to_categorical(y, num_classes=3)

# Select specific cat_id values for demonstration samples
kitten_cat_id = "109A"
adult_cat_id = "037A"
senior_cat_id = "108A"

# Select all rows corresponding to the sampled cat_id values
demo_samples = dataframe[(dataframe['cat_id'] == kitten_cat_id) | 
                         (dataframe['cat_id'] == senior_cat_id) | 
                         (dataframe['cat_id'] == adult_cat_id)].index

# Convert dataframe indices to positional indices
demo_sample_positions = dataframe.index.get_indexer(demo_samples)

# Separate demonstration samples using positional indices
X_demo = X_scaled[demo_sample_positions]
y_demo = y_encoded[demo_sample_positions]

# Remove demonstration samples from the training set
X_train_full = np.delete(X_scaled, demo_sample_positions, axis=0)
y_train_full = np.delete(y_encoded, demo_sample_positions, axis=0)

# Print label encoding for verification
print("Label encoding:", dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))

Label encoding: {'adult': 0, 'kitten': 1, 'senior': 2}


### train

In [229]:
# EarlyStopping callback: monitor 'loss' instead of 'val_loss' for the test set
early_stopping = EarlyStopping(
    monitor='loss',  
    min_delta=0.001, 
    patience=30,  
    verbose=1,  
    restore_best_weights=True  
)

In [230]:
# Define optimizers
optimizers = {
    'Adamax': Adamax(learning_rate=0.00038188800331973483)
}

# Compute class weights for the training set
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(np.argmax(y_train_full, axis=1)),
    y=np.argmax(y_train_full, axis=1)
)
weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Full model definition with dynamic number of layers
model_full = Sequential()
model_full.add(Dense(480, activation='relu', input_shape=(X_train_full.shape[1],)))  # units and input shape from parameters
model_full.add(BatchNormalization())
model_full.add(Dropout(0.27188281261238406))
model_full.add(Dense(3, activation='softmax'))  # for multi-class classification

optimizer = optimizers['Adamax']  # optimizer selection

# Compile the model for categorical classification
model_full.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the full training set
history_full = model_full.fit(X_train_full, y_train_full, epochs=1500, batch_size=32,
                              verbose=1, callbacks=[early_stopping], class_weight=weight_dict)



Epoch 1/1500
Epoch 2/1500
Epoch 3/1500
Epoch 4/1500
Epoch 5/1500
Epoch 6/1500
Epoch 7/1500
Epoch 8/1500
Epoch 9/1500
Epoch 10/1500
Epoch 11/1500
Epoch 12/1500
Epoch 13/1500
Epoch 14/1500
Epoch 15/1500
Epoch 16/1500
Epoch 17/1500
Epoch 18/1500
Epoch 19/1500
Epoch 20/1500
Epoch 21/1500
Epoch 22/1500
Epoch 23/1500
Epoch 24/1500
Epoch 25/1500
Epoch 26/1500
Epoch 27/1500
Epoch 28/1500
Epoch 29/1500
Epoch 30/1500
Epoch 31/1500
Epoch 32/1500
Epoch 33/1500
Epoch 34/1500
Epoch 35/1500
Epoch 36/1500
Epoch 37/1500
Epoch 38/1500
Epoch 39/1500
Epoch 40/1500
Epoch 41/1500
Epoch 42/1500
Epoch 43/1500
Epoch 44/1500
Epoch 45/1500
Epoch 46/1500
Epoch 47/1500
Epoch 48/1500
Epoch 49/1500
Epoch 50/1500
Epoch 51/1500
Epoch 52/1500
Epoch 53/1500
Epoch 54/1500
Epoch 55/1500
Epoch 56/1500
Epoch 57/1500
Epoch 58/1500
Epoch 59/1500
Epoch 60/1500
Epoch 61/1500
Epoch 62/1500
Epoch 63/1500
Epoch 64/1500
Epoch 65/1500
Epoch 66/1500
Epoch 67/1500
Epoch 68/1500
Epoch 69/1500
Epoch 70/1500
Epoch 71/1500
Epoch 72/1500
E

In [231]:
print(f"Class Weights: {weight_dict}")

Class Weights: {0: 0.6747430249632893, 1: 1.8565656565656565, 2: 1.021111111111111}


In [232]:
# Save the label mapping
label_mapping = {index: label for index, label in enumerate(label_encoder.classes_)}
print(label_mapping)  # This will print the mapping of labels to encoded values

{0: 'adult', 1: 'kitten', 2: 'senior'}


In [233]:
# Evaluate the model on the training set to get total accuracy
loss, accuracy = model_full.evaluate(X_train_full, y_train_full, verbose=0)
print(f"Total Training Set Accuracy: {accuracy * 100:.2f}%")

# Evaluate the model on the demo set to get accuracy
loss, accuracy = model_full.evaluate(X_demo, y_demo, verbose=0)
print(f"Demo Set Accuracy: {accuracy * 100:.2f}%")

# Predict probabilities for the demo samples
probabilities = model_full.predict(X_demo)

# Convert probabilities to class predictions
predictions = np.argmax(probabilities, axis=1)

# Define the label mapping if not already defined
label_mapping = {0: 'Adult', 1: 'Kitten', 2: 'Senior'}

# Map predictions and actual labels to "Kitten", "Adult", or "Senior" classes
mapped_predictions = [label_mapping[pred] for pred in predictions]
mapped_actual_labels = [label_mapping[np.argmax(label)] for label in y_demo]

# Print out the probabilities along with actual labels and predictions
for i in range(len(probabilities)):
    prob_str = ', '.join([f'{label_mapping[j]}: {prob:.4f}' for j, prob in enumerate(probabilities[i])])
    print(f"Sample {i}: Predicted={mapped_predictions[i]}, Actual={mapped_actual_labels[i]}, Probabilities=({prob_str})")


Total Training Set Accuracy: 95.54%
Demo Set Accuracy: 88.89%
Sample 0: Predicted=Adult, Actual=Kitten, Probabilities=(Adult: 0.6903, Kitten: 0.3056, Senior: 0.0041)
Sample 1: Predicted=Kitten, Actual=Kitten, Probabilities=(Adult: 0.3642, Kitten: 0.6280, Senior: 0.0078)
Sample 2: Predicted=Kitten, Actual=Kitten, Probabilities=(Adult: 0.0061, Kitten: 0.9938, Senior: 0.0001)
Sample 3: Predicted=Kitten, Actual=Kitten, Probabilities=(Adult: 0.0340, Kitten: 0.9648, Senior: 0.0012)
Sample 4: Predicted=Kitten, Actual=Kitten, Probabilities=(Adult: 0.4167, Kitten: 0.5605, Senior: 0.0228)
Sample 5: Predicted=Senior, Actual=Senior, Probabilities=(Adult: 0.1161, Kitten: 0.0028, Senior: 0.8811)
Sample 6: Predicted=Senior, Actual=Senior, Probabilities=(Adult: 0.3129, Kitten: 0.0007, Senior: 0.6864)
Sample 7: Predicted=Adult, Actual=Adult, Probabilities=(Adult: 0.9898, Kitten: 0.0002, Senior: 0.0100)
Sample 8: Predicted=Adult, Actual=Adult, Probabilities=(Adult: 0.8689, Kitten: 0.0000, Senior: 0.1311

In [234]:
# Compute the confusion matrix
conf_matrix = confusion_matrix([np.argmax(label) for label in y_demo], predictions)

# Calculate the accuracy per class
class_accuracies = conf_matrix.diagonal() / conf_matrix.sum(axis=1)

# Map the accuracies to class labels
class_accuracy_map = {label_mapping[i]: class_accuracies[i] for i in range(len(class_accuracies))}

# Print the accuracy per class
for class_label, accuracy in class_accuracy_map.items():
    print(f"Accuracy for class {class_label}: {accuracy * 100:.2f}%")

Accuracy for class Adult: 100.00%
Accuracy for class Kitten: 66.67%
Accuracy for class Senior: 100.00%


In [235]:
# Evaluate the model on the training set to get total accuracy
loss, accuracy = model_full.evaluate(X_train_full, y_train_full, verbose=0)
print(f"Total Training Set Accuracy: {accuracy * 100:.2f}%")

# Evaluate the model on the demo set to get accuracy
loss, accuracy = model_full.evaluate(X_demo, y_demo, verbose=0)
print(f"Demo Set Accuracy: {accuracy * 100:.2f}%")

# Predict probabilities for the demo samples
probabilities = model_full.predict(X_demo)

# Convert probabilities to class predictions
predictions = np.argmax(probabilities, axis=1)

# Map predictions and actual labels to "Kitten", "Adult", or "Senior" classes
mapped_predictions = [label_mapping[pred] for pred in predictions]
mapped_actual_labels = [label_mapping[np.argmax(label)] for label in y_demo_encoded]

# Print out the probabilities along with actual labels and predictions
for i in range(len(probabilities)):
    class_probabilities = ", ".join([f"{label_mapping[j]}: {prob:.4f}" for j, prob in enumerate(probabilities[i])])
    print(f"Sample {i}: Predicted={mapped_predictions[i]}, Actual={mapped_actual_labels[i]}, Probabilities=({class_probabilities})")


Total Training Set Accuracy: 95.54%
Demo Set Accuracy: 88.89%
Sample 0: Predicted=Adult, Actual=Adult, Probabilities=(Adult: 0.6903, Kitten: 0.3056, Senior: 0.0041)
Sample 1: Predicted=Kitten, Actual=Adult, Probabilities=(Adult: 0.3642, Kitten: 0.6280, Senior: 0.0078)
Sample 2: Predicted=Kitten, Actual=Adult, Probabilities=(Adult: 0.0061, Kitten: 0.9938, Senior: 0.0001)
Sample 3: Predicted=Kitten, Actual=Adult, Probabilities=(Adult: 0.0340, Kitten: 0.9648, Senior: 0.0012)
Sample 4: Predicted=Kitten, Actual=Adult, Probabilities=(Adult: 0.4167, Kitten: 0.5605, Senior: 0.0228)
Sample 5: Predicted=Senior, Actual=Adult, Probabilities=(Adult: 0.1161, Kitten: 0.0028, Senior: 0.8811)
Sample 6: Predicted=Senior, Actual=Adult, Probabilities=(Adult: 0.3129, Kitten: 0.0007, Senior: 0.6864)
Sample 7: Predicted=Adult, Actual=Kitten, Probabilities=(Adult: 0.9898, Kitten: 0.0002, Senior: 0.0100)
Sample 8: Predicted=Adult, Actual=Kitten, Probabilities=(Adult: 0.8689, Kitten: 0.0000, Senior: 0.1311)
Sam

### Save model

In [236]:
# Save the StandardScaler
joblib.dump(scaler_full, 'scaler_full.pkl')

# Save the trained model
model_full.save('cat_age_model.keras')