In [1]:
!pip install pandas
!pip install matplotlib
!pip install seaborn
!pip install tensorflow
!pip install tensorboard

[1;31merror[0m: [1mexternally-managed-environment[0m

[31m×[0m This environment is externally managed
[31m╰─>[0m To install Python packages system-wide, try apt install
[31m   [0m python3-xyz, where xyz is the package you are trying to
[31m   [0m install.
[31m   [0m 
[31m   [0m If you wish to install a non-Debian-packaged Python package,
[31m   [0m create a virtual environment using python3 -m venv path/to/venv.
[31m   [0m Then use path/to/venv/bin/python and path/to/venv/bin/pip. Make
[31m   [0m sure you have python3-full installed.
[31m   [0m 
[31m   [0m If you wish to install a non-Debian packaged Python application,
[31m   [0m it may be easiest to use pipx install xyz, which will manage a
[31m   [0m virtual environment for you. Make sure you have pipx installed.
[31m   [0m 
[31m   [0m See /usr/share/doc/python3.12/README.venv for more information.

[1;35mnote[0m: If you believe this is a mistake, please contact your Python insta

In [1]:
import pandas as pd

In [3]:
# Dataset root (POSIX-style mount of the D: drive).
# The Windows-native spelling of the same location used to be assigned first and
# was immediately overwritten, so it is kept here only as a reference:
#   D:\datasets\cv-corpus-19.0-2024-09-13-fr\cv-corpus-19.0-2024-09-13\fr
data_path = r"/mnt/d/datasets/cv-corpus-19.0-2024-09-13-fr/cv-corpus-19.0-2024-09-13/fr"

# Task switches read as globals by the data pipeline, the model and the losses.
train_age_only = False   # True: only the age head/labels are used; False: age + gender
use_ordinal_age = False  # True: age is a single ordinal value; False: one-hot age classes

# Hyperparameters
loss_age_weight = 1      # weight of the age loss in the total loss
loss_genre_weight = 1    # weight of the gender loss ("genre" is French for gender)
learning_rate = 0.0001   # Adam learning rate
epochs = 10              # maximum number of training epochs
batch_size = 32          # examples per batch produced by the tf.data pipeline
num_age_classes = 7      # number of age buckets (teens .. seventies)
train_ratio = 0.90       # fraction of rows used for training; the rest is validation

In [4]:
from sklearn.model_selection import train_test_split


def preprocess_label_data(label_data, keep_age=False):
    """
    Encode the ``gender`` and ``age`` label columns numerically.

    ``gender`` is mapped to 0 (``male_masculine``) / 1 (``female_feminine``);
    any other value becomes NaN.

    ``age`` depends on the global ``use_ordinal_age``:
      * True  -> replaced by its ordinal index (teens=0 ... seventies=6);
      * False -> replaced by one-hot columns ``age_<bucket>`` appended at the
        end of the frame. There is always one column per known bucket, in
        ordinal order (``age_teens`` first, ``age_seventies`` last), so the
        one-hot index equals the ordinal index and the label width does not
        depend on which buckets happen to be present in the data. Rows whose
        age is missing or not a known bucket get an all-zero one-hot vector.

    Args:
        label_data: DataFrame with at least ``age`` and ``gender`` columns.
            It is not modified; a copy is processed.
        keep_age: when True, the original (string) ``age`` column is appended
            to the result under the name ``age``.
            NOTE(review): combined with ``use_ordinal_age=True`` this yields
            two columns named ``age`` (encoded and original).

    Returns:
        A new DataFrame with the encoded labels.
    """
    gender_dict = {
        'male_masculine': 0,
        'female_feminine': 1
    }
    # Insertion order defines the ordinal index of every age bucket.
    age_dict = {
        'teens': 0,
        'twenties': 1,
        'thirties': 2,
        'fourties': 3,
        'fifties': 4,
        'sixties': 5,
        'seventies': 6
    }
    label_data = label_data.copy()
    # Snapshot the raw labels before the column is re-encoded below.
    serie_age = label_data['age'].copy() if keep_age else None

    if use_ordinal_age:
        label_data['age'] = label_data['age'].map(age_dict)
    else:
        # A categorical with a fixed category list makes get_dummies emit one
        # column per bucket, in this exact order, even for unseen buckets.
        label_data['age'] = pd.Categorical(label_data['age'], categories=list(age_dict))
        label_data = pd.get_dummies(label_data, columns=['age'])

    label_data['gender'] = label_data['gender'].map(gender_dict)

    if keep_age:
        return pd.concat([label_data, serie_age], axis=1)
    return label_data

In [5]:
import numpy as np
import pandas as pd


def load_csv_data(csv_path):
    """
    Read the CSV file at ``csv_path`` with pandas' default options and
    return all of its columns as a DataFrame.
    """
    return pd.read_csv(csv_path)


def create_dataset(csv_path, batch_size, num_age_classes, train_ratio=0.8, random_state=0):
    """
    Build the training and validation ``tf.data.Dataset`` objects from a label CSV.

    The CSV is loaded, its labels are encoded with ``preprocess_label_data``,
    and the rows are split into train/validation, stratified on ``gender``.
    Every element is then expanded by ``tf_parse_row`` into
    ``(features, labels)`` and batched.

    The globals ``train_age_only`` and ``use_ordinal_age`` select whether the
    gender label is carried along and how the age label is encoded.

    Args:
        csv_path: path of the label CSV (needs ``path``, ``gender``, ``age``).
        batch_size: number of examples per batch.
        num_age_classes: width of the one-hot age label (forwarded to
            ``tf_parse_row``).
        train_ratio: fraction of rows assigned to the training split.
        random_state: seed for the split and for the training shuffle.

    Returns:
        ``(train_dataset, val_dataset)``; both are batched and prefetched.
        Only the training dataset is shuffled (reshuffled every epoch).

    Raises:
        ValueError: in one-hot mode, if no ``age_*`` column is found.
    """
    # Load CSV and preprocess
    df = preprocess_label_data(load_csv_data(csv_path))

    # Split the data
    train_data, val_data = train_test_split(df, train_size=train_ratio, random_state=random_state,
                                            stratify=df['gender'])

    def convert_to_tensors(data):
        """Turn one split into a tuple of aligned tensors (paths[, genders], ages)."""
        paths = tf.convert_to_tensor(data['path'].values, dtype=tf.string)
        if use_ordinal_age:
            age_values = data['age'].to_numpy(dtype=np.float32)
        else:
            # Select the one-hot columns by name instead of by position so the
            # result does not depend on how many other columns the CSV has.
            age_columns = [col for col in data.columns if str(col).startswith('age_')]
            if not age_columns:
                raise ValueError("No one-hot 'age_*' columns found in the label data")
            age_values = data[age_columns].to_numpy(dtype=np.float32)
        ages = tf.convert_to_tensor(age_values, dtype=tf.float32)
        if train_age_only:
            return paths, ages
        genders = tf.convert_to_tensor(data['gender'].values, dtype=tf.int32)
        return paths, genders, ages

    def parse_element(*row):
        """Dispatch one dataset element to tf_parse_row for the active mode."""
        if train_age_only:
            path, age = row
            return tf_parse_row(path, None, age, num_age_classes)
        path, gender, age = row
        return tf_parse_row(path, gender, age, num_age_classes)

    def build_pipeline(data, shuffle):
        """tensors -> (optional shuffle) -> parse -> batch -> prefetch."""
        dataset = tf.data.Dataset.from_tensor_slices(convert_to_tensors(data))
        if shuffle:
            # Shuffle before parsing: only paths/labels sit in the buffer, so a
            # full-size buffer is cheap and the order changes every epoch.
            dataset = dataset.shuffle(buffer_size=len(data), seed=random_state)
        return (
            dataset
            .map(parse_element, num_parallel_calls=tf.data.AUTOTUNE)
            .batch(batch_size)
            .prefetch(tf.data.AUTOTUNE)  # Prefetch for efficient data loading
        )

    train_dataset = build_pipeline(train_data, shuffle=True)
    val_dataset = build_pipeline(val_data, shuffle=False)
    return train_dataset, val_dataset


def tf_parse_row(path, gender, age, num_age_classes):
    """
    Graph-side wrapper around ``parse_row`` / ``parse_row_age``.

    The Python loader is executed through ``tf.py_function`` and the static
    shapes lost by that call are restored afterwards. Which loader runs, and
    whether a gender label is produced, depends on the global
    ``train_age_only``; the age label shape depends on ``use_ordinal_age``.

    Returns:
        ``(features, labels)`` where ``labels`` is a dict with key ``"age"``
        and, unless ``train_age_only`` is set, key ``"gender"``.
    """
    # Scalar label in ordinal mode, one-hot vector otherwise.
    age_shape = [] if use_ordinal_age else [num_age_classes]

    if train_age_only:
        features, age_label = tf.py_function(
            func=parse_row_age,
            inp=[path, age],
            Tout=(tf.float32, tf.float32)
        )
        labels = {"age": age_label}
    else:
        features, age_label, gender_label = tf.py_function(
            func=parse_row,
            inp=[path, gender, age],
            Tout=(tf.float32, tf.float32, tf.int32)
        )
        gender_label.set_shape([])
        labels = {"age": age_label, "gender": gender_label}

    # Set shapes for TensorFlow to understand
    features.set_shape([128, 862])  # Adjust this shape to match your data
    age_label.set_shape(age_shape)
    return features, labels


def parse_row(path, gender, age):
    """
    Load the feature array for one row and hand the labels back unchanged.

    ``path`` must expose ``.numpy()`` returning UTF-8 bytes (as an eager
    string tensor does). Every ``mp3`` occurrence in the decoded name is
    replaced by ``npy`` and the file is read from ``{data_path}/processed/``.

    Returns:
        ``(features, age, gender)``.
    """
    clip_name = path.numpy().decode('utf-8')
    feature_file = clip_name.replace('mp3', 'npy')
    # Load the numpy file (feature input)
    features = np.load(f"{data_path}/processed/{feature_file}")
    return features, age, gender


def parse_row_age(path, age):
    """
    Age-only variant of the row parser.

    ``path`` must expose ``.numpy()`` returning UTF-8 bytes. Every ``mp3``
    occurrence in the decoded name is replaced by ``npy`` and the file is
    read from ``{data_path}/processed/``.

    Returns:
        ``(features, age)`` with ``age`` passed through untouched.
    """
    clip_name = path.numpy().decode('utf-8')
    feature_file = clip_name.replace('mp3', 'npy')
    # Load the numpy file (feature input)
    features = np.load(f"{data_path}/processed/{feature_file}")
    return features, age



In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models, initializers


class ConvBlock(layers.Layer):
    """
    Convolutional stage: Conv2D -> [BatchNorm] -> activation -> [MaxPool] -> [Dropout].

    The bracketed steps are optional: batch normalisation and 2x2 max pooling
    are toggled by ``batch_norm`` / ``max_pool``, and dropout is only created
    when ``dropout_rate`` is greater than zero.
    """

    def __init__(self, filters, kernel_size, strides=(1, 1), padding='same', activation='relu',
                 kernel_initializer='he_normal', batch_norm=True, max_pool=True, dropout_rate=0.0):
        super().__init__()
        # Sub-layers are created in the order they are applied in call().
        self.conv = layers.Conv2D(
            filters,
            kernel_size,
            strides=strides,
            padding=padding,
            kernel_initializer=kernel_initializer,
        )

        if batch_norm:
            self.batch_norm = layers.BatchNormalization()
        else:
            self.batch_norm = None

        self.activation = layers.Activation(activation)

        if max_pool:
            self.max_pool = layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')
        else:
            self.max_pool = None

        if dropout_rate > 0:
            self.dropout = layers.Dropout(dropout_rate)
        else:
            self.dropout = None

    def call(self, inputs, training=False):
        """Apply the stage; ``training`` is forwarded to conv, batch norm and dropout."""
        out = self.conv(inputs, training=training)
        if self.batch_norm is not None:
            out = self.batch_norm(out, training=training)
        out = self.activation(out)
        if self.max_pool is not None:
            out = self.max_pool(out)
        if self.dropout is not None:
            out = self.dropout(out, training=training)
        return out


class AudioAgeAndGenderClassifier(tf.keras.Model):
    """
    CNN over 2-D audio features with an age head and an optional gender head.

    Five ``ConvBlock`` stages are followed by Flatten and a 128-unit dense
    layer. The age head is a single sigmoid unit when the global
    ``use_ordinal_age`` is set, otherwise a softmax over ``num_age_classes``.
    The gender head (one sigmoid unit) exists only when the global
    ``train_age_only`` is False.
    """

    def __init__(self, input_shape=(128, 862)):
        # NOTE: ``input_shape`` is accepted but not used by the constructor.
        super().__init__()

        # (filters, dropout_rate) for each convolutional stage, in order.
        # A further 512-filter stage was tried and is currently disabled.
        stage_settings = [(16, 0.0), (32, 0.0), (64, 0.1), (128, 0.5), (256, 0.5)]
        self.blocks = [
            ConvBlock(filters=n_filters, kernel_size=(3, 3), strides=(1, 1), padding='same',
                      activation='relu', kernel_initializer='he_normal', batch_norm=True,
                      max_pool=True, dropout_rate=rate)
            for n_filters, rate in stage_settings
        ]

        self.flatten = layers.Flatten()
        self.dense = layers.Dense(128, activation='relu')

        if use_ordinal_age:
            age_units, age_activation = 1, "sigmoid"
        else:
            age_units, age_activation = num_age_classes, 'softmax'
        self.age_output = layers.Dense(age_units, activation=age_activation)

        if not train_age_only:
            self.gender_output = layers.Dense(1, activation='sigmoid')

    def call(self, inputs, training=False):
        """Return a dict of predictions keyed by ``"age"`` (and ``"gender"``)."""
        # Add a trailing channel axis so the 2-D convolutions can consume the input.
        x = tf.expand_dims(inputs, axis=-1)
        for block in self.blocks:
            x = block(x, training=training)

        hidden = self.dense(self.flatten(x))

        outputs = {"age": self.age_output(hidden)}
        if not train_age_only:
            outputs["gender"] = self.gender_output(hidden)
        return outputs

    def build(self, input_shape):
        """Record ``input_shape`` as the model's input spec, then defer to the base build."""
        self.input_spec = tf.keras.layers.InputSpec(shape=input_shape)
        super().build(input_shape)


# Report the device TensorFlow will run on (an empty name means no GPU was found).
device_name = tf.test.gpu_device_name()
print(f"Using GPU: {device_name}" if device_name else "Using CPU")

# Instantiate the model
model = AudioAgeAndGenderClassifier()

# Build the model with input shape (Batch, Height, Width) and show its summary
input_shape = (batch_size, 128, 862)
model.build(input_shape)
model.summary()

Using GPU: /device:GPU:0


I0000 00:00:1733396890.235426   86300 gpu_device.cc:2022] Created device /device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [7]:
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


def scaled_mse(y_true, y_pred):
    """
    Mean squared error between the ordinal age label rescaled to [0, 1] and
    the prediction.

    The ordinal label lies in ``0 .. num_age_classes - 1`` while the age head
    is a sigmoid, so the label is divided by ``num_age_classes - 1`` before
    the comparison. (The previous expression ``y_true / num_age_classes - 1``
    divided by the class count and then subtracted one, which put the target
    outside the sigmoid's range.)

    Args:
        y_true: ordinal age labels.
        y_pred: model predictions; ``y_true`` must have the same number of
            elements.

    Returns:
        Scalar tensor with the mean squared error.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    # Align (batch,) labels with (batch, 1) predictions so the subtraction
    # is element-wise instead of broadcasting to (batch, batch).
    y_true = tf.reshape(y_true, tf.shape(y_pred))
    # max(..., 1) avoids a division by zero with a single age class.
    largest_label = max(num_age_classes - 1, 1)
    return tf.reduce_mean(tf.square(y_true / largest_label - y_pred))


# Compile the model
def compile_model(model, learning_rate=0.001):
    """
    Compile ``model`` with Adam and per-output losses, loss weights and metrics.

    The age output uses ``scaled_mse`` + mean squared error metric when the
    global ``use_ordinal_age`` is set, otherwise categorical cross-entropy +
    categorical accuracy. Unless the global ``train_age_only`` is set, the
    gender output is added with binary cross-entropy + binary accuracy.

    Losses, loss weights and metrics are all dicts keyed by output name
    (``"age"``, ``"gender"``), matching the dict returned by the model, so
    each weight is tied to its output explicitly rather than by list position.

    Args:
        model: the Keras model to compile.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        The same ``model`` instance, compiled.
    """
    # NOTE(review): in ordinal mode the MeanSquaredError metric compares the
    # unscaled label with the sigmoid output, unlike the scaled_mse loss.
    if use_ordinal_age:
        age_loss = scaled_mse
        age_metric = tf.keras.metrics.MeanSquaredError()
    else:
        age_loss = tf.keras.losses.CategoricalCrossentropy()
        age_metric = tf.keras.metrics.CategoricalAccuracy()

    # Define separate losses for age and gender
    losses = {"age": age_loss}
    loss_weights = {"age": loss_age_weight}
    metrics = {"age": [age_metric]}
    if not train_age_only:
        losses["gender"] = tf.keras.losses.BinaryCrossentropy()
        loss_weights["gender"] = loss_genre_weight
        metrics["gender"] = [tf.keras.metrics.BinaryAccuracy()]

    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=losses,
        loss_weights=loss_weights,
        metrics=metrics,
    )
    return model

def plot_confusion_matrix(y_test_classes, y_pred_classes, display_labels, title="Confusion Matrix"):
    """
    Plot a confusion matrix for one set of labels and predictions.

    Both inputs may be given in any of these forms:
      * 2-D with more than one column (one-hot labels / class probabilities):
        reduced with ``argmax`` over the last axis;
      * 1-D, or 2-D with a single column (class ids, or a sigmoid output):
        flattened and rounded to the nearest integer class id.

    The matrix always has one row/column per entry of ``display_labels``
    (class ids ``0 .. len(display_labels) - 1``), even when a class does not
    occur in the data, so it always matches the tick labels.

    Args:
        y_test_classes: ground-truth labels.
        y_pred_classes: predicted labels or scores.
        display_labels: tick labels, one per class, in class-id order.
        title: figure title.
    """

    def _to_class_ids(values):
        """Reduce labels/scores in any supported layout to 1-D integer class ids."""
        array = np.asarray(values)
        if array.ndim == 2 and array.shape[1] > 1:
            return np.argmax(array, axis=1)
        return np.rint(array.reshape(-1)).astype(int)

    true_ids = _to_class_ids(y_test_classes)
    pred_ids = _to_class_ids(y_pred_classes)

    # Compute the confusion matrix over the full, fixed set of class ids
    cm = confusion_matrix(true_ids, pred_ids, labels=np.arange(len(display_labels)))

    # Optionally normalize the confusion matrix
    # cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    # Display the confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)

    # Plot the matrix
    disp.plot(cmap=plt.cm.Blues)
    plt.title(title)
    plt.show()

def print_example_output(model, dataset):
    """
    Plot confusion matrices for the first batch of ``dataset``.

    The model is run on the batch's features; an age confusion matrix is
    always plotted and, unless the global ``train_age_only`` is set, a gender
    one as well.

    In ordinal mode (global ``use_ordinal_age``) the age head is a sigmoid in
    [0, 1] while the labels are bucket indices, so the prediction is scaled
    back by ``num_age_classes - 1`` and rounded to the nearest bucket before
    it is compared with the labels.

    Args:
        model: model whose ``predict`` returns a dict with key ``"age"``
            (and ``"gender"``).
        dataset: batched dataset yielding ``(features, labels)``.
    """
    for batch in dataset.take(1):
        features, labels = batch
        predictions = model.predict(features)

        age_true = labels["age"]
        age_pred = predictions["age"]
        if use_ordinal_age:
            largest_label = num_age_classes - 1
            age_true = np.asarray(age_true).reshape(-1)
            age_pred = np.clip(np.rint(np.asarray(age_pred).reshape(-1) * largest_label),
                               0, largest_label)

        # Plot confusion matrix using pyplot
        plot_confusion_matrix(age_true, age_pred, np.arange(0, num_age_classes), title="Age Confusion Matrix")
        if not train_age_only:
            plot_confusion_matrix(labels["gender"], predictions["gender"], ['male', 'female'], title="Gender Confusion Matrix")


# Train the model
def train_model(model, dataset, validation_dataset, epochs, batch_size, verbose=0):
    """
    Fit ``model`` with early stopping, checkpointing, TensorBoard logging and
    a per-epoch confusion-matrix plot.

    Args:
        model: compiled Keras model.
        dataset: training data.
        validation_dataset: validation data; also used for the per-epoch
            example plots.
        epochs: maximum number of epochs.
        batch_size: forwarded to ``fit`` only when ``dataset`` is not a
            ``tf.data.Dataset``; a dataset already yields batches, so the
            argument is ignored in that case.
        verbose: Keras verbosity level.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Define callbacks
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=4, restore_best_weights=True
        ),
        tf.keras.callbacks.ModelCheckpoint(
            "best_model.keras", save_best_only=True, monitor="val_loss"
        ),
        tf.keras.callbacks.TensorBoard(log_dir="./logs"),
        tf.keras.callbacks.LambdaCallback(
            # Use the validation data passed to this function, not a notebook global.
            on_epoch_end=lambda epoch, logs: print_example_output(model, validation_dataset)
        ),
    ]

    # Batching is already part of a tf.data pipeline; only pass batch_size
    # for inputs that Keras has to batch itself.
    fit_options = {}
    if not isinstance(dataset, tf.data.Dataset):
        fit_options["batch_size"] = batch_size

    # Train the model
    history = model.fit(
        dataset,
        validation_data=validation_dataset,
        epochs=epochs,
        callbacks=callbacks,
        verbose=verbose,
        **fit_options,
    )
    return history


# Label file and the train/validation pipelines built from it
csv_path = f"{data_path}/validated_filtered_5000_per_age.csv"

train_dataset, val_dataset = create_dataset(
    csv_path,
    batch_size=batch_size,
    num_age_classes=num_age_classes,
    train_ratio=train_ratio,
)

# Sanity check: show the element spec and the shapes of the first batch
print(train_dataset)
for batch in train_dataset.take(1):
    features, labels = batch
    print("Features shape:", features.shape)
    print("Age labels shape:", labels["age"].shape)
    if not train_age_only:
        print("Gender labels shape:", labels["gender"].shape)

# Initialize and compile a fresh model
model = compile_model(AudioAgeAndGenderClassifier(), learning_rate=learning_rate)
model.summary()

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 128, 862), dtype=tf.float32, name=None), {'age': TensorSpec(shape=(None, 7), dtype=tf.float32, name=None), 'gender': TensorSpec(shape=(None,), dtype=tf.int32, name=None)})>
Features shape: (32, 128, 862)
Age labels shape: (32, 7)
Gender labels shape: (32,)


2024-12-05 12:08:11.331613: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [10]:
# Run the training loop with per-epoch progress output
history = train_model(
    model,
    train_dataset,
    val_dataset,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

Epoch 1/10


KeyboardInterrupt: 