In [1]:
# VGGish depends on the following Python packages:
# Numpy
# Resampy
# Tensorflow
# Tf_slim
# Six
# Soundfile
#

from __future__ import print_function

import sys
sys.path.append('/Users/pierrekolingba-froidevaux/Desktop/Deep_Learning/dl4ad-group1/models/research/audioset/vggish')

import numpy as np
import json
import resampy  # pylint: disable=import-error
import tensorflow.compat.v1 as tf
import vggish_input
import vggish_params
import vggish_postprocess
import vggish_slim
import os
import librosa
import glob
import tf_slim as slim
import vggish_params as params


from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


def main(test_mode=False):
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
    global audio_files
    audio_folder_path = '../data/raw_audio_pcm_f32le_16kHz_denoised/'
    if test_mode:
        audio_files = [
            '../data/raw_audio_pcm_f32le_16kHz_denoised/2_99.wav',
            '../data/raw_audio_pcm_f32le_16kHz_denoised/1_60.wav',
            '../data/raw_audio_pcm_f32le_16kHz_denoised/2_92.wav',
            '../data/raw_audio_pcm_f32le_16kHz_denoised/2_97.wav',
            '../data/raw_audio_pcm_f32le_16kHz_denoised/1_1185.wav',
            '../data/raw_audio_pcm_f32le_16kHz_denoised/2_96.wav',
            '../data/raw_audio_pcm_f32le_16kHz_denoised/2_94.wav',
            '../data/raw_audio_pcm_f32le_16kHz_denoised/2_91.wav'
        ]
        print("Running script in test mode...")
    else:
        audio_files = glob.glob(os.path.join(audio_folder_path, '*.wav'))
        print("Running script...")

def usage():
    """
    Print usage instructions for the script.
    """
    print("Usage:")
    print("  python3 feature_extraction.py [test]")
    print()
    print("Description:")
    print("  This script extracts the features from our denoised data.")
    print()
    print("Optional argument:")
    print("  test: Run the script in test mode.")
    print("        In test mode, dataset size = 5.")
    print()


if __name__ == "__main__":
    # Check if the script was executed with the "--help" argument
    if len(sys.argv) > 1 and sys.argv[1] == "--help":
        # Display usage instructions and exit
        usage()
        sys.exit(0)
    # Check if the script was executed with the "test" argument
    elif len(sys.argv) > 1 and sys.argv[1] == "test":
        # Execute script in test mode
        main(test_mode=True)
    else:
        # Execute script normally
        main()


tf.disable_eager_execution()

if not hasattr(tf.flags, 'DEFINE_string'):
    flags = tf.app.flags
else:
    flags = tf.flags

if 'num_batches' not in flags.FLAGS:
    flags.DEFINE_integer('num_batches', 30, 'Number of batches of examples to feed into the model.')

if 'train_vggish' not in flags.FLAGS:
    flags.DEFINE_boolean('train_vggish', True, 'If True, allow VGGish parameters to change during training.')

if 'checkpoint' not in flags.FLAGS:
    flags.DEFINE_string('checkpoint', 'vggish_model.ckpt', 'Path to the VGGish checkpoint file.')

if 'pca_params' not in flags.FLAGS:
    flags.DEFINE_string('pca_params', 'vggish_pca_params.npz', 'Path to the VGGish pca params file.')


FLAGS = flags.FLAGS

Num GPUs Available:  0
Running script...


## Mit Data Augmentation

In [2]:
import json
import os
import librosa
import numpy as np
import vggish_input

# Chemins vers les fichiers et dossiers nécessaires
json_path = '../data/sarcasm_data.json'
audio_folder_path = '../data/raw_audio_pcm_f32le_16kHz_denoised/'
augmented_folder_path = '../Augmentation/aug_audio_denoised/'  # Chemin vers le dossier contenant les audios augmentés

# Chargement des données JSON
with open(json_path, 'r') as f:
    labels_data = json.load(f)

def get_all_examples(audio_folder_path, augmented_folder_path, labels_data):
    all_features = []
    all_labels = []

    # Traitement des fichiers audio originaux
    for file_name in os.listdir(audio_folder_path):
        if file_name.endswith('.wav'):
            # Charger l'audio original
            audio_path = os.path.join(audio_folder_path, file_name)
            audio, sr = librosa.load(audio_path, sr=None)  # Utiliser sr=None pour conserver le taux d'échantillonnage original
            examples = vggish_input.waveform_to_examples(audio, sr)

            # Obtenir le label du fichier audio à partir des données JSON
            original_name = os.path.splitext(file_name)[0]  # Extraire le nom du fichier d'origine sans extension
            if original_name in labels_data:
                is_sarcastic = labels_data[original_name]["sarcasm"]
                label = 1 if is_sarcastic else 0  # 1 pour sarcasme, 0 pour non-sarcasme
            else:
                print(f"La clé {original_name} n'existe pas dans le fichier JSON.")
                continue

            for example in examples:
                all_features.append(example)
                all_labels.append(label)

    # Traitement des fichiers audio augmentés
    for file_name in os.listdir(augmented_folder_path):
        if file_name.endswith('.wav'):
            # Charger l'audio augmenté
            augmented_audio_path = os.path.join(augmented_folder_path, file_name)
            audio, sr = librosa.load(augmented_audio_path, sr=None)
            examples = vggish_input.waveform_to_examples(audio, sr)

            # Obtenir le label du fichier audio à partir des données JSON
            original_name = "_".join(file_name.split('_')[:-1])  # Extraire le nom du fichier d'origine
            if original_name in labels_data:
                is_sarcastic = labels_data[original_name]["sarcasm"]
                label = 1 if is_sarcastic else 0  # 1 pour sarcasme, 0 pour non-sarcasme
            else:
                print(f"La clé {original_name} n'existe pas dans le fichier JSON.")
                continue

            for example in examples:
                all_features.append(example)
                all_labels.append(label)

    return np.array(all_features), np.array(all_labels)

# Exécuter la fonction sur les fichiers originaux et augmentés
features, labels = get_all_examples(audio_folder_path, augmented_folder_path, labels_data)
# 'labels' est le tableau de vos labels
unique, counts = np.unique(labels, return_counts=True)
print(dict(zip(unique, counts)))

{0: 6406, 1: 8327}


## Ohne Data Augmentation

In [2]:
import json
import os
import librosa
import numpy as np
import vggish_input

# Chemins vers les fichiers et dossiers nécessaires
json_path = '../data/sarcasm_data.json'
audio_folder_path = '../data/raw_audio_pcm_f32le_16kHz_denoised/'

# Chargement des données JSON
with open(json_path, 'r') as f:
    labels_data = json.load(f)
    
def get_all_examples(audio_folder_path, labels_data, audio_files):
    all_features = []
    all_labels = []

    for file_path in audio_files:
        file_name = os.path.basename(file_path).replace('.wav', '')

        # Charger l'audio et le convertir en exemple
        audio, sr = librosa.load(file_path, sr=22050)
        examples = vggish_input.waveform_to_examples(audio, sr)

        # Obtenir le label du fichier audio depuis les données JSON
        is_sarcastic = labels_data[file_name]["sarcasm"]
        label = 1 if is_sarcastic else 0  # Label binaire: 1 pour sarcastique, 0 pour non sarcastique

        for example in examples:
            all_features.append(example)
            all_labels.append(label)

    return np.array(all_features), np.array(all_labels)

# Vous devrez fournir la liste des chemins de fichiers audio à la fonction
features, labels = get_all_examples(audio_folder_path, labels_data, audio_files)

In [3]:
# 'labels' est le tableau de vos labels
unique, counts = np.unique(labels, return_counts=True)
print(dict(zip(unique, counts)))

{0: 1505, 1: 1960}


### Dauer der gesamten Aufnahmen

In [8]:
import json
import os
import librosa

# Chemins vers les fichiers et dossiers nécessaires
json_path = '../data/sarcasm_data.json'
audio_folder_path = '../data/raw_audio_pcm_f32le_16kHz_denoised/'

# Chargement des données JSON
with open(json_path, 'r') as f:
    labels_data = json.load(f)

def calculate_durations(audio_folder_path, labels_data, audio_files):
    total_duration_sarcastic = 0.0  # Durée totale des audios sarcastiques
    total_duration_non_sarcastic = 0.0  # Durée totale des audios non sarcastiques

    for file_path in audio_files:
        file_name = os.path.basename(file_path).replace('.wav', '')

        # Charger l'audio pour obtenir sa durée
        audio, sr = librosa.load(file_path, sr=None)
        duration = len(audio) / sr

        # Obtenir le label du fichier audio depuis les données JSON
        is_sarcastic = labels_data[file_name]["sarcasm"]
        if is_sarcastic:
            total_duration_sarcastic += duration
        else:
            total_duration_non_sarcastic += duration

    return total_duration_sarcastic, total_duration_non_sarcastic

# Génération de la liste des chemins de fichiers audio (exemplaire, à ajuster selon votre situation)
audio_files = [os.path.join(audio_folder_path, file) for file in os.listdir(audio_folder_path) if file.endswith('.wav')]

total_duration_sarcastic, total_duration_non_sarcastic = calculate_durations(audio_folder_path, labels_data, audio_files)

print(f"Durée totale des fichiers audio sarcastiques: {total_duration_sarcastic} secondes")
print(f"Durée totale des fichiers audio non sarcastiques: {total_duration_non_sarcastic} secondes")

Durée totale des fichiers audio sarcastiques: 2021.4168125 secondes
Durée totale des fichiers audio non sarcastiques: 1580.9109374999978 secondes


# Feature extraction Embedding with Resampling

## Balancing of classes

### Balancing by increasing sample size of the smaller class

In [30]:
pip install imbalanced-learn

Collecting imbalanced-learn
  Downloading imbalanced_learn-0.12.0-py3-none-any.whl.metadata (8.2 kB)
Downloading imbalanced_learn-0.12.0-py3-none-any.whl (257 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m257.7/257.7 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: imbalanced-learn
Successfully installed imbalanced-learn-0.12.0
Note: you may need to restart the kernel to use updated packages.


In [19]:
from imblearn.over_sampling import RandomOverSampler
import numpy as np

# Initialisation de RandomOverSampler
ros = RandomOverSampler(random_state=42)

# Appliquer le suréchantillonnage sur les labels pour obtenir les indices des données suréchantillonnées
# Ici, nous avons besoin d'aplatir les features si elles sont en 3D pour VGGish
features_flattened = features.reshape(features.shape[0], -1)
features_oversampled, labels_oversampled = ros.fit_resample(features_flattened, labels)

# Reformer les features suréchantillonnées pour correspondre au format attendu par VGGish si nécessaire
features_oversampled = features_oversampled.reshape(-1, vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS)

# Vérifier la nouvelle distribution des classes après suréchantillonnage
unique, counts = np.unique(labels_oversampled, return_counts=True)
print("Nouvelle répartition des classes après suréchantillonnage:", dict(zip(unique, counts)))
# Assurez-vous que les données sous-échantillonnées contiennent bien des instances des deux classes
print(np.unique(labels_oversampled, return_counts=True))

Nouvelle répartition des classes après suréchantillonnage: {0: 1960, 1: 1960}
(array([0, 1]), array([1960, 1960]))


In [20]:
import tensorflow as tf
import vggish_slim, vggish_params, vggish_postprocess
from sklearn.model_selection import train_test_split

def extract_embeddings(features, checkpoint_path, pca_params_path):
    tf.compat.v1.disable_eager_execution()
    pproc = vggish_postprocess.Postprocessor(pca_params_path)
    with tf.compat.v1.Graph().as_default(), tf.compat.v1.Session() as sess:
        vggish_slim.define_vggish_slim(training=False)
        vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
        features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
        embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)
        
        # Assurez-vous que la taille des features correspond à l'attente du réseau
        features_reshaped = features.reshape((-1, vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS))
        
        [embedding_batch] = sess.run([embedding_tensor], feed_dict={features_tensor: features_reshaped})
        postprocessed_batch = pproc.postprocess(embedding_batch)
        return postprocessed_batch

# Chemins vers les fichiers nécessaires
checkpoint_path = './vggish_model.ckpt'
pca_params_path = './vggish_pca_params.npz'

# Utilisation de `features_oversampled` et `labels_oversampled` préparés précédemment

# Extraction des embeddings
embedding_batch_oversampled = extract_embeddings(features_oversampled, checkpoint_path, pca_params_path)

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(embedding_batch_oversampled, labels_oversampled, test_size=0.2, random_state=42, stratify=labels_oversampled)

# Affichage de la répartition des classes
unique_train, counts_train = np.unique(y_train, return_counts=True)
unique_test, counts_test = np.unique(y_test, return_counts=True)
print("Répartition des classes dans l'ensemble d'entraînement:", dict(zip(unique_train, counts_train)))
print("Répartition des classes dans l'ensemble de test:", dict(zip(unique_test, counts_test)))

INFO:tensorflow:Restoring parameters from ./vggish_model.ckpt




Répartition des classes dans l'ensemble d'entraînement: {0: 1568, 1: 1568}
Répartition des classes dans l'ensemble de test: {0: 392, 1: 392}


### Balancing by decreasing sample size of the bigger class

In [15]:
from imblearn.under_sampling import RandomUnderSampler

rus = RandomUnderSampler(random_state=42)
# Le reshape est utilisé ici pour transformer les labels en une structure acceptable par fit_resample
labels_resampled_indices = rus.fit_resample(np.arange(labels.shape[0]).reshape(-1, 1), labels)[0]

# Utilisez les indices pour sélectionner les observations correspondantes dans features
features_undersampled = features[labels_resampled_indices.flatten()]
labels_undersampled = labels[labels_resampled_indices.flatten()]

# Assurez-vous que les données sous-échantillonnées contiennent bien des instances des deux classes
print(np.unique(labels_undersampled, return_counts=True))

(array([0, 1]), array([1505, 1505]))


In [16]:
# Script 2: Extraction des Embeddings et Préparation des Données

import tensorflow as tf
import vggish_slim, vggish_params, vggish_postprocess
from sklearn.model_selection import train_test_split

def extract_embeddings(features, checkpoint_path, pca_params_path):
    tf.compat.v1.disable_eager_execution()
    pproc = vggish_postprocess.Postprocessor(pca_params_path)
    with tf.compat.v1.Graph().as_default(), tf.compat.v1.Session() as sess:
        vggish_slim.define_vggish_slim(training=False)
        vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
        features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
        embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)
        
        # Adapter la taille des features pour correspondre à l'attente du réseau
        features_reshaped = features.reshape((-1, vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS))
        
        [embedding_batch] = sess.run([embedding_tensor], feed_dict={features_tensor: features_reshaped})
        postprocessed_batch = pproc.postprocess(embedding_batch)
        return postprocessed_batch

# Chemins vers les fichiers nécessaires
checkpoint_path = './vggish_model.ckpt'
pca_params_path = './vggish_pca_params.npz'

# Assurez-vous que `features_undersampled` et `labels_undersampled` sont définis
# Ces variables devraient déjà être disponibles en mémoire si le script de sous-échantillonnage a été exécuté auparavant dans la même session

# Extraction des embeddings
embedding_batch = extract_embeddings(features_undersampled, checkpoint_path, pca_params_path)

# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(embedding_batch, labels_undersampled, test_size=0.2, random_state=42, stratify=labels_undersampled)

# Affichage de la répartition des classes
unique_train, counts_train = np.unique(y_train, return_counts=True)
unique_test, counts_test = np.unique(y_test, return_counts=True)
print("Répartition des classes dans l'ensemble d'entraînement:", dict(zip(unique_train, counts_train)))
print("Répartition des classes dans l'ensemble de test:", dict(zip(unique_test, counts_test)))

INFO:tensorflow:Restoring parameters from ./vggish_model.ckpt




Répartition des classes dans l'ensemble d'entraînement: {0: 1204, 1: 1204}
Répartition des classes dans l'ensemble de test: {0: 301, 1: 301}


# Feature extraction Embedding

In [32]:
def extract_embeddings(features, checkpoint_path, pca_params_path):
    import tensorflow as tf
    import vggish_slim, vggish_params, vggish_postprocess

    # Use TensorFlow 1.x compatibility mode
    tf.compat.v1.disable_eager_execution()
    
    # Initialize the post-processor
    pproc = vggish_postprocess.Postprocessor(pca_params_path)

    with tf.compat.v1.Graph().as_default(), tf.compat.v1.Session() as sess:
        # Initialize VGGish and load the checkpoint
        vggish_slim.define_vggish_slim(training=False)
        vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)

        # Locate input and output tensors
        features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
        embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)

        # Run the model to obtain embeddings
        [embedding_batch] = sess.run([embedding_tensor], feed_dict={features_tensor: features})
        print(embedding_batch)

        # Apply post-processing - PCA (whitens the data)
        postprocessed_batch = pproc.postprocess(embedding_batch)
        print(postprocessed_batch)
        
        return postprocessed_batch

# Example usage (make sure to replace 'checkpoint_path' and 'pca_params_path' with the actual paths to your files)
checkpoint_path = './vggish_model.ckpt'
pca_params_path = './vggish_pca_params.npz'
embedding_batch = extract_embeddings(features, checkpoint_path, pca_params_path)

labels = np.array(labels)

from sklearn.model_selection import train_test_split

# Diviser les données tout en maintenant la répartition des classes
X_train, X_test, y_train, y_test = train_test_split(embedding_batch, labels, test_size=0.2, random_state=42, stratify=labels)

# Vérifier la répartition des classes dans l'ensemble d'entraînement et de test
unique_train, counts_train = np.unique(y_train, return_counts=True)
unique_test, counts_test = np.unique(y_test, return_counts=True)

print("Répartition des classes dans l'ensemble d'entraînement:", dict(zip(unique_train, counts_train)))
print("Répartition des classes dans l'ensemble de test:", dict(zip(unique_test, counts_test)))

INFO:tensorflow:Restoring parameters from ./vggish_model.ckpt


INFO:tensorflow:Restoring parameters from ./vggish_model.ckpt


[[-0.8820404  -0.00874145  0.38078845 ... -0.6591561   0.06713051
  -0.36917007]
 [-0.43646303  0.05022906  0.11660872 ... -0.4471433  -0.04523692
  -0.30223668]
 [ 0.29241273 -0.17487231 -0.09087229 ...  0.18623087 -0.04541071
  -0.45185494]
 ...
 [-0.25952086 -0.20381099  0.17393252 ... -0.06592399 -0.00994544
  -0.19809008]
 [ 0.23226415  0.1085295  -0.03712422 ...  0.23585996  0.05718157
  -0.3611402 ]
 [-0.11748618 -0.10055816  0.27917695 ...  0.11004415  0.00833523
  -0.40331632]]
[[ 80 101  64 ... 118 255 168]
 [ 93  78 132 ... 229 255 207]
 [ 37  88 153 ... 246 218   0]
 ...
 [121  57 146 ...  97 159 203]
 [ 53  73 189 ...  47   4  86]
 [ 87  44 169 ...  44 214 255]]
Répartition des classes dans l'ensemble d'entraînement: {0: 1204, 1: 1565}
Répartition des classes dans l'ensemble de test: {0: 301, 1: 392}


In [33]:
# 'labels' est le tableau de vos labels
unique, counts = np.unique(labels, return_counts=True)
print(dict(zip(unique, counts)))

{0: 1505, 1: 1957}


In [11]:
print("Taille de l'ensemble d'entraînement:", len(X_train))
print("Taille de l'ensemble de test:", len(X_test))

Taille de l'ensemble d'entraînement: 2769
Taille de l'ensemble de test: 693


# Training with Dataset

## Mit Keras

### training with oversampled dataset from smaller class

In [36]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

_NUM_CLASSES = 1  # Pour une classification binaire, utilisez 1 unité de sortie

def train_and_evaluate_model(X_train, y_train, X_test, y_test, epochs, batch_size):
    # Création du modèle
    model = Sequential([
        Dense(4096, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(_NUM_CLASSES, activation='sigmoid')  # Utilisez 'sigmoid' pour la classification binaire
    ])

    # Compilation du modèle
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.01)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',  # 'binary_crossentropy' pour la classification binaire
                  metrics=['accuracy'])

    # Configuration des callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')
    model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min', verbose=1)

    # Entraînement du modèle
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, callbacks=[early_stopping, model_checkpoint])

    # Chargement du meilleur modèle sauvegardé
    model.load_weights('best_model.h5')

    # Évaluation du modèle
    predictions = model.predict(X_test)
    predictions = np.round(predictions)
    
    print(classification_report(y_test, predictions))

# Exemple d'utilisation
# Assurez-vous que 'embedding_batch_oversampled' et 'labels_oversampled' ont été correctement préparés auparavant
X_train, X_test, y_train, y_test = train_test_split(embedding_batch_oversampled, labels_oversampled, test_size=0.2, stratify=labels_oversampled) #, random_state=42,

epochs = 50
batch_size = 32

train_and_evaluate_model(X_train, y_train, X_test, y_test, epochs, batch_size)

Train on 2508 samples, validate on 628 samples
Epoch 1/50
  32/2508 [..............................] - ETA: 3s - loss: 7.9668 - accuracy: 0.3438

2024-03-11 17:28:38.283330: W tensorflow/c/c_api.cc:305] Operation '{name:'training_52/Adam/dense_52/kernel/v/Assign' id:9626 op device:{requested: '', assigned: ''} def:{{{node training_52/Adam/dense_52/kernel/v/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_52/Adam/dense_52/kernel/v, training_52/Adam/dense_52/kernel/v/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 1: val_loss improved from inf to 2.34075, saving model to best_model.h5


  updates = self.state_updates
2024-03-11 17:28:38.572864: W tensorflow/c/c_api.cc:305] Operation '{name:'loss_26/mul' id:9471 op device:{requested: '', assigned: ''} def:{{{node loss_26/mul}} = Mul[T=DT_FLOAT, _has_manual_control_dependencies=true](loss_26/mul/x, loss_26/dense_53_loss/value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
  saving_api.save_model(


Epoch 2/50
Epoch 2: val_loss improved from 2.34075 to 0.75070, saving model to best_model.h5
Epoch 3/50
Epoch 3: val_loss improved from 0.75070 to 0.68193, saving model to best_model.h5
Epoch 4/50
Epoch 4: val_loss did not improve from 0.68193
Epoch 5/50
Epoch 5: val_loss did not improve from 0.68193
Epoch 6/50
Epoch 6: val_loss did not improve from 0.68193
Epoch 7/50
Epoch 7: val_loss did not improve from 0.68193
Epoch 8/50
Epoch 8: val_loss did not improve from 0.68193
Epoch 9/50
Epoch 9: val_loss did not improve from 0.68193
Epoch 10/50
Epoch 10: val_loss did not improve from 0.68193
Epoch 11/50
Epoch 11: val_loss did not improve from 0.68193
Epoch 12/50
Epoch 12: val_loss did not improve from 0.68193
Epoch 13/50
Epoch 13: val_loss did not improve from 0.68193
Epoch 13: early stopping
              precision    recall  f1-score   support

           0       0.59      0.54      0.56       392
           1       0.57      0.62      0.60       392

    accuracy                         

  updates=self.state_updates,
2024-03-11 17:28:40.770080: W tensorflow/c/c_api.cc:305] Operation '{name:'dense_53/Sigmoid' id:9414 op device:{requested: '', assigned: ''} def:{{{node dense_53/Sigmoid}} = Sigmoid[T=DT_FLOAT, _has_manual_control_dependencies=true](dense_53/BiasAdd)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


### undersampling from the bigger class

In [18]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

_NUM_CLASSES = 1  # Pour une classification binaire, utilisez 1 unité de sortie

def train_and_evaluate_model(X_train, y_train, X_test, y_test, epochs, batch_size):
    # Création du modèle
    model = Sequential([
        Dense(4096, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(_NUM_CLASSES, activation='sigmoid')  # Utilisez 'sigmoid' pour la classification binaire
    ])

    # Compilation du modèle
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',  # 'binary_crossentropy' pour la classification binaire
                  metrics=['accuracy'])

    # Configuration des callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')
    model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min', verbose=1)

    # Entraînement du modèle
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, callbacks=[early_stopping, model_checkpoint])

    # Chargement du meilleur modèle sauvegardé
    model.load_weights('best_model.h5')

    # Évaluation du modèle
    predictions = model.predict(X_test)
    predictions = np.round(predictions)
    
    print(classification_report(y_test, predictions))

# Exemple d'utilisation
# Remplacez embedding_batch, labels par vos données réelles + labels over ou under
X_train, X_test, y_train, y_test = train_test_split(embedding_batch, labels_undersampled, test_size=0.2, random_state=42, stratify=labels_undersampled)

epochs = 10
batch_size = 32  # Définissez la taille du lot en fonction de votre jeu de données et de la capacité de votre machine

train_and_evaluate_model(X_train, y_train, X_test, y_test, epochs, batch_size)

Train on 1926 samples, validate on 482 samples
Epoch 1/10

2024-03-11 16:28:01.784636: W tensorflow/c/c_api.cc:305] Operation '{name:'training_20/Adam/beta_2/Assign' id:3823 op device:{requested: '', assigned: ''} def:{{{node training_20/Adam/beta_2/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_20/Adam/beta_2, training_20/Adam/beta_2/Initializer/initial_value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
  updates = self.state_updates
2024-03-11 16:28:01.950028: W tensorflow/c/c_api.cc:305] Operation '{name:'loss_10/mul' id:3711 op device:{requested: '', assigned: ''} def:{{{node loss_10/mul}} = Mul[T=DT_FLOAT, _has_manual_control_dependencies=true](loss_10/mul/x, loss_10/dense_21_loss/value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an


Epoch 1: val_loss improved from inf to 4.79783, saving model to best_model.h5
Epoch 2/10
Epoch 2: val_loss did not improve from 4.79783
Epoch 3/10
  32/1926 [..............................] - ETA: 0s - loss: 7.2752 - accuracy: 0.3125

  saving_api.save_model(


Epoch 3: val_loss improved from 4.79783 to 3.63407, saving model to best_model.h5
Epoch 4/10
Epoch 4: val_loss improved from 3.63407 to 1.86599, saving model to best_model.h5
Epoch 5/10
Epoch 5: val_loss did not improve from 1.86599
Epoch 6/10
Epoch 6: val_loss improved from 1.86599 to 1.69780, saving model to best_model.h5
Epoch 7/10
Epoch 7: val_loss improved from 1.69780 to 1.55547, saving model to best_model.h5
Epoch 8/10
Epoch 8: val_loss improved from 1.55547 to 1.44272, saving model to best_model.h5
Epoch 9/10
Epoch 9: val_loss improved from 1.44272 to 1.13118, saving model to best_model.h5
Epoch 10/10
Epoch 10: val_loss did not improve from 1.13118
              precision    recall  f1-score   support

           0       0.68      0.33      0.44       301
           1       0.56      0.85      0.67       301

    accuracy                           0.59       602
   macro avg       0.62      0.59      0.56       602
weighted avg       0.62      0.59      0.56       602



  updates=self.state_updates,
2024-03-11 16:28:03.060505: W tensorflow/c/c_api.cc:305] Operation '{name:'dense_21/Sigmoid' id:3654 op device:{requested: '', assigned: ''} def:{{{node dense_21/Sigmoid}} = Sigmoid[T=DT_FLOAT, _has_manual_control_dependencies=true](dense_21/BiasAdd)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


## Training based on Vggish tuto

In [54]:
import numpy as np
import tensorflow.compat.v1 as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

tf.disable_v2_behavior()  # Désactiver le comportement de TensorFlow 2 pour utiliser tf.compat.v1

# Définir le nombre de lots num_batches
num_batches = 30
_NUM_CLASSES = 1  # Pour une classification binaire

def train_and_evaluate_model(X_train, y_train, X_test, y_test):
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            # Placeholders pour les embeddings et les labels
            embeddings_input = tf.placeholder(tf.float32, shape=[None, X_train.shape[1]], name='embeddings_input')
            labels_input = tf.placeholder(tf.float32, shape=[None, _NUM_CLASSES], name='labels_input')

            # Definition du modèle avec une seule couche cachée
            with tf.variable_scope('custom_layer'):
                net = tf.identity(embeddings_input)  # Utiliser les embeddings comme entrée
                # Couche cachée fc1
                fc1 = tf.compat.v1.layers.dense(net, 4096, activation=tf.nn.relu, name='fc1')
                # Couche de sortie fc2 avec activation sigmoïde
                logits = tf.compat.v1.layers.dense(fc1, _NUM_CLASSES, activation=tf.nn.sigmoid, name='fc2')  # Utilisation directe de sigmoid dans la couche de sortie

            # Loss et optimizer
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels_input))       
            train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

            # Initialisation des variables
            sess.run(tf.global_variables_initializer())

            # Assurez-vous que y_train et y_test ont la bonne forme
            y_train = y_train.reshape(-1, 1)
            y_test = y_test.reshape(-1, 1)

            # Entraînement
            for i in range(num_batches):
                _, loss_value = sess.run([train_op, loss], feed_dict={embeddings_input: X_train, labels_input: y_train})
                print(f'Batch {i}: Loss {loss_value}')

            # Calcul des logits sur l'ensemble de test
            logits_test = sess.run(logits, feed_dict={embeddings_input: X_test})

            # Convertir les logits en prédictions binaires, la conversion est inutile ici car 'logits' utilise déjà 'sigmoid'
            predictions = (logits_test > 0.5).astype(int)

            # Évaluation
            print(classification_report(y_test, predictions))

# Assurez-vous d'avoir X_train, X_test, y_train, y_test définis correctement
train_and_evaluate_model(X_train, y_train, X_test, y_test)

  fc1 = tf.compat.v1.layers.dense(net, 4096, activation=tf.nn.relu, name='fc1')
  logits = tf.compat.v1.layers.dense(fc1, _NUM_CLASSES, activation=tf.nn.sigmoid, name='fc2')  # Utilisation directe de sigmoid dans la couche de sortie


Batch 0: Loss 0.775765597820282
Batch 1: Loss 0.693146288394928
Batch 2: Loss 0.693146288394928
Batch 3: Loss 0.693146288394928
Batch 4: Loss 0.693146288394928
Batch 5: Loss 0.693146288394928
Batch 6: Loss 0.693146288394928
Batch 7: Loss 0.693146288394928
Batch 8: Loss 0.693146288394928
Batch 9: Loss 0.693146288394928
Batch 10: Loss 0.693146288394928
Batch 11: Loss 0.693146288394928
Batch 12: Loss 0.693146288394928
Batch 13: Loss 0.693146288394928
Batch 14: Loss 0.693146288394928
Batch 15: Loss 0.693146288394928
Batch 16: Loss 0.693146288394928
Batch 17: Loss 0.693146288394928
Batch 18: Loss 0.693146288394928
Batch 19: Loss 0.693146288394928
Batch 20: Loss 0.693146288394928
Batch 21: Loss 0.693146288394928
Batch 22: Loss 0.693146288394928
Batch 23: Loss 0.693146288394928
Batch 24: Loss 0.693146288394928
Batch 25: Loss 0.693146288394928
Batch 26: Loss 0.693146288394928
Batch 27: Loss 0.693146288394928
Batch 28: Loss 0.693146288394928
Batch 29: Loss 0.693146288394928
              precis

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
print("Quelques labels de test:", y_test[:10])

Quelques labels de test: [[1 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [1 0]]


In [6]:
# Let's first load the JSON file and then enumerate the amount of data with sarcasm = true and sarcasm = false.

import json

# Load the JSON data
with open('../data/sarcasm_data.json', 'r') as file:
    sarcasm_data = json.load(file)

# Counters for sarcasm = true and sarcasm = false
sarcasm_true_count = 0
sarcasm_false_count = 0

# Enumerate the amount of data
for key, value in sarcasm_data.items():
    if value["sarcasm"]:
        sarcasm_true_count += 1
    else:
        sarcasm_false_count += 1

print(f'Sarcasm true: {sarcasm_true_count}, sarcasm not true: {sarcasm_false_count}')

Sarcasm true: 345, sarcasm not true: 345
