In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
! pip install datasets --quiet
! pip install evaluate --quiet
! pip install transformers --quiet
!pip install huggingface_hub --quiet

!pip install accelerate -U --quiet
!pip install transformers[torch] --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
from datasets import load_dataset
from datasets import Audio
from datasets import DatasetDict
from datasets import Dataset as DT

import numpy as np

from transformers import AutoModelForAudioClassification, TrainingArguments, Trainer
from transformers import AutoFeatureExtractor
from transformers import EarlyStoppingCallback

import torch
import evaluate

from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import train_test_split

import seaborn as sns
import matplotlib.pyplot as plt
import librosa

from collections import defaultdict

import torch


import os
import csv



In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [None]:
def window_audio(audio_array, window_size=10, overlap=0.75):
    sr = 16000
    window_size_samples = int(window_size * sr)
    overlap_samples = int(window_size_samples * overlap)

    windows = []
    for i in range(0, len(audio_array) - window_size_samples, overlap_samples):
        window = audio_array[i:i + window_size_samples]
        windows.append(window)

    return windows


def preprocess_function(examples):
    audio_arrays = [x["array"] for x in examples["audio"]]
    inputs = feature_extractor(
        audio_arrays, sampling_rate=feature_extractor.sampling_rate, max_length=16000*10, truncation=True)
    return inputs

def compute_metrics(eval_pred):
    predictions = np.argmax(eval_pred.predictions, axis=1)

    f1 = f1_score(y_true=eval_pred.label_ids, y_pred=predictions, average='weighted')
    accuracy = accuracy_score(y_true=eval_pred.label_ids, y_pred=predictions)

    return {
        "f1_score": f1,
        "accuracy": accuracy
    }


def apply_window(dataset):
    windowed_dataset = defaultdict(list)

    for instance_id, example in enumerate(dataset):
        audio_array = example["audio"]["array"]
        windows = window_audio(audio_array)

        for window in windows:
            windowed_dataset["audio"].append({"array": window})
            windowed_dataset["label"].append(example["label"])
            windowed_dataset["instance_id"].append(instance_id)

    windowed_dataset = DT.from_dict(windowed_dataset)
    return windowed_dataset







In [None]:
def evaluate_test_set(test_dataset, train_val_dataset, training_args, modelname):
    encoded_train_val_dataset = train_val_dataset.map(preprocess_function, remove_columns="audio", batched=True)
    encoded_test_dataset = test_dataset.map(preprocess_function, remove_columns="audio", batched=True)

    model = AutoModelForAudioClassification.from_pretrained("facebook/hubert-base-ls960", num_labels=num_labels)
    model.to(device)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=encoded_train_val_dataset,
        eval_dataset=encoded_test_dataset,
        tokenizer=feature_extractor,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(3, 0.0)]
    )

    trainer.train()
    trainer.save_model(f"/content/drive/MyDrive/{modelname}/model_{modelname}_final")
    eval_results = trainer.evaluate()

    predictions = trainer.predict(encoded_test_dataset)

    # Apply averaging using instance_id
    instance_id_predictions = defaultdict(list)
    for i, instance_id in enumerate(test_dataset["instance_id"]):
        instance_id_predictions[instance_id].append(predictions.predictions[i])

    for instance_id, instance_predictions in instance_id_predictions.items():
        mean_prediction = np.mean(instance_predictions, axis=0)
        eval_results[f'eval_predictions_{instance_id}'] = mean_prediction

    print(f"\nTest Set Evaluation - F1 Score: {eval_results['eval_f1_score']}")
    print(f"Test Set Evaluation - Accuracy: {eval_results['eval_accuracy']}")


## HuBERT-Trial Cross Dataset Evaluation

HuBERT-Trial on DeceitBank

In [5]:
from transformers import AutoModelForAudioClassification
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import pipeline

model = AutoModelForAudioClassification.from_pretrained("/content/drive/MyDrive/trial/model_trial_final")
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/hubert-base-ls960")
classifier = pipeline("audio-classification", model=model,feature_extractor = feature_extractor)

In [None]:
def evaluate_cross_dataset(dataset,model):
  dataset = load_dataset("audiofolder", data_dir = dataset, split='train')
  dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))

  feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/hubert-base-ls960")
  classifier = pipeline("audio-classification", model=model,feature_extractor = feature_extractor)

  dataset = apply_window(dataset)

  y_true_test = dataset["label"]
  predictions_test = []

  for file in dataset:
      audio_array = np.array(file["audio"]['array'])
      sampling_rate = 16000

      result = classifier({"raw": audio_array, "sampling_rate": sampling_rate})
      predictions_test.append(result[0]["label"])

  label_mapping = {"LABEL_0": 0, "LABEL_1": 1}
  predictions_test = [label_mapping[label] for label in predictions_test]

  recall =accuracy_score(y_true_test, predictions_test)
  f1 = f1_score(y_true_test, predictions_test)

  print(f"Accuracy: {recall}")
  print(f"F1 Score: {f1}")


In [None]:
model = AutoModelForAudioClassification.from_pretrained("/content/drive/MyDrive/trial/model_trial_final")

evaluate_cross_dataset("/content/drive/MyDrive/RecordingsWav",model)


Resolving data files:   0%|          | 0/121 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Accuracy: 0.6819444444444445
F1 Score: 0.6427638190954773


HuBERT-Trial on Bag of Lies

In [14]:
evaluate_cross_dataset("/content/drive/MyDrive/Audio-BagofLies",model)

Resolving data files:   0%|          | 0/325 [00:00<?, ?it/s]

Accuracy: 0.736887911260047
F1 Score: 0.699432357228956


## Cross Dataset Evaluation : HuBERT-Bag of Lies

On Trial Data

In [None]:
bog_model = AutoModelForAudioClassification.from_pretrained("/content/drive/MyDrive/BagOfLiesModel/model_BagOfLiesModel_final")

evaluate_cross_dataset("/content/drive/MyDrive/all_audio",bog_model)


Resolving data files:   0%|          | 0/116 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Accuracy: 0.7444444444444445
F1 Score: 0.6988794926004228


On DeceitBank Data

In [None]:
evaluate_cross_dataset("/content/drive/MyDrive/RecordingsWav",bog_model)

Resolving data files:   0%|          | 0/121 [00:00<?, ?it/s]

Accuracy: 0.6819444444444445
F1 Score: 0.6427638190954773


## Cross Dataset Evaluation : HuBERT-DeceitBank

On Trial Data

In [11]:
deceitbank_model = AutoModelForAudioClassification.from_pretrained("/content/drive/MyDrive/DeceitBank/model_DeceitBank_final")
evaluate_cross_dataset("/content/drive/MyDrive/all_audio",deceitbank_model)

Resolving data files:   0%|          | 0/116 [00:00<?, ?it/s]

Accuracy: 0.4288799526004245
F1 Score: 0.0694323573895664


On Bag of Lies Data

In [12]:
evaluate_cross_dataset("/content/drive/MyDrive/Audio-BagofLies",deceitbank_model)

Resolving data files:   0%|          | 0/116 [00:00<?, ?it/s]

Accuracy: 0.5876887911260047
F1 Score: 0.0994323573895641
