### **Teste Audio Real**

In [None]:
!pip install ffmpeg-python

In [None]:
from IPython.display import HTML, Javascript, display
from google.colab.output import eval_js
from base64 import b64decode
import io
import ffmpeg

# --- Variável global para armazenar o áudio ---
# Vamos guardar o áudio em formato de bytes na memória
audio_data_bytes = None
# ---------------------------------------------


# --- Funções Python que serão chamadas pelo JavaScript ---
from google.colab import output

def save_audio(b64data):
  """
  Callback do JS: Recebe o áudio em base64 e salva na variável global.
  """
  global audio_data_bytes
  # Decodifica o base64 para bytes
  audio_data_bytes = b64decode(b64data)
  print("Áudio recebido! Pronto para processar na Célula 3.")

# Registra a função 'save_audio' para que o JS possa chamá-la
output.register_callback('notebook.save_audio', save_audio)
# --------------------------------------------------------


# --- O Código JavaScript para Gravação ---
# Este código é executado no SEU NAVEGADOR, não no servidor Colab.
RECORD_JS = """
<script>
var stream;         // Stream do microfone
var recorder;       // Objeto MediaRecorder
var chunks;         // Pedaços do áudio gravado
var audio;          // (Não usado aqui, mas útil para debug)

async function startRecording() {
  // 1. Pedir permissão e obter o stream do microfone
  stream = await navigator.mediaDevices.getUserMedia({audio: true});
  recorder = new MediaRecorder(stream);
  chunks = [];

  // 2. O que fazer quando o áudio estiver disponível
  recorder.ondataavailable = e => chunks.push(e.data);

  // 3. O que fazer quando a gravação parar (botão 'Parar' clicado)
  recorder.onstop = e => {
    // Combina todos os pedaços em um único Blob
    const blob = new Blob(chunks, { 'type' : 'audio/webm' }); // Formato padrão do navegador
    const reader = new FileReader();

    reader.onload = () => {
      // Converte o Blob para base64
      const b64data = reader.result.split(',')[1];

      // Envia os dados em base64 para o Python (chamando a função 'save_audio')
      google.colab.kernel.invokeFunction('notebook.save_audio', [b64data], {});
    };
    reader.readAsDataURL(blob);
  };

  // 4. Inicia a gravação
  recorder.start();

  // Atualiza os botões
  document.getElementById('start').disabled = true;
  document.getElementById('stop').disabled = false;
}

function stopRecording() {
  // Para o gravador
  recorder.stop();
  // Para o stream do microfone (desliga a luzinha)
  stream.getTracks().forEach(track => track.stop());

  // Atualiza os botões
  document.getElementById('start').disabled = false;
  document.getElementById('stop').disabled = true;
}

// --- Criação dos Botões (HTML) ---
document.body.appendChild(document.createTextNode('Clique para gravar: '));
var startButton = document.createElement('button');
startButton.id = 'start';
startButton.innerHTML = '▶️ Gravar';
startButton.onclick = startRecording;
document.body.appendChild(startButton);

var stopButton = document.createElement('button');
stopButton.id = 'stop';
stopButton.innerHTML = '⏹️ Parar';
stopButton.disabled = true;
stopButton.onclick = stopRecording;
document.body.appendChild(stopButton);
</script>
"""

# Exibe o HTML/JS na saída da célula
display(HTML(RECORD_JS))


In [None]:
from IPython.display import Audio

if audio_data_bytes:
  print("Processando e salvando o áudio...")

  AUDIO_PATH = "/content/audio_teste.wav"
  TARGET_SR = 16000

  # O áudio gravado está em 'audio_data_bytes' no formato webm/opus
  # Precisamos usar o ffmpeg para converter

  try:
    # Usa o ffmpeg para ler os bytes da memória (stdin)
    # e converter para o formato/taxa de amostragem desejados
    process = (
        ffmpeg
        .input('pipe:') # Lê da entrada padrão (stdin)
        .output(AUDIO_PATH, acodec='pcm_s16le', ar=TARGET_SR, ac=1) # pcm_s16le é o formato WAV
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, overwrite_output=True)
    )

    # Envia os dados do áudio para o processo ffmpeg
    out, err = process.communicate(input=audio_data_bytes)

    if process.returncode != 0:
      print("Erro no FFMPEG:")
      print(err.decode())
    else:
      print(f"Áudio salvo com sucesso em: {AUDIO_PATH}")
      print(f"Taxa de amostragem: {TARGET_SR} Hz")

      # Toca o áudio salvo para verificação
      display(Audio(AUDIO_PATH))

  except ffmpeg.Error as e:
    print('Erro no ffmpeg:')
    print(e.stderr.decode())
  except Exception as e:
    print(f"Ocorreu um erro: {e}")

else:
  print("Nenhum áudio foi gravado ainda. Rode a Célula 2, grave e pare a gravação primeiro.")

# Limpa a variável para a próxima gravação
audio_data_bytes = None

NameError: name 'audio_data_bytes' is not defined

In [None]:
import shutil
import os
from google.colab import drive
drive.mount('/content/drive')

MODEL_DIR = "/content/drive/MyDrive/release_in_the_wild/teste"

# caminho de origem (onde está o arquivo no Drive)
origem = os.path.join(MODEL_DIR, "audio_fake_MONIQUE.wav")

# caminho de destino em /content
AUDIO_PATH = "/content/audio_teste.wav"

# copia o arquivo
shutil.copy(origem, AUDIO_PATH)

print("Arquivo copiado para:", AUDIO_PATH)
display(Audio(AUDIO_PATH))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Arquivo copiado para: /content/audio_teste.wav


In [None]:
from tensorflow.keras.models import load_model
from google.colab import drive
drive.mount('/content/drive')

MODEL_PATH = "/content/drive/MyDrive/modelos_deepfake/deepfake_audio_model_cnn_mfcc.keras"
model = load_model(MODEL_PATH)
print("Modelo Keras carregado!")

Mounted at /content/drive
Modelo Keras carregado!


In [None]:
import soundfile as sf
import librosa
import numpy as np

SR = 16000      # mesmo que no treino
N_MFCC = 40     # mesmo que no treino
SAMPLE_RATE = 16000
N_MFCC = 40
N_MELS = 40
FIXED_FRAMES = 128

def load_audio(path, sr=SR):
    wav, fs = sf.read(path)
    if wav.ndim > 1:
        wav = np.mean(wav, axis=1)
    if fs != sr:
        wav = librosa.resample(wav.astype(np.float32), orig_sr=fs, target_sr=sr)
    return wav.astype(np.float32)

def extract_mfcc(wav, sr=SAMPLE_RATE, n_mfcc=N_MFCC, max_len=FIXED_FRAMES):
    try:
        # calcula MFCC: shape (n_mfcc, n_frames)
        mfcc = librosa.feature.mfcc(y=wav, sr=sr, n_mfcc=n_mfcc)

        # normalização (z-score)
        mfcc = (mfcc - mfcc.mean()) / (mfcc.std() + 1e-9)

        # pad ou corte no eixo de tempo (frames)
        if mfcc.shape[1] < max_len:
            pad_width = max_len - mfcc.shape[1]
            mfcc = np.pad(mfcc, ((0, 0), (0, pad_width)), mode='constant')
        else:
            mfcc = mfcc[:, :max_len]

        return mfcc

    except Exception as e:
        print(f"Erro ao processar {wav}: {e}")
        return np.zeros((n_mfcc, max_len))

def predict_audio_real_fake_keras(audio_path, model, threshold=0.5):
    wav = load_audio(audio_path)
    mfcc = extract_mfcc(wav)   # tem que gerar shape (40, 128), por exemplo
    mfcc = np.expand_dims(mfcc, axis=-1)   # (40, 128, 1)
    X = np.expand_dims(mfcc, axis=0)

    # 5) Prever probabilidade
    proba_fake = model.predict(X)[0][0]  # valor entre 0 e 1

    # 6) Decidir rótulo (assumindo 1 = FAKE, 0 = REAL)
    pred_label = 1 if proba_fake >= threshold else 0

    return pred_label, proba_fake


In [None]:
label_map = {0: "REAL", 1: "FAKE"}

audio_path = "/content/audio_teste.wav"   # por exemplo
pred, proba_fake = predict_audio_real_fake_keras(audio_path, model)

print(f"Probabilidade de FAKE: {proba_fake*100:.2f}%")
print(f"Resultado: {label_map[pred]}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
Probabilidade de FAKE: 100.00%
Resultado: FAKE


### **Treinamento**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls /content/drive/MyDrive

In [None]:
import os
path = '/content/drive/MyDrive/release_in_the_wild'
os.chdir(path)
print("Diretório atual:", os.getcwd())


Diretório atual: /content/drive/MyDrive/release_in_the_wild


In [1]:
import os
import numpy as np
import soundfile as sf
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

DATASET_DIR = '/content/drive/MyDrive/release_in_the_wild'
REAL_DIR = os.path.join(DATASET_DIR, "real")
FAKE_DIR = os.path.join(DATASET_DIR, "fake")

SAMPLE_RATE = 16000
N_MFCC = 40
N_MELS = 40
FIXED_FRAMES = 128

def extract_mfcc(wav, sr=SAMPLE_RATE, n_mfcc=N_MFCC, max_len=FIXED_FRAMES):
    try:
        # calcula MFCC: shape (n_mfcc, n_frames)
        mfcc = librosa.feature.mfcc(y=wav, sr=sr, n_mfcc=n_mfcc)

        # normalização (z-score)
        mfcc = (mfcc - mfcc.mean()) / (mfcc.std() + 1e-9)

        # pad ou corte no eixo de tempo (frames)
        if mfcc.shape[1] < max_len:
            pad_width = max_len - mfcc.shape[1]
            mfcc = np.pad(mfcc, ((0, 0), (0, pad_width)), mode='constant')
        else:
            mfcc = mfcc[:, :max_len]

        return mfcc

    except Exception as e:
        print(f"Erro ao processar {wav}: {e}")
        return np.zeros((n_mfcc, max_len))

def load_audio(path, sr=SAMPLE_RATE):
    wav, fs = sf.read(path)
    if wav.ndim > 1:
        wav = np.mean(wav, axis=1)
    if fs != sr:
        wav = librosa.resample(wav.astype(np.float32), orig_sr=fs, target_sr=sr)
    return wav.astype(np.float32)

def load_dataset_by_person(real_dir, fake_dir):
    X, y, persons = [], [], []

    # REAL
    for person_name in os.listdir(real_dir):
        person_path = os.path.join(real_dir, person_name)
        if os.path.isdir(person_path):
            for root, _, files in os.walk(person_path):
                for f in files:
                    if f.lower().endswith(('.wav', '.flac', '.mp3')):
                        wav = load_audio(os.path.join(root, f))
                        feat = extract_mfcc(wav)
                        X.append(feat)
                        y.append(0)
                        persons.append(person_name)

    # FAKE
    for person_name in os.listdir(fake_dir):
        person_path = os.path.join(fake_dir, person_name)
        if os.path.isdir(person_path):
            for root, _, files in os.walk(person_path):
                for f in files:
                    if f.lower().endswith(('.wav', '.flac', '.mp3')):
                        wav = load_audio(os.path.join(root, f))
                        feat = extract_mfcc(wav)
                        X.append(feat)
                        y.append(1)
                        persons.append(person_name)

    X = np.array(X)[..., np.newaxis]
    y = np.array(y)
    persons = np.array(persons)
    return X, y, persons

print("Carregando dataset (isso pode levar alguns minutos)...")
X, y, persons = load_dataset_by_person(REAL_DIR, FAKE_DIR)
print("Dataset shape:", X.shape, y.shape)
print("Total de pessoas únicas:", len(np.unique(persons)))

# Divisão por pessoa — evita áudios iguais em treino/teste
unique_persons = np.unique(persons)
train_persons, test_persons = train_test_split(unique_persons, test_size=0.2, random_state=42)

train_mask = np.isin(persons, train_persons)
test_mask = np.isin(persons, test_persons)

X_train, X_test = X[train_mask], X[test_mask]
y_train, y_test = y[train_mask], y[test_mask]

print("Treino:", X_train.shape, "Teste:", X_test.shape)

# Modelo CNN simples
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(N_MELS, FIXED_FRAMES, 1)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Treinamento
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=12,
    batch_size=16,
    verbose=1
)

# Avaliação
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int).flatten()

print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=[0, 1], target_names=['Real', 'Fake'], zero_division=0))
print("Test Accuracy:", accuracy_score(y_test, y_pred))

# Salva o modelo
model.save("/content/drive/MyDrive/modelos_deepfake/deepfake_audio_model_cnn_mfcc.keras")
print("✅ Modelo salvo em: /content/drive/MyDrive/modelos_deepfake/deepfake_audio_model_cnn_mfcc.keras")


KeyboardInterrupt: 

In [None]:
# ======================================================
# 1. Importações
# ======================================================
import os
import numpy as np
import librosa
import soundfile as sf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import load_model

# ======================================================
# 2. Montar Google Drive
# ======================================================
from google.colab import drive
drive.mount('/content/drive')

# ======================================================
# 3. Caminhos do Dataset e Modelo
# ======================================================
DATASET_DIR = '/content/drive/MyDrive/release_in_the_wild'
REAL_DIR = os.path.join(DATASET_DIR, "real")
FAKE_DIR = os.path.join(DATASET_DIR, "fake")

MODEL_PATH = "/content/drive/MyDrive/modelos_deepfake/deepfake_audio_model_cnn_mfcc.keras"

SAMPLE_RATE = 16000
N_MFCC = 40
FIXED_FRAMES = 128

# ======================================================
# 4. Funções auxiliares
# ======================================================
def extract_mfcc(wav, sr=SAMPLE_RATE, n_mfcc=N_MFCC, max_len=FIXED_FRAMES):
    mfcc = librosa.feature.mfcc(y=wav, sr=sr, n_mfcc=n_mfcc)
    mfcc = (mfcc - mfcc.mean()) / (mfcc.std() + 1e-9)

    if mfcc.shape[1] < max_len:
        mfcc = np.pad(mfcc, ((0, 0), (0, max_len - mfcc.shape[1])), mode='constant')
    else:
        mfcc = mfcc[:, :max_len]

    return mfcc

def load_audio(path):
    wav, sr = sf.read(path)
    if wav.ndim > 1:
        wav = wav.mean(axis=1)
    if sr != SAMPLE_RATE:
        wav = librosa.resample(wav.astype(np.float32), orig_sr=sr, target_sr=SAMPLE_RATE)
    return wav.astype(np.float32)

def load_dataset(real_dir, fake_dir):
    X, y, persons = [], [], []

    # REAL
    for person in os.listdir(real_dir):
        person_path = os.path.join(real_dir, person)
        if os.path.isdir(person_path):
            for root, _, files in os.walk(person_path):
                for f in files:
                    if f.lower().endswith((".wav", ".flac", ".mp3")):
                        path = os.path.join(root, f)
                        wav = load_audio(path)
                        X.append(extract_mfcc(wav))
                        y.append(0)
                        persons.append(person)

    # FAKE
    for person in os.listdir(fake_dir):
        person_path = os.path.join(fake_dir, person)
        if os.path.isdir(person_path):
            for root, _, files in os.walk(person_path):
                for f in files:
                    if f.lower().endswith((".wav", ".flac", ".mp3")):
                        path = os.path.join(root, f)
                        wav = load_audio(path)
                        X.append(extract_mfcc(wav))
                        y.append(1)
                        persons.append(person)

    X = np.array(X)[..., np.newaxis]
    y = np.array(y)
    persons = np.array(persons)
    return X, y, persons


# ======================================================
# 5. Carregar Dataset
# ======================================================
print("Carregando dataset...")
X, y, persons = load_dataset(REAL_DIR, FAKE_DIR)
print("Shape:", X.shape, y.shape)
print("Pessoas únicas:", len(np.unique(persons)))


# ======================================================
# 6. Dividir Teste (igual ao treinamento original)
# ======================================================
unique_persons = np.unique(persons)

train_persons, test_persons = train_test_split(
    unique_persons, test_size=0.2, random_state=42
)

test_mask = np.isin(persons, test_persons)

X_test = X[test_mask]
y_test = y[test_mask]

print("Teste:", X_test.shape, y_test.shape)


# ======================================================
# 7. Carregar modelo Keras
# ======================================================
print("Carregando modelo...")
model = load_model(MODEL_PATH)
print("Modelo carregado!")


# ======================================================
# 8. Avaliação
# ======================================================
y_prob = model.predict(X_test)
y_pred = (y_prob > 0.5).astype(int).flatten()

print("\n====== CLASSIFICATION REPORT ======")
print(classification_report(y_test, y_pred, target_names=["Real", "Fake"], zero_division=0))

print("Accuracy:", accuracy_score(y_test, y_pred))

print("\n====== CONFUSION MATRIX ======")
print(confusion_matrix(y_test, y_pred))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Carregando dataset...
