In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
import tensorflow_hub as hub
import pyaudio
import wave
import threading
import cv2
from ultralytics import YOLO
from datetime import datetime

# === Model Loading ===
# Weights file handed to the YOLO constructor below.
yolo_model_path = 'yolo_v1 (2).pt'

# Three models are loaded once at import time and shared by the detectors:
#   audio_model  - Keras classifier applied to the averaged embedding
#   yamnet_model - TF-Hub model whose second output is used as the embedding
#   yolo_model   - object detector applied to video frames
audio_model = tf.keras.models.load_model("best_model.keras")
yamnet_model = hub.load('https://www.kaggle.com/models/google/yamnet/TensorFlow2/yamnet/1')
yolo_model = YOLO(yolo_model_path)

# Output folders for saved detections; creating them is a no-op if they exist.
os.makedirs("anomalous_videos", exist_ok=True)
os.makedirs("anomalous_audio", exist_ok=True)

# Maps the argmax index of audio_model's prediction to a human-readable label.
# The position in the tuple is the class index.
index_to_label = dict(enumerate((
    "Emergency_alert_sound",
    "Explosions",
    "Gunshots",
    "Human screams",
    "Bottles breaking",
    "Dog bark",
)))

# === Audio Threat Detection ===

# Audio recording configuration
# These constants are passed to the PyAudio input stream and reused when the
# captured audio is written back out as a WAV file.
CHUNK = 1024  # Frames per buffer, and frames requested per stream.read() call
FORMAT = pyaudio.paInt16  # Sample format; captured bytes are decoded as np.int16
CHANNELS = 1  # Channel count used for both capture and the saved WAV
RATE = 16000  # Sample rate in Hz; preprocess_audio's default length is 16000 * 5
RECORD_SECONDS = 5  # Analyze 5-second chunks
AUDIO_FOLDER = "anomalous_audio"  # Folder where detected audio clips are saved

def preprocess_audio(audio_waveform, target_length=16000 * 5):
    """Pad or truncate a waveform to a fixed length and peak-normalize it.

    Args:
        audio_waveform: 1-D sequence or array of audio samples.
        target_length: Number of samples in the result. The default is
            5 seconds at 16 kHz.

    Returns:
        A float32 NumPy array with ``target_length`` samples, scaled so the
        largest absolute sample is 1.0. A silent (all-zero) or empty waveform
        is returned as-is instead of being divided by zero.
    """
    # Convert before taking abs(): abs(-32768) overflows in int16.
    audio_waveform = np.asarray(audio_waveform, dtype=np.float32)

    sample_count = len(audio_waveform)
    if sample_count < target_length:
        # Right-pad with zeros (silence) up to the target length.
        audio_waveform = np.pad(audio_waveform, (0, target_length - sample_count))
    else:
        audio_waveform = audio_waveform[:target_length]

    peak = np.max(np.abs(audio_waveform)) if audio_waveform.size else 0.0
    # Skip normalization for silence; 0 / 0 would fill the array with NaN.
    if peak > 0:
        audio_waveform = audio_waveform / peak
    return audio_waveform

def predict_audio_stream(audio_chunk, confidence_threshold=0.5):
    """Predict the class of a given audio chunk.

    The chunk is run through ``preprocess_audio``, embedded with
    ``yamnet_model``, and the averaged embedding is scored by ``audio_model``.

    Args:
        audio_chunk: 1-D array of audio samples.
        confidence_threshold: Minimum top-class score needed to report a
            label.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` comes from
        ``index_to_label``; it is ``"Unknown"`` when the top score is below
        the threshold or the predicted index has no entry in the mapping.
        ``confidence`` is the top-class score in both cases.
    """
    processed_audio = preprocess_audio(audio_chunk)

    # Only the second output (the embeddings) is used; it is averaged over
    # axis 0 and reshaped to a single row for the classifier.
    _, yamnet_embeddings, _ = yamnet_model(processed_audio)
    avg_embedding = tf.reduce_mean(yamnet_embeddings, axis=0).numpy().reshape(1, -1)

    # verbose=0 keeps Keras from printing a progress bar for every chunk.
    prediction = audio_model.predict(avg_embedding, verbose=0)
    predicted_class_index = int(np.argmax(prediction, axis=1)[0])
    confidence = prediction[0][predicted_class_index]

    if confidence >= confidence_threshold:
        # .get() avoids a KeyError if the classifier has more outputs than
        # the label mapping knows about.
        return index_to_label.get(predicted_class_index, "Unknown"), confidence
    return "Unknown", confidence

def record_and_analyze_audio():
    """Continuously record audio from the microphone and analyze it.

    Audio is captured in windows of ``RECORD_SECONDS`` seconds. Each window is
    classified with ``predict_audio_stream``; when the result is anything
    other than ``"Unknown"`` the detection is printed and the raw window is
    saved as a WAV file under ``AUDIO_FOLDER``.

    The loop runs until a ``KeyboardInterrupt`` is caught. Any other
    ``Exception`` raised while handling a window is printed and the loop
    moves on to the next window. The stream and the PyAudio instance are
    always released on the way out.

    NOTE: Python delivers KeyboardInterrupt to the main thread only, so the
    interrupt handler fires only when this function runs on the main thread.
    """
    audio = pyaudio.PyAudio()
    stream = None
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                            input=True, frames_per_buffer=CHUNK)
        # Number of CHUNK-sized reads that make up one analysis window.
        chunks_per_window = int(RATE / CHUNK * RECORD_SECONDS)

        print("Microphone is ON. Listening for anomalies...")
        while True:
            try:
                # Keep the raw bytes so the exact captured audio can be saved.
                raw_audio = b''.join(
                    stream.read(CHUNK, exception_on_overflow=False)
                    for _ in range(chunks_per_window)
                )
                audio_data = np.frombuffer(raw_audio, dtype=np.int16).astype(np.float32)
                predicted_class, confidence = predict_audio_stream(audio_data)

                if predicted_class != "Unknown":
                    # One timestamp so the log line and the filename agree.
                    detected_at = datetime.now()
                    print(f"[{detected_at}] Detected: {predicted_class} (Confidence: {confidence:.2f})")

                    # Save the anomalous audio to a file
                    filename = f"{AUDIO_FOLDER}/{predicted_class}_{detected_at.strftime('%Y%m%d_%H%M%S')}.wav"
                    with wave.open(filename, 'wb') as wf:
                        wf.setnchannels(CHANNELS)
                        wf.setsampwidth(audio.get_sample_size(FORMAT))
                        wf.setframerate(RATE)
                        wf.writeframes(raw_audio)
                    print(f"Anomalous audio saved: {filename}")
            except KeyboardInterrupt:
                print("Microphone stream stopped.")
                break
            except Exception as e:
                # Best effort: report the problem and try the next window.
                print(f"Error: {e}")
    finally:
        # Release the audio device even if opening the stream failed or the
        # loop was left through an unexpected exception.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        audio.terminate()

# === Video Threat Detection ===
def process_video_stream(video_source=0, max_read_failures=30):
    """Run YOLO on a video stream and record the frames that show an anomaly.

    Every frame is passed to ``yolo_model``. Each detection is printed until
    one labelled ``"violence"`` or ``"weaponized"`` is found; that starts (or
    continues) an MP4 recording under ``anomalous_videos/``. The recording is
    closed on the first frame without such a detection. The annotated frame
    is displayed in a window; pressing ``q`` in that window stops the loop.

    Args:
        video_source: Value passed to ``cv2.VideoCapture`` (0 for the webcam).
        max_read_failures: Number of consecutive failed frame reads tolerated
            before the stream is treated as ended and the loop stops.

    Returns:
        None. Returns early if the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_source)  # Use 0 for webcam
    if not cap.isOpened():
        print("Error: Could not open video stream")
        return

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Fall back to 30 when the source reports an FPS that truncates to 0.
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30

    anomaly_labels = ("violence", "weaponized")
    out = None  # Active cv2.VideoWriter, or None when not recording
    failed_reads = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # A finished file or a lost camera fails on every read, so
                # give up after a bounded number of consecutive failures
                # instead of spinning forever.
                failed_reads += 1
                if failed_reads >= max_read_failures:
                    print("Error: Video stream ended or is unavailable. Stopping.")
                    break
                print("Warning: Failed to capture frame, skipping...")
                continue
            failed_reads = 0

            results = yolo_model.predict(frame, conf=0.5, verbose=False)
            annotated_frame = results[0].plot()

            anomalies_detected = False
            if results[0].boxes:
                for box in results[0].boxes:
                    cls_id = int(box.cls)
                    confidence = box.conf.item()
                    label = yolo_model.names[cls_id]

                    print(f"[{datetime.now()}] Detected: {label} (Confidence: {confidence:.2f})")
                    if label in anomaly_labels:
                        anomalies_detected = True

                        if out is None:
                            video_filename = f"anomalous_videos/{label}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
                            out = cv2.VideoWriter(video_filename, fourcc, fps, (frame_width, frame_height))
                            print(f"[{datetime.now()}] Recording started: {video_filename}")

                        # The raw (un-annotated) frame is what gets recorded.
                        out.write(frame)
                        # One anomaly is enough to record this frame.
                        break

            if not anomalies_detected and out is not None:
                print(f"[{datetime.now()}] Anomaly ended. Stopping recording.")
                out.release()
                out = None

            # waitKey only receives key presses while a HighGUI window
            # exists, so the annotated frame must be shown for 'q' to work.
            cv2.imshow("Threat Detection", annotated_frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("Exit requested. Stopping program.")
                break
    finally:
        # Always finalize an in-progress recording and free the capture.
        if out is not None:
            out.release()
        cap.release()
        cv2.destroyAllWindows()

# === Unified System ===
def unified_system(video_source=0):
    """Start the real-time threat-detection workers and wait for them to end.

    Only the audio worker is started at present. The video worker is
    disabled (see the commented-out line below), so ``video_source`` is
    accepted but currently unused.

    Args:
        video_source: Intended source for the video worker.
    """
    # Disabled video worker, kept for reference:
    # video_thread = threading.Thread(target=process_video_stream, args=(video_source,))
    audio_worker = threading.Thread(target=record_and_analyze_audio)

    audio_worker.start()
    # Block the caller until the audio loop returns.
    audio_worker.join()

# Entry point: start the detection system only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    unified_system(video_source=0)















Microphone is ON. Listening for anomalies...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[2025-01-11 18:02:24.804739] Detected: Explosions (Confidence: 0.92)
Anomalous audio saved: anomalous_audio/Explosions_20250111_180224.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[2025-01-11 18:02:29.784268] Detected: Explosions (Confidence: 0.90)
Anomalous audio saved: anomalous_audio/Explosions_20250111_180229.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[2025-01-11 18:02:34.764060] Detected: Explosions (Confidence: 0.89)
Anomalous audio saved: anomalous_audio/Explosions_20250111_180234.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[2025-01-11 18:02:39.749625] Detected: Explosions (Confidence: 0.86)
Anomalous audio saved: anomalous_audio/Explosions_20250111_180239.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[2025-01-11 18:02:44.806877] Detected: 