In [12]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio
import os, random
import shutil
from pydub import AudioSegment
import glob
from pathlib import Path
from IPython.display import display, Audio
import soundfile as sf

In [13]:
# Specify the path where you want to save the model weights
weights_path = 'weights/my_model_weights.h5'

In [14]:
ROOT = "/home/gridsan/clast/hackathon-april"
train_metadata = pd.read_csv(os.path.join(ROOT, 'metadata.csv'))[['primary_label', 'filename']]
train_metadata['filepath'] = 'data/Binary_Drone_Audio/' + train_metadata['filename']
classes = set(random.sample(train_metadata['primary_label'].unique().tolist(), 2)) 


In [15]:
# Rebuild the model architecture
model_reloaded = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024), dtype=tf.float32, name='input_embedding'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(len(classes))  # Make sure `classes` is defined or replace `len(classes)` with the actual number
], name='my_reloaded_model')

# Load the weights
model_reloaded.load_weights(weights_path)

# If your model requires compiling to make predictions, compile the model
model_reloaded.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                       optimizer="adam",
                       metrics=['accuracy'])

# Now the model is ready to be used for predictions or evaluation


In [16]:

# @tf.function
# def load_wav_16k_mono(filename):
#     """Load a WAV file, convert it to a float tensor, and resample to 16 kHz single-channel audio."""
#     file_contents = tf.io.read_file(filename)
#     wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
#     wav = tf.squeeze(wav, axis=-1)
#     if sample_rate != 16000:
#         wav = tfio.audio.resample(wav, rate_in=tf.cast(sample_rate, tf.int64), rate_out=16000)
#     return wav


# def create_segments_for_directory(directory, segment_duration=0.975, sr=16000):
#     """Create fixed-length segments for all WAV files in a directory."""
#     file_paths = glob.glob(directory + '/*.wav')  # Adjust pattern if necessary
#     all_segments = []
#     segment_samples = int(segment_duration * sr)  # Number of samples per segment

#     for file_path in file_paths:
#         audio = load_wav_16k_mono(file_path)
#         audio_length = tf.shape(audio)[0]
#         num_segments = audio_length // segment_samples

#         for i in range(num_segments):
#             start_sample = i * segment_samples
#             end_sample = start_sample + segment_samples
#             segment = audio[start_sample:end_sample]
#             start_time = tf.cast(start_sample, tf.float32) / sr  # Convert sample index to time in seconds
#             all_segments.append((file_path, start_time, segment))

#     return all_segments

# def load_wav_segment(file_path, start, segment):
#     """Prepare a segment for TensorFlow dataset format, with metadata."""
#     # Optionally reshape or process segment if necessary
#     segment_tensor = tf.reshape(segment, [1, -1])  # Ensure it has a batch dimension
#     return segment_tensor, file_path, start

# def generate_dataset(directory):
#     segments = create_segments_for_directory(directory)
#     # Convert list to TensorFlow dataset
#     segments_ds = tf.data.Dataset.from_generator(
#         lambda: segments,
#         output_types=(tf.string, tf.float32, tf.float32),
#         output_shapes=((), (), (None,))
#     )
#     # Map loading function to dataset
#     segments_ds = segments_ds.map(lambda fp, st, sg: load_wav_segment(fp, st, sg))
#     return segments_ds

In [17]:
@tf.function
def load_wav_16k_mono(filename):
    """Load a WAV file, convert it to a float tensor, and resample to 16 kHz single-channel audio."""
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
    wav = tf.squeeze(wav, axis=-1)
    if sample_rate != 16000:
        wav = tfio.audio.resample(wav, rate_in=tf.cast(sample_rate, tf.int64), rate_out=16000)
    return wav

def create_segments_for_directory(directory, sr=16000, segment_samples=1024):
    """Create fixed-length segments for all WAV files in a directory, each segment being 1024 samples long."""
    file_paths = glob.glob(directory + '/*.wav')  # Adjust pattern if necessary
    all_segments = []

    for file_path in file_paths:
        audio = load_wav_16k_mono(file_path)
        audio_length = tf.shape(audio)[0]
        num_segments = audio_length // segment_samples

        for i in range(num_segments):
            start_sample = i * segment_samples
            end_sample = start_sample + segment_samples
            segment = audio[start_sample:end_sample]
            start_time = tf.cast(start_sample, tf.float32) / sr  # Convert sample index to time in seconds
            all_segments.append((file_path, start_time, segment))

    return all_segments


In [18]:
def load_wav_segment(file_path, start, segment):
    """Prepare a segment for TensorFlow dataset format, with metadata."""
    # Optionally reshape or process segment if necessary
    segment_tensor = tf.reshape(segment, [1, -1])  # Ensure it has a batch dimension
    return segment_tensor, file_path, start

def generate_dataset(directory):
    segments = create_segments_for_directory(directory)
    # Convert list to TensorFlow dataset
    segments_ds = tf.data.Dataset.from_generator(
        lambda: segments,
        output_types=(tf.string, tf.float32, tf.float32),
        output_shapes=((), (), (None,))
    )
    # Map loading function to dataset
    segments_ds = segments_ds.map(lambda fp, st, sg: load_wav_segment(fp, st, sg))
    return segments_ds


In [19]:
# Example usage
directory = 'data/test_data'
test_dataset = generate_dataset(directory)

# Display dataset structure
for features, file_path, start_time in test_dataset.take(1):
    print("Features shape:", features.shape)
    print("File Path:", file_path.numpy().decode('utf-8'), "Start Time:", start_time.numpy())



Features shape: (1, 1024)
File Path: data/test_data/DRONE_018.wav Start Time: 0.0


In [20]:
def predict_with_metadata(model, dataset):
    """Predict using model and return predictions with metadata."""
    results = []
    for wav_tensor, file_path, start in dataset:
        print(wav_tensor, file_path, start)
        predictions = model.predict(wav_tensor)
        probabilities = tf.nn.softmax(predictions).numpy()
        predicted_classes = np.argmax(probabilities, axis=1)

        results.append((file_path.numpy().decode('utf-8'), start.numpy(), probabilities, predicted_classes))
    return results

# Example usage
predictions_with_metadata = predict_with_metadata(model_reloaded, test_dataset)


tf.Tensor(
[[-7.0289667e-07  1.3009848e-06  2.9782027e-06 ...  3.4204012e-01
   2.0183794e-01 -1.8140333e-02]], shape=(1, 1024), dtype=float32) tf.Tensor(b'data/test_data/DRONE_018.wav', shape=(), dtype=string) tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(
[[ 0.08595809  0.05264312  0.04353417 ...  0.00741842 -0.18549806
  -0.09958941]], shape=(1, 1024), dtype=float32) tf.Tensor(b'data/test_data/DRONE_018.wav', shape=(), dtype=string) tf.Tensor(0.064, shape=(), dtype=float32)
tf.Tensor(
[[-0.10941676 -0.21260847 -0.0752945  ...  0.08607049 -0.02853771
   0.01273412]], shape=(1, 1024), dtype=float32) tf.Tensor(b'data/test_data/DRONE_018.wav', shape=(), dtype=string) tf.Tensor(0.128, shape=(), dtype=float32)
tf.Tensor(
[[-0.00688485 -0.17047666 -0.00445479 ...  0.30323622  0.14840145
   0.20075089]], shape=(1, 1024), dtype=float32) tf.Tensor(b'data/test_data/DRONE_018.wav', shape=(), dtype=string) tf.Tensor(0.192, shape=(), dtype=float32)
tf.Tensor(
[[ 0.1165347  -0.01137509  0.0080

In [21]:
predictions_with_metadata

[('data/test_data/DRONE_018.wav',
  0.0,
  array([[0.04933938, 0.9506606 ]], dtype=float32),
  array([1])),
 ('data/test_data/DRONE_018.wav',
  0.064,
  array([[0.9961493 , 0.00385076]], dtype=float32),
  array([0])),
 ('data/test_data/DRONE_018.wav',
  0.128,
  array([[0.97818655, 0.02181345]], dtype=float32),
  array([0])),
 ('data/test_data/DRONE_018.wav',
  0.192,
  array([[1.0000000e+00, 2.6974884e-11]], dtype=float32),
  array([0])),
 ('data/test_data/DRONE_018.wav',
  0.256,
  array([[9.9993694e-01, 6.3089828e-05]], dtype=float32),
  array([0])),
 ('data/test_data/DRONE_018.wav',
  0.32,
  array([[0.14938587, 0.8506141 ]], dtype=float32),
  array([1])),
 ('data/test_data/DRONE_018.wav',
  0.384,
  array([[9.9999905e-01, 8.9700461e-07]], dtype=float32),
  array([0])),
 ('data/test_data/DRONE_018.wav',
  0.448,
  array([[0.6690464 , 0.33095357]], dtype=float32),
  array([0])),
 ('data/test_data/DRONE_018.wav',
  0.512,
  array([[9.9967492e-01, 3.2513813e-04]], dtype=float32),
  ar

In [23]:
import pandas as pd

# Create DataFrame
df = pd.DataFrame(predictions_with_metadata, columns=['Filename', 'Start_Time', 'Probabilities', 'Predictions'])

# Saving to CSV for further analysis
df.to_csv('predictions_with_timestamps.csv', index=False)
