In [19]:
import numpy as np

# Monkey-patch the scalar aliases onto the numpy module so that code which
# still refers to np.float / np.int / np.object / np.bool resolves to the
# matching Python builtins.
np.float, np.int, np.object, np.bool = float, int, object, bool

In [20]:
import pandas as pd
import re
import ast
import gunshot_utils as utils
import importlib
import os
import random
import pandas as pd
import torchaudio

# Re-execute the already-imported gunshot_utils module so that edits made to
# it on disk are picked up without restarting the kernel.
importlib.reload(utils)

In [12]:
# The gunshot start list is stored as text in the metadata file (a side effect
# of how the spreadsheet was saved), so it has to be parsed back into numbers.
def preprocess_gunshot_times(gunshot_times):
    """Turn a stringified list of gunshot start times into a real list.

    Accepts comma-separated text (``"[0.5, 1.25]"``) as well as
    whitespace-separated text (``"[0.5 1.25]"``), including floats written
    with a trailing or leading dot (``"[1.  2.5]"``, ``"[.5 1.]"``).

    Args:
        gunshot_times: The cell value. Normally a string; a list/tuple is
            passed through as a list, a plain int/float is wrapped in a list,
            and missing values (``None`` / NaN) give an empty list.

    Returns:
        list: The parsed start times, or ``[]`` when the value is missing or
        cannot be parsed.
    """
    # Already-parsed values: nothing to do beyond normalising to a list.
    if isinstance(gunshot_times, (list, tuple)):
        return list(gunshot_times)

    # Bare numbers (bool excluded); NaN is the only float not equal to itself
    # and represents an empty cell.
    if isinstance(gunshot_times, (int, float)) and not isinstance(gunshot_times, bool):
        return [] if gunshot_times != gunshot_times else [gunshot_times]

    # Anything else that is not text (e.g. None) cannot be parsed.
    if not isinstance(gunshot_times, str):
        return []

    # Remove multiple spaces
    gunshot_times = re.sub(r'\s+', ' ', gunshot_times).strip()

    # Insert commas between numbers if missing. The left side may end in a
    # digit or a dot ("1."), the right side may start with a digit, a dot
    # (".5") or a sign.
    gunshot_times = re.sub(r'(?<=[\d.])\s(?=[-+.\d])', ', ', gunshot_times)

    # Ensure there are no trailing commas
    gunshot_times = gunshot_times.replace(', ]', ']')

    try:
        parsed = ast.literal_eval(gunshot_times)
    except (ValueError, SyntaxError):
        return []

    # Always hand back a list, whatever literal the text evaluated to.
    if isinstance(parsed, (list, tuple)):
        return list(parsed)
    if isinstance(parsed, (int, float)) and not isinstance(parsed, bool):
        return [parsed]
    return []

In [13]:
# Load the gunshot metadata shipped with the edge-collected dataset.
gunshots = pd.read_csv('/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio/Gunshots/7004819/edge-collected-gunshot-audio/gunshot-audio-all-metadata.csv')

# Keep only the columns used later. The explicit .copy() makes this an
# independent frame, so the column assignment below cannot raise pandas'
# SettingWithCopyWarning.
gunshots = gunshots[['filename', 'num_gunshots', 'gunshot_location_in_seconds']].copy()

# Apply the preprocessing function to the 'gunshot_location_in_seconds' column so we have proper numbers
gunshots['gunshot_location_in_seconds'] = gunshots['gunshot_location_in_seconds'].apply(preprocess_gunshot_times)

In [14]:
# Since the previous gunshot data is not really good, I will build my own from the archive folder, which contains good and correct gunshots.

def get_random_timestamp(file_path):
    """Pick a uniformly random position, in seconds, inside an audio file.

    The file is loaded with ``torchaudio.load``; its duration is the size of
    dimension 1 of the returned waveform divided by the sample rate.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        list: A one-element list holding the random timestamp in seconds.
    """
    audio, rate = torchaudio.load(file_path)
    total_seconds = audio.shape[1] / rate
    return [random.uniform(0, total_seconds)]

def find_wav_files_and_generate_timestamps(directory, gunshot_offset_seconds=0.10):
    """Build a table of every ``.wav`` file under ``directory``.

    Each file is paired with a single, fixed gunshot start time (the value is
    not random, despite what the function name suggests).

    Args:
        directory: Root folder that is searched recursively.
        gunshot_offset_seconds: Start time, in seconds, recorded for every
            file. Defaults to 0.10.

    Returns:
        pandas.DataFrame: Columns ``filename`` (full path) and
        ``gunshot_location_in_seconds`` (a one-element list per row). Rows are
        in sorted traversal order. The columns are present even when no file
        is found.
    """
    columns = ['filename', 'gunshot_location_in_seconds']
    records = []

    for root, dirs, files in os.walk(directory):
        # Sorting in place fixes the order in which os.walk descends, making
        # the row order reproducible across filesystems.
        dirs.sort()
        for file in sorted(files):
            if file.endswith('.wav'):
                records.append({
                    'filename': os.path.join(root, file),
                    # A fresh list per row so rows never share one object.
                    'gunshot_location_in_seconds': [gunshot_offset_seconds],
                })

    # Passing the columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(records, columns=columns)

# Example usage

# Replace with the path to your directory with gunshots
gunshot_directory = '/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio/Gunshots/archive'
# Build the gunshot table: one row per .wav file found (recursively) under
# gunshot_directory, each with its gunshot start time.
gunshots_timestamps_df = find_wav_files_and_generate_timestamps(gunshot_directory)

In [15]:
# Load the music metadata spreadsheet.
music_df = pd.read_excel(
    '/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Excel/baseline_data_w_topics_w_features.xlsx',
    engine='openpyxl',
)

# Expose the path column as 'filename' and tag every row with label 0
# (presumably "no gunshot" - confirm against gunshot_utils).
music = music_df.rename(columns={'Path': 'filename'}).assign(label=0)

# Single-column views of the paths and the labels.
music_paths_df = music[['filename']]
music_labels = music[['label']]

In [1]:
# Sanity check: preview the first rows of the gunshot table.
gunshots_timestamps_df.head()

In [17]:
# At this point we have gunshots_timestamps_df and music_paths_df

In [21]:
# Generate the samples from the music table and the gunshot table:
# 2000 samples with gunshots, none without.
df = utils.generate_data_samples(
    music,
    gunshots_timestamps_df,
    number_of_samples_w_gunshots=2000,
    number_of_samples_wo_gunshots=0,
)

In [87]:
# Display the samples table assigned from utils.generate_data_samples.
df

In [24]:
import os
import shutil
import glob

def get_next_file_index(destination_folder, prefix, extensions=('.mp3', '.wav')):
    """
    Get the next available file index in the destination folder with a specific prefix.

    A file counts when its name is ``<prefix><digits><extension>`` and its
    extension is one of ``extensions``.

    Args:
        destination_folder: Folder whose direct entries are inspected.
        prefix: Filename prefix that precedes the numeric index.
        extensions: Extension, or tuple of extensions (with leading dot), that
            are considered. Defaults to both ``.mp3`` and ``.wav`` so that
            files written by ``copy_files_incrementally`` are counted too.

    Returns:
        int: One more than the highest index found, or 0 when no file carries
        a numeric index.

    Raises:
        OSError: If ``destination_folder`` cannot be listed.
    """
    # Allow a single extension to be passed as a plain string.
    if isinstance(extensions, str):
        extensions = (extensions,)

    indices = []
    for name in os.listdir(destination_folder):
        stem, ext = os.path.splitext(name)
        if not name.startswith(prefix) or ext not in extensions:
            continue
        suffix = stem[len(prefix):]
        # isdecimal() only accepts characters that int() can parse.
        if suffix.isdecimal():
            indices.append(int(suffix))

    # default=-1 covers the case where files match the prefix but none has a
    # numeric suffix (a bare max() would raise ValueError).
    return max(indices, default=-1) + 1

def copy_files_incrementally(source_folder, destination_folder, start_index):
    """
    Recursively copy .wav files from the source_folder to the destination_folder, renaming them
    incrementally starting from start_index.

    Files are named ``with_gunshot_<index>.wav``. Numbering starts at the
    larger of ``start_index`` and the value reported by
    ``get_next_file_index``. An index whose target file already exists is
    skipped, so existing files are never overwritten.

    Args:
        source_folder: Folder searched recursively for ``.wav`` files.
        destination_folder: Folder receiving the copies; created if missing.
        start_index: Lowest index that may be assigned.

    Returns:
        None. One line is printed per copied file.
    """
    # Define the file prefix
    prefix = 'with_gunshot_'

    # Find all .wav files recursively in the source folder. Sorting makes the
    # source-to-index mapping reproducible (glob order is arbitrary).
    wav_files = sorted(glob.glob(os.path.join(source_folder, '**', '*.wav'), recursive=True))

    # Make sure the destination exists before it is inspected or written to.
    os.makedirs(destination_folder, exist_ok=True)

    # Get the starting index for the new files
    next_index = max(start_index, get_next_file_index(destination_folder, prefix))

    # Copy and rename files
    for wav_file in wav_files:
        destination_path = os.path.join(destination_folder, f'{prefix}{next_index}.wav')

        # Never overwrite an existing file: advance to the first free index.
        while os.path.exists(destination_path):
            next_index += 1
            destination_path = os.path.join(destination_folder, f'{prefix}{next_index}.wav')

        # Copy the file to the destination directory with the new name
        shutil.copy2(wav_file, destination_path)
        print(f'Copied {wav_file} to {destination_path}')

        # Increment the index for the next file
        next_index += 1

In [25]:
# Check which index the next 'with_gunshot_' file in the Combined folder would get.
get_next_file_index(
    '/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio/Gunshots/Combined',
    'with_gunshot_',
)

In [None]:
# Scratch note (not runnable code): /Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio/Gunshots/Combined_test

In [27]:
# Copy every .wav from the archive folder into the Combined folder,
# numbering the copies from index 7026 upwards.
copy_files_incrementally(
    '/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio/Gunshots/archive',
    '/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio/Gunshots/Combined',
    7026,
)