# SORT SUNSET BAY HYDROPHONE CHUNKS FROM AWS
## Isabelle Brandicourt, 10-31-2024

### Step 1: Connect to AWS S3 and list the available buckets (timestamped folders) for the Sunset Bay hydrophone. The folder names are UNIX timestamps, which can be converted to UTC and then to PST for ease of understanding.

In [6]:
import boto3
from botocore.config import Config
from botocore import UNSIGNED

# Set up the S3 client with unsigned configuration (requests carry no AWS credentials)
s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

bucket_name = 'audio-orcasound-net'
prefix = 'rpi_sunset_bay/hls/'
directories, num_directories = [], []
files = []

# A single list_objects_v2 call returns only the first page of results
# (at most 1000 entries), so walk every page with a paginator; otherwise
# folders beyond the first page would be silently missing.
paginator = s3_client.get_paginator('list_objects_v2')
for response in paginator.paginate(Bucket=bucket_name, Prefix=prefix, Delimiter='/'):
    # Collect directory (pseudo-folder) names found directly under the prefix
    for prefix_info in response.get('CommonPrefixes', []):
        directories.append(prefix_info['Prefix'])
        # The folder name is the path component just before the trailing '/'
        num = prefix_info['Prefix'].split('/')[-2]
        num_directories.append(int(num))

    # Collect the keys of files stored directly under the prefix
    for obj in response.get('Contents', []):
        files.append(obj['Key'])


### Step 2: Read the spreadsheet (Excel file) listing the chunks of data that we want to look at from OrcaSound
The data is then sorted into multiple dataframes, each holding all of the entries of interest from a specific bucket in AWS.

In [7]:
import pandas as pd
import openpyxl

# Load the spreadsheet of entries of interest
file_path = 'orcasound_entries_oct.xlsx'
orcasound_entries = pd.read_excel(file_path)

# print(orcasound_entries.head())
# Convert the 'utc_date_time' strings to whole seconds since the UNIX epoch.
# Subtracting the epoch and floor-dividing by one second does not depend on the
# platform's default integer width or on the datetime resolution pandas picks,
# unlike `.astype(int) // 10**9`.
parsed_times = pd.to_datetime(orcasound_entries['utc_date_time'], format='%Y-%m-%d_%H.%M.%S')
orcasound_entries['unix'] = (parsed_times - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
# print('\nNEW FRAME:\n', orcasound_entries)

# Group only after the 'unix' column exists so every per-bucket frame includes it
buckets = orcasound_entries['aws_bucket'].unique()
sorted_by_bucket = {name: group for name, group in orcasound_entries.groupby('aws_bucket')}


### Step 3: Choose a specific time segment to analyze
Enter the start and end timestamps of interest (in Pacific time); the audio segment files covering that window will be downloaded from AWS.

In [8]:
from datetime import datetime
import pytz
import os

# Time window of interest, entered as local time in the America/Los_Angeles zone
year = 2024
month = 10
day = 29
start_hour = 20
start_min = 26
start_sec = 30
end_hour = 20
end_min = 26
end_sec = 40

# Divisor and padding used to turn a time offset into live<N>.ts segment numbers.
# NOTE(review): assumes each segment covers about 10 seconds - confirm against the stream settings.
SEGMENT_SECONDS = 10
PAD_SEGMENTS = 3  # extra segments fetched on each side of the window

pst = pytz.timezone('America/Los_Angeles')
start_time = pst.localize(datetime(year, month, day, start_hour, start_min, start_sec))
end_time = pst.localize(datetime(year, month, day, end_hour, end_min, end_sec))
print('Starting time:', start_time, 'Ending time:', end_time)

start_unix_time = int(start_time.timestamp())
end_unix_time = int(end_time.timestamp())

# Use the most recent bucket that started at or before the requested start time
below_target = [val for val in buckets if val <= start_unix_time]
if not below_target:
    raise ValueError(
        f'No bucket in the spreadsheet starts at or before {start_time} '
        f'(unix {start_unix_time}).'
    )
use_bucket = int(max(below_target))
print(f'Using bucket {use_bucket}')

# Segment numbers are offsets from the bucket start; a segment number can never
# be negative, so clamp the padded start at 0.
start_live = max(0, round((start_unix_time - use_bucket)/SEGMENT_SECONDS)-PAD_SEGMENTS)
end_live = round((end_unix_time - use_bucket)/SEGMENT_SECONDS)+PAD_SEGMENTS
print(f'Live start: {start_live}, Live end: {end_live}')

# Downloaded segments are staged in a local folder named after the bucket
local_folder = f'{use_bucket}'
local_files = []
os.makedirs(local_folder, exist_ok=True)

s3_keys = []

if use_bucket in sorted_by_bucket:
    by_bucket = sorted_by_bucket[use_bucket]
    for s in range(start_live, end_live + 1):
        aws_filename = f'live{s}.ts'
        s3_key = f'rpi_sunset_bay/hls/{use_bucket}/{aws_filename}'
        download_path = os.path.join(local_folder, aws_filename)

        # Best effort: a segment that fails to download is reported and skipped,
        # so only successfully downloaded files end up in local_files.
        try:
            s3_client.download_file(bucket_name, s3_key, download_path)
            local_files.append(download_path)
        except Exception as e:
            print(f"An error occurred in downloading the audio files from AWS: {e}")


Starting time: 2024-10-29 20:26:30-07:00 Ending time: 2024-10-29 20:26:40-07:00
Using bucket 1730185224
Live start: 7354, Live end: 7361


### Step 4: Concatenate the selected section and save it as a single file with the time range.

In [9]:
import subprocess

# Output path (without extension) is named after the requested time range
output_dir = 'ts_chunks'
chunk_name = os.path.join(output_dir, start_time.strftime('%Y-%m-%d_%H.%M.%S') + '_to_' + end_time.strftime('%Y-%m-%d_%H.%M.%S'))
if local_files:
    # Make sure the output directory exists before ffmpeg tries to write into it
    os.makedirs(output_dir, exist_ok=True)

    # List file consumed by ffmpeg's concat demuxer: one "file '<path>'" line per segment
    with open('temp_concat.txt', 'w') as f:
        for local_file in local_files:
            f.write(f"file '{local_file}'\n")

    # The ffmpeg command: stream-copy the listed segments into one .ts file,
    # overwriting any existing output (-y)
    command = [
        'ffmpeg',
        '-f', 'concat',
        '-safe', '0',
        '-i', 'temp_concat.txt',
        '-c', 'copy',
        '-y',
        chunk_name + '.ts'
    ]

    try:
        # Capture stderr instead of discarding it so a failure can be diagnosed
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
        print(f"Files concatenated successfully into {chunk_name}.ts")
    except subprocess.CalledProcessError as e:
        print(f"An error occurred during concatenation: {e}")
        if e.stderr:
            print(e.stderr)
    except FileNotFoundError as e:
        # Raised when the ffmpeg executable itself cannot be found
        print(f"An error occurred during concatenation: {e}")
    finally:
        # Clean up the temporary inputs whether or not ffmpeg succeeded
        os.remove('temp_concat.txt')
        for local_file in local_files:
            os.remove(local_file)  # Remove each local .ts file
        try:
            os.rmdir(local_folder)  # Remove the local folder
        except OSError as e:
            # rmdir only removes empty directories; keep going if other files remain
            print(f"Could not remove folder {local_folder}: {e}")
else:
    print("No files to concatenate.")



Files concatenated successfully into ts_chunks/2024-10-29_20.26.30_to_2024-10-29_20.26.40.ts
