In [12]:
# main.py
import boto3
from decouple import config
import sqlite3
import torch
import pathlib as Path
import boto3
import torchaudio
from tqdm import tqdm
import numpy as np
import os
from smart_open import open

from crossmodal_alignment.retrieval_model import TransformersModel

In [13]:

# Load S3-related configurations from the .env file
# (looked up through `decouple.config`). These module-level names are read
# as globals by main() and build_audio_index_s3().
aws_access_key_id = config('AWS_ACCESS_KEY_ID')
aws_secret_access_key = config('AWS_SECRET_ACCESS_KEY')
bucket_name = config('AWS_BUCKET_NAME')
bucket_region = config('AWS_DEFAULT_REGION')
# Passed to sqlite3.connect() in main().
database_name = config('DATABASE_NAME')

In [None]:



def main():
    """Register every ``.wav`` object under the S3 audio prefix in SQLite.

    Lists the objects stored under the ``data/audio/dh-new_scapes`` prefix of
    ``bucket_name`` and inserts one ``(s3_link, file_name)`` row per key that
    ends in ``.wav`` into the ``audio_files`` table of the ``database_name``
    database.

    Reads the module-level settings ``aws_access_key_id``,
    ``aws_secret_access_key``, ``bucket_region``, ``bucket_name`` and
    ``database_name``. The ``audio_files`` table is not created here, so it
    must already exist.

    All inserts are committed together once every page has been processed;
    if anything raises before that, the connection is closed without
    committing.
    """
    # Initialize the S3 client
    s3 = boto3.client('s3',
                      aws_access_key_id=aws_access_key_id,
                      aws_secret_access_key=aws_secret_access_key,
                      region_name=bucket_region)

    # A single list_objects_v2 call returns at most 1000 keys; the paginator
    # follows continuation tokens so larger folders are fully indexed.
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix='data/audio/dh-new_scapes')

    # Connect to the SQLite database
    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()

        for page in pages:
            # 'Contents' is absent from a page that matched no objects.
            for obj in page.get('Contents', []):
                key = obj['Key']
                if not key.endswith(".wav"):
                    continue
                print(f"Reading file: {key}")

                # Only the link is stored; the audio bytes are not needed, so
                # no S3 stream is opened for the object.
                c.execute("INSERT INTO audio_files (s3_link, file_name) VALUES (?, ?)", (f's3://{bucket_name}/{key}', key))

        # Commit the changes
        conn.commit()
    finally:
        # Always release the database connection, even when listing or an
        # insert fails part-way through.
        conn.close()




In [11]:
def load_audio_input(audio_path: Path, sampling_rate: int):
    """Load an audio file as a single-channel waveform at ``sampling_rate``.

    Args:
        audio_path: Source handed straight to ``torchaudio.load``.
        sampling_rate: Target sample rate the waveform is resampled to.

    Returns:
        The resampled waveform averaged over its first dimension (the
        channel axis in torchaudio's default channels-first layout).

    Note:
        Loading pre-computed ``.npy`` arrays is not supported; every input
        goes through ``torchaudio.load``.
    """
    waveform, source_rate = torchaudio.load(audio_path)
    resampled = torchaudio.functional.resample(waveform, source_rate, sampling_rate)
    mono = resampled.mean(0)
    return mono

In [None]:

def build_audio_index_s3(bucket_name, folder_path, _audio_encoder, sampling_rate, suffix=".wav"):
    """Embed the audio objects stored under an S3 prefix.

    Args:
        bucket_name: Name of the S3 bucket to list.
        folder_path: Key prefix; only objects whose key starts with it are
            considered.
        _audio_encoder: Callable applied to each loaded waveform; its results
            are stacked with ``torch.stack``.
        sampling_rate: Target sample rate forwarded to ``load_audio_input``.
        suffix: Only keys ending with this string are embedded. Defaults to
            ``".wav"`` to match the local ``build_audio_index`` pattern; pass
            ``""`` to process every listed key.

    Returns:
        A tuple ``(embeddings, s3_file_names)``: the stacked encoder outputs
        and the ``s3://bucket/key`` URI of each embedded object, in the same
        order.

    Raises:
        RuntimeError: From ``torch.stack`` when no object matched, because
            an empty list cannot be stacked.

    Reads the module-level credentials ``aws_access_key_id``,
    ``aws_secret_access_key`` and ``bucket_region``.
    """
    # Initialize the S3 client
    s3 = boto3.client('s3',
                      aws_access_key_id=aws_access_key_id,
                      aws_secret_access_key=aws_secret_access_key,
                      region_name=bucket_region)

    # A single list_objects_v2 call returns at most 1000 keys; the paginator
    # follows continuation tokens so larger folders are fully indexed.
    paginator = s3.get_paginator('list_objects_v2')

    s3_file_names = []
    audios = []

    for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_path):
        # 'Contents' is absent from a page that matched no objects.
        for obj in page.get('Contents', []):
            key = obj['Key']
            # Skip folder placeholder keys and non-audio objects, which the
            # audio loader cannot decode.
            if not key.endswith(suffix):
                continue
            print(f"Reading file: {key}")
            s3_file_path = f's3://{bucket_name}/{key}'
            # `open` is smart_open's, so the s3:// URI is streamed directly;
            # the loader receives the open binary stream rather than a path.
            # Gradients are not needed for indexing, so encode in inference
            # mode like build_audio_index does.
            with open(s3_file_path, 'rb') as file, torch.inference_mode():
                input_audio = load_audio_input(file, sampling_rate)
                embedded_audio = _audio_encoder(input_audio)
            audios.append(embedded_audio)
            s3_file_names.append(s3_file_path)

    return torch.stack(audios), s3_file_names


In [16]:
def build_audio_index(root_dir: Path, _audio_encoder, pattern: str = "*.wav", **kwargs):
    """Embed every file under ``root_dir`` whose name matches ``pattern``.

    Args:
        root_dir: Directory searched recursively via ``rglob``.
        _audio_encoder: Callable applied to each loaded waveform.
        pattern: Glob pattern selecting the files to embed.
        **kwargs: Forwarded unchanged to ``load_audio_input``
            (e.g. ``sampling_rate``).

    Returns:
        A tuple ``(embeddings, file_names)``: the encoder outputs stacked
        with ``torch.stack`` and the bare file name (no directory part) of
        each embedded file, in the same order.
    """
    embedded = []
    names = []

    matching_files = root_dir.rglob(pattern)
    for audio_file in tqdm(matching_files):
        # Loading and encoding both run with autograd tracking disabled.
        with torch.inference_mode():
            waveform = load_audio_input(audio_file, **kwargs)
            embedding = _audio_encoder(waveform)
        embedded.append(embedding)
        names.append(audio_file.name)

    return torch.stack(embedded), names


In [17]:
# NOTE: this import rebinds `Path` to the pathlib.Path class. The first cell's
# `import pathlib as Path` bound the name to the pathlib *module*, which
# cannot be called as Path('...').
from pathlib import Path
# Local directory that build_audio_index searches recursively.
data_direc = Path('data/audio/dh-new_scapes')
# Same location as data_direc; not referenced by any other visible cell.
audio_direc = Path('data/audio/dh-new_scapes')

# Embed every matching file with the model's audio encoder, resampling each
# file to the sampling rate the model reports.
model =TransformersModel()
ref_audios, ref_names = build_audio_index(
    data_direc, model.get_audio_embedding, sampling_rate=model.sampling_rate
)

0it [00:00, ?it/s]It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
1it [00:00,  3.03it/s]It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
2it [00:00,  3.10it/s]It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
3it [00:00,  3.07it/s]It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
4it [00:01,  3.35it/s]It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
5it [00:01,  3.19it/s]It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to

In [None]:
# Build the reference index from objects in S3. This rebinds model,
# ref_audios and ref_names, replacing the values from the local-directory
# cell if that cell ran first.
model = TransformersModel()
# NOTE(review): this prefix differs from the 'data/audio/dh-new_scapes' prefix
# used in main() — confirm which one matches the bucket layout.
folder_path = 'dh-new_scapes/'

ref_audios, ref_names = build_audio_index_s3(bucket_name, folder_path, model.get_audio_embedding,  sampling_rate=model.sampling_rate)

In [18]:
# Bundle the reference embeddings with their names so both are persisted
# together in one file.
embeddings = dict(ref_audios=ref_audios, ref_names=ref_names)

# Serialize the bundle to disk with torch.save()
torch.save(embeddings, 'embeddings.pth')

In [19]:
# Report the on-disk size, in bytes, of the saved embeddings file.
file_path = 'embeddings.pth'
file_size = os.path.getsize(file_path)
print(file_size)

947252


In [20]:
# Convert the size to a human-readable format (e.g., KB, MB, GB)
def sizeof_fmt(size, decimal_places=2):
    """Format a byte count using binary (1024-based) units.

    Args:
        size: Number of bytes (int or float).
        decimal_places: Digits shown after the decimal point.

    Returns:
        A string such as ``"925.05 KB"``. The largest unit is ``TB``, so
        values of 1024 TB or more are still expressed in TB
        (e.g. ``"1024.00 TB"``).
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    # Scale down only while a larger unit is still available. Dividing once
    # more after reaching the last unit would under-report the value by a
    # factor of 1024.
    for unit in units[:-1]:
        if size < 1024.0:
            return f"{size:.{decimal_places}f} {unit}"
        size /= 1024.0
    return f"{size:.{decimal_places}f} {units[-1]}"

# Show the saved file's size in human-readable units.
print(f"Size of '{file_path}': {sizeof_fmt(file_size)}")

Size of 'embeddings.pth': 925.05 KB


ImportError: cannot import name 'Iterable' from 'collections' (/Users/dilipharish/.pyenv/versions/3.10.0/lib/python3.10/collections/__init__.py)

missing expected call export("mo_parsing.core", regex_parameters)
Exception in thread Thread-6 (worker):
Traceback (most recent call last):
  File "/Users/dilipharish/.pyenv/versions/3.10.0/lib/python3.10/threading.py", line 1009, in _bootstrap_inner
    self.run()
  File "/Users/dilipharish/.pyenv/versions/3.10.0/lib/python3.10/threading.py", line 946, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/dilipharish/Library/Caches/pypoetry/virtualenvs/freesound-crossmodal-search-rhICCOhS-py3.10/lib/python3.10/site-packages/mo_imports/__init__.py", line 204, in worker
    _error("Missing export() calls")
  File "/Users/dilipharish/Library/Caches/pypoetry/virtualenvs/freesound-crossmodal-search-rhICCOhS-py3.10/lib/python3.10/site-packages/mo_imports/__init__.py", line 211, in _error
    raise Exception(description)
Exception: Missing export() calls
