# Create a latent representation of audio

## Accessing the audio (mounting Google Drive)

In [None]:
# Make the Google Drive content reachable from the Colab file system
from google.colab import drive

# The Drive appears under /content/drive once the authorization prompt is accepted
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


## Setting up the coding environment

### Installing external packages

In [None]:
!pip install openl3

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openl3
  Downloading openl3-0.4.1.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting resampy<0.3.0,>=0.2.1
  Downloading resampy-0.2.2.tar.gz (323 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.4/323.4 KB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting h5py<3.0.0,>=2.7.0
  Downloading h5py-2.10.0-cp38-cp38-manylinux1_x86_64.whl (2.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m48.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting moviepy>=1.0.0
  Downloading moviepy-1.0.3.tar.gz (388 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.3/388.3 KB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting proglog<=1.0.0
  Downloading proglog-0.1.10-py3-

### Importing libraries

In [None]:
import datetime
import glob
import os
import re
import sys
import time

import librosa
import numpy as np
import openl3
import pandas as pd
import scipy.io.wavfile
import scipy.signal

### Defining global variables

In [None]:
# Root folder of the audio dataset on the mounted Drive
parent_dir = '/content/drive/MyDrive/Volet1'
# Sub-folders of parent_dir, one per label
label_dirs = [
    'RE',
    'BE',
    'BL',
    'RL',
]

In [None]:
# OpenL3 audio embedding model, "env" content type (trained on environmental audio)
latent_space_dim = 512  # size of each embedding vector
spectr = "mel128"  # input representation fed to the model
model = openl3.models.load_audio_embedding_model(
    input_repr=spectr,
    content_type="env",
    embedding_size=latent_space_dim,
)

In [None]:
# One column name per embedding dimension: ac_latent_space_0, ac_latent_space_1, ...
LATENT_SPACE = [f'ac_latent_space_{i}' for i in range(latent_space_dim)]

## Initializing the environment 

### Getting the number of files to process per label

In [None]:
# Count the .wav files found two levels below each label directory
# (<parent_dir>/<label>/<site>/<file>.wav)
num_audio_files = dict.fromkeys(label_dirs, 0)
for label in label_dirs:
    label_dir = os.path.join(parent_dir, label)
    audio_files = glob.glob(label_dir + '/*/*.wav')
    num_audio_files[label] = len(audio_files)
# Display the counts as the cell output
num_audio_files

{'RE': 30, 'BE': 33, 'BL': 33, 'RL': 24}

### Defining auxiliary functions

In [None]:
def date_time_parser_from_filename(filename):
    """Parse the recording date and time embedded in a filename.

    The timestamp is located by its pattern (8 digits, an underscore, 6 digits)
    instead of by its position between underscores, so a prefix that itself
    contains underscores (e.g. ``SITE_A_20210102_030405.wav``) is still parsed.

    Args:
        filename (str): filename (filename format: XXX_YYYYMMDD_HHMMSS.wav)

    Returns:
        date_time_obj (datetime.datetime): date + time

    Raises:
        ValueError: if no ``YYYYMMDD_HHMMSS`` timestamp is found in the filename,
            or if the digits found do not form a valid date and time.
    """
    # The look-arounds reject matches that are only part of a longer run of digits
    match = re.search(r'(?<!\d)(\d{8})_(\d{6})(?!\d)', filename)
    if match is None:
        raise ValueError(
            'No YYYYMMDD_HHMMSS timestamp found in filename: {!r}'.format(filename))
    # Concatenate date and time and convert them to a datetime object
    date_time = match.group(1) + match.group(2)
    date_time_obj = datetime.datetime.strptime(date_time, '%Y%m%d%H%M%S')
    return date_time_obj

## Computing the latent representation



In [None]:
# Parameters
new_sr = 16000 # new sampling rate (Hz)
chunk_size = 60 # 60 seconds between two consecutive embeddings
total_size = 30 # 30 chunks per file, i.e. 30 minutes
# Every recording is zero-padded or truncated to exactly this many samples
target_num_samples = total_size * chunk_size * new_sr
# Column layout of the output dataframe
latent_space_columns = ['filename', 'date', 'site'] + LATENT_SPACE + ['label']

# Rows are collected in a plain list and the dataframe is built once at the end:
# growing a dataframe row by row is quadratic, and DataFrame.append no longer
# exists in pandas >= 2.0.
latent_space_rows = []

# Loop through the label directories and load the audio files
for label in label_dirs:
    print('Loading label directory: {}'.format(label))
    # Audio files are stored as <parent_dir>/<label>/<site>/<file>.wav
    label_dir = os.path.join(parent_dir, label)
    audio_files = glob.glob(label_dir + '/*/*.wav')
    for file_number, audio_file in enumerate(audio_files, start=1):
        start_time = time.time()
        filename = os.path.basename(audio_file)
        # Recording date and time are encoded in the filename
        date = date_time_parser_from_filename(filename)
        # The site is the lowercased name of the directory containing the file
        site = os.path.basename(os.path.dirname(audio_file)).lower()
        # Load the audio file, then resample it to new_sr. The rates are passed by
        # keyword because recent librosa releases reject them as positional arguments.
        audio, sr = librosa.load(audio_file)
        audio = librosa.resample(audio, orig_sr=sr, target_sr=new_sr)
        # Force a duration of exactly 30 minutes: truncate longer recordings first
        # (a negative pad width makes np.pad raise), then zero-pad shorter ones.
        audio = audio[:target_num_samples]
        audio = np.pad(audio, (0, target_num_samples - audio.shape[0]), 'constant')
        # Compute one embedding every chunk_size seconds.
        # NOTE(review): per the OpenL3 documentation, hop_size only sets the spacing
        # between analysis windows; each embedding seems to describe a short window
        # taken once per minute rather than a summary of the full 60 s - confirm
        # this is the intended sampling.
        embeddings, _ = openl3.get_audio_embedding(audio, new_sr, model=model, hop_size=chunk_size, verbose=1)
        # One output row per embedding
        for embedding in embeddings:
            latent_space_rows.append([filename, date, site] + embedding.tolist() + [label])
        # Report progress within the current label and the time spent on this file
        total_time = time.time() - start_time
        print('Loading audio file: {}/{} Total time taken: {} seconds'.format(file_number, len(audio_files), total_time))

# Create the dataframe that stores the features
df_latent_space = pd.DataFrame(latent_space_rows, columns=latent_space_columns)

Loading label directory: RE
Loading audio file: 1/30 Total time taken: 207.15117168426514 seconds
Loading audio file: 2/30 Total time taken: 192.82018899917603 seconds
Loading audio file: 3/30 Total time taken: 196.30777168273926 seconds
Loading audio file: 4/30 Total time taken: 192.67963290214539 seconds
Loading audio file: 5/30 Total time taken: 189.6624538898468 seconds
Loading audio file: 6/30 Total time taken: 193.90763115882874 seconds
Loading audio file: 7/30 Total time taken: 189.59312772750854 seconds
Loading audio file: 8/30 Total time taken: 188.33304619789124 seconds
Loading audio file: 9/30 Total time taken: 195.65735626220703 seconds
Loading audio file: 10/30 Total time taken: 191.12162399291992 seconds
Loading audio file: 11/30 Total time taken: 195.78291177749634 seconds
Loading audio file: 12/30 Total time taken: 200.78850722312927 seconds
Loading audio file: 13/30 Total time taken: 189.9972550868988 seconds
Loading audio file: 14/30 Total time taken: 189.929822206497

## Saving the dataset

In [None]:
# Save the dataframe to a csv file
path = '/content/drive/MyDrive/dataset'
dataset_filename = 'ac_latent_space' + '_' + spectr + '_' + str(latent_space_dim) + '.csv'
df_latent_space.to_csv(os.path.join(path, dataset_filename), index=False)