In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [1]:
def audio_input(audio_directory, sr=44100):
    '''
    Takes in a directory of audio files and returns a list of audio arrays

    Parameters:
    -----------
    audio_directory: str, path to audio directory
    
    Returns:
    --------
    Tuple: samples: (np.ndarray, shape = (N,), 
                    sampling_rate: int)
    '''
    samples, sampling_rate = librosa.load(path = audio_directory, sr=sr, mono=True)
    return samples, sampling_rate

def slice_audio(audio_array, sampling_rate, slice_len):
    """
    Parameters:
    -----------
    audio_array: np.ndarray, shape = (N,)
    sampling_rate: int
    slice_len: int
        The desired length of sliced clips in seconds
    Returns:
    -----------
    slices: list of np.ndarrays, each representing a slice_len clip 
    """
    slices = []
    audio_array_list = list(audio_array)
    audio_array2 = audio_array_list
    # 5 * (41000 * t) / 41000
    # 54 * 41000 % (5 * 41000) = 9 * 41000
    # 54 * 41000 += 41000 * 5 - 
    # print("here2", (sampling_rate * slice_len))
    if len(audio_array2) < (sampling_rate * slice_len):
        while len(audio_array2) < (sampling_rate * slice_len):
            # print("here", len(audio_array2)) # 41000 * 3
            
            # print(audio_array.shape[0])
            # print("here2", sampling_rate * slice_len - audio_array.shape[0])
            # print("extra", len(audio_array2) % (sampling_rate * slice_len))
            # 41000 * 3 + 41000 * 5 - 41000 * 3
            # print(audio_array.shape)
            audio_array2 += audio_array_list
        audio_array2 += audio_array_list[:sampling_rate * slice_len % audio_array.shape[0]]
    else:
        audio_array2 += audio_array_list[:audio_array.shape[0] % sampling_rate]

    # print("len: ", len(audio_array2), len(audio_array2) / sampling_rate)
    for clip in range(len(audio_array2)//(sampling_rate * slice_len)):
        
        arr = audio_array2[sampling_rate*slice_len*clip:sampling_rate*slice_len*(clip+1)]
        # print("len, arr", len(arr))
        slices.append(np.array(arr))
    # print(len(slices))
    return slices

In [None]:
from __future__ import print_function
from __future__ import division
import numpy as np
import librosa
import torch
import torch.nn as nn
from torch import tensor
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pickle
import requests
import datetime
import random
import librosa.display
import matplotlib.mlab as mlab
import wave


# animal_audio_data/White-beaked_Dolphin

audio_data_dir = "gdrive/MyDrive/Cogfee Beans/audio_data"
spectrogram_data_dir = "gdrive/MyDrive/Cogfee Beans/spectrograms"
for animal in os.listdir(audio_data_dir):
    print(animal)
    for audio in os.listdir(os.path.join(audio_data_dir, animal)):
        with wave.open(os.path.join(os.path.join(audio_data_dir, animal), audio), "rb") as f:
            sr = f.getframerate()
            print("sampling rate", sr)
            # if sr < 44100:
            #     continue
        samples, sampling_rate = audio_input(os.path.join(os.path.join(audio_data_dir, animal), audio), sr=sr)
        clips = slice_audio(samples, sampling_rate, 5)
        # print(animal, len(clips))
        for clip in clips:
            fig, ax = plt.subplots()
            spectrogram, freqs, times, im = ax.specgram(
                clip,
                NFFT=4096,
                Fs=sampling_rate,
                window=mlab.window_hanning,
                noverlap=4096 // 2,
                mode='magnitude',
                scale="dB"
            )
            
            # fig.colorbar(im)
            # ax.set_xlabel("Time [seconds]")
            # ax.set_ylabel("Frequency (Hz)")
            # ax.set_title("Spectrogram of Recording")
            # ax.set_ylim(0, 18000); # less than half of sampling rate
            # plt.show()
            # plt.gray()
            plt.axis('off')

            #######################
            # print(spectrogram.max())
            spectrogram = (spectrogram - spectrogram.mean()) / spectrogram.std()
            print(np.shape(spectrogram))
            filename = (animal.replace("\n","")
                        + str(datetime.datetime.now().strftime("%y%m%d_%H%M%S%f")) 
                        + "_" +str(random.randrange(99, 99999, 6))+".png")
            animal_name = animal.replace("\n","")
            if np.std(spectrogram) > 0.1:
                path = f"{spectrogram_data_dir}/{animal_name}/{filename}"
                try:
                    plt.savefig(path, bbox_inches='tight', pad_inches=0)
                except:
                    os.makedirs(f"{spectrogram_data_dir}/{animal_name}/")
                    plt.savefig(path, bbox_inches='tight', pad_inches=0)
                plt.close()
                # plt.clf()
                # plt.cla()
            

Narwhal
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 60600
(2049, 146)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)
sampling rate 81920
(2049, 199)




(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
sampling rate 600
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600




(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 640
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 640
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 600
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 640
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 640
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 640
(2049, 1)
(2049, 1)
(2049, 1)
(2049, 1)
sampling rate 640
(2049, 1)
(2