In [2]:
import torchaudio
import os
from pathlib import Path
from tqdm.notebook import tqdm
import sqlite3

In [2]:
def divideAudio(path, target_path, duration=5):
    ''' 
    Divide audio into N seconds chunks

    Params
    ------
    path: str
        Path to audio file
    target_path: str
        Path to save audio chunks
    duration: int
        Duration of each chunk

    Returns
    -------
    None

    '''
    # Get filename
    path = Path(path)
    target_path = Path(target_path)
    filename = Path(path).stem
    if not os.path.exists(target_path):
        os.makedirs(target_path)
    # Load audio
    waveform, sample_rate = torchaudio.load(path,format="wav")
    # Split audio
    split_waveform = waveform.split(int(sample_rate*duration), dim=1)
    # Save audio
    for i, audio in tqdm(enumerate(split_waveform),total=len(split_waveform)):
        save_path = target_path / f"{filename}-{i}.wav"
        torchaudio.save(save_path, audio, sample_rate, bits_per_sample=16)

In [3]:
filelist = os.listdir("/Users/davidluna/Documents/Audios/correccioes/cut/")

for file in filelist:
    file = file.split('.')[0]
    try:
        print('####### ', file , ' #######')
        pathAudio = f"/Users/davidluna/Documents/Audios/correccioes/cut/{file}.WAV"
        pathTarget = f"/Users/davidluna/Documents/Audios/correccioes/splitted/{file}"
        divideAudio(pathAudio, pathTarget, 15)
    except:
        print('Error in ', file)

#######  G0073  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0059  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######    #######
Error in  
#######  G0075  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0002  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0001  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0024  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0026  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0023  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0009  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0020  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0091  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0096  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0097  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0068  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  SM04  #######


  0%|          | 0/119 [00:00<?, ?it/s]

#######  G0080  #######


  0%|          | 0/119 [00:00<?, ?it/s]

# Save audio info in DB

In [16]:
import torch
import torchaudio

def getAudioInfo(path):
    path = Path(path)
    name = path.stem
    folder = path.parents[0].stem
    waveform, sample_rate = torchaudio.load(path,format="wav")
    duration = torchaudio.info(path).num_frames / torchaudio.info(path).sample_rate
    channels = torchaudio.info(path).num_channels
    bits = torchaudio.info(path).bits_per_sample
    return name,folder, duration, sample_rate, channels, bits

In [15]:
import sqlite3
from pathlib import Path

conn = sqlite3.connect('../results/results.db')
cursor = conn.cursor()

AUDIOPATH = Path('/Users/davidluna/Documents/Audios/compare-recorders/splitted')
folders = list(AUDIOPATH.glob('*'))

for folder in folders:
    if folder.is_dir():
        audios = list(folder.glob('*.[Ww][Aa][Vv]'))
        for audio in audios:
            name,folder, duration, sample_rate, channels, bits = getAudioInfo(audio)
            cursor.execute(f"INSERT INTO audios (name, path, folder, duration, sample_rate, channels, bits) VALUES ('{name}', '{audio}', '{folder}', {duration}, {sample_rate}, {channels}, {bits})")
            conn.commit()

conn.close()
