## We will learn about 4 functions in torchaudio: `download_asset()` plus the Audio I/O functions `info()`, `load()` and `save()`

In [1]:
# Import the pytorch main library and torch for audio library

import torch
import torchaudio

In [2]:
# Report the versions of torch and torchaudio this notebook runs against
for library in (torch, torchaudio):
    print(library.__version__)

2.1.2
2.1.2


In [3]:
# for file handling
import io
import os
import tarfile
import tempfile

# for accessing AWS services
import boto3

# for plotting 
import matplotlib.pyplot as plt

# for handling HTTP requests 
import requests

from botocore import UNSIGNED
from botocore.config import Config

# for display audio in jupyter
from IPython.display import Audio

# for downloading files
from torchaudio.utils import download_asset



## `1 : download_asset()`

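**Purpose:** This function downloads a torchaudio asset identified by its key and returns the path of the downloaded file on the local system.

**Input:** 

- `key` (string): The key of the asset to download, i.e. its path relative to the torchaudio download location, e.g. `"tutorial-assets/steam-train-whistle-daniel_simon.gsm"`.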

**Output:** 

- `file_path` (string): The path of the downloaded file.

In [4]:
# Fetch the tutorial assets; download_asset() returns the local path of each downloaded file
SAMPLE_GSM = download_asset("tutorial-assets/steam-train-whistle-daniel_simon.gsm")
SAMPLE_WAV = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
SAMPLE_WAV_8000 = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav")

# Show the type of the returned value and where the file was stored locally
print(type(SAMPLE_GSM), SAMPLE_GSM, sep="\n")


<class 'str'>
/Users/ashish/.cache/torch/hub/torchaudio/tutorial-assets/steam-train-whistle-daniel_simon.gsm


In [5]:
def _hide_seek(obj):
    class _wrapper:
        def __init__(self, obj):
            self.obj = obj

        def read(self, n):
            return self.obj.read(n)

    return _wrapper(obj)

`pathlike` - An object from a class having an `__fspath__()` method that returns the path of the file.

`filelike` - An object from a class which includes methods such as `read()`, `write()`, `close()`, `seek()`.

## `2 : torchaudio.info()`

**Purpose:** This function returns the information of the given audio file.

**Input:** 

- `pathlike` (string): The path of the audio file.
- `filelike` (specifically, an object that has methods similar to those found on a file object): The file-like object of the audio file.

**Output:** 

- `info` (`AudioMetaData`): An object containing the information of the audio file (sample rate, number of frames, number of channels, bits per sample and encoding).

In [6]:
# torchaudio.info() called with the path of a local file
metadata = torchaudio.info(SAMPLE_WAV)
# Show the type of the returned object, then the object itself
print(type(metadata), metadata, sep="\n")

<class 'torchaudio._backend.common.AudioMetaData'>
AudioMetaData(sample_rate=16000, num_frames=54400, num_channels=1, bits_per_sample=16, encoding=PCM_S)


In [7]:
# Example of taking input as a file-like object: the HTTP response body is
# streamed into torchaudio.info() instead of being saved to disk first.
url = "https://download.pytorch.org/torchaudio/tutorial-assets/steam-train-whistle-daniel_simon.wav"
# The timeout (in seconds) keeps the cell from hanging forever if the server stops responding
with requests.get(url, stream=True, timeout=30) as response:
    # Raise a clear HTTP error instead of handing an error page to the audio decoder
    response.raise_for_status()
    # _hide_seek exposes only read(), so the stream is consumed as a plain file-like object
    metadata = torchaudio.info(_hide_seek(response.raw))
print(metadata)

AudioMetaData(sample_rate=44100, num_frames=109368, num_channels=2, bits_per_sample=16, encoding=PCM_S)


## `3 : torchaudio.load()`

**Purpose:** This function converts an audio file to a tensor.

**Input:** 

- `pathlike` (string): The path of the audio file.
- or  `filelike` (file-like object): The file-like object of the audio file.

**Output:** 

- `tuple` (Tensor, int): Returns a tuple containing the waveform tensor and the sampling rate.

### Example of taking input as pathlike object  

In [8]:

# Decode the audio file at SAMPLE_WAV into a tensor plus its sampling rate
waveform, sample_rate = torchaudio.load(SAMPLE_WAV)

# Show the tensor, its shape and its type, followed by the sampling rate and its type
print(f" {waveform}.\n {waveform.shape} \n {type(waveform)}")
print(f"{sample_rate} \n{type(sample_rate)}")



 tensor([[0.0183, 0.0180, 0.0180,  ..., 0.0018, 0.0019, 0.0032]]).
 torch.Size([1, 54400]) 
 <class 'torch.Tensor'>
16000 
<class 'int'>


In [9]:
# channels_first=False changes the layout of the loaded tensor: the recorded run
# shows torch.Size([54400, 1]) here, versus torch.Size([1, 54400]) with the default.
waveform2, sample_rate2 = torchaudio.load(SAMPLE_WAV, channels_first=False )
print(f" {waveform2}.\n {waveform2.shape} \n {type(waveform2)}")

 tensor([[0.0183],
        [0.0180],
        [0.0180],
        ...,
        [0.0018],
        [0.0019],
        [0.0032]]).
 torch.Size([54400, 1]) 
 <class 'torch.Tensor'>


### Example of taking input as filelike object  

In [10]:
# Load audio data from a file-like object: the HTTP response body is streamed
# into torchaudio.load() instead of being saved to disk first.
url = "https://download.pytorch.org/torchaudio/tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"
# The timeout (in seconds) keeps the cell from hanging forever if the server stops responding
with requests.get(url, stream=True, timeout=30) as response:
    # Raise a clear HTTP error instead of handing an error page to the audio decoder
    response.raise_for_status()
    # _hide_seek exposes only read(), so the stream is consumed as a plain file-like object
    waveform1, sample_rate1 = torchaudio.load(_hide_seek(response.raw))
print(f"{waveform1}")
print(sample_rate1)
# Identity of the tensor object; this number differs from run to run
id(waveform1)

tensor([[0.0183, 0.0180, 0.0180,  ..., 0.0018, 0.0019, 0.0032]])
16000


5410862080

In [11]:
# Load audio from a member of a tar archive through a file-like object
tar_path = download_asset("tutorial-assets/VOiCES_devkit.tar.gz")
tar_item = "VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"
with tarfile.open(tar_path, mode="r") as tarfile_:
    # extractfile() hands back a file-like object for the member, which load() reads directly
    fileobj = tarfile_.extractfile(tar_item)
    # NOTE(review): this rebinds waveform2 / sample_rate2 from the channels_first=False cell
    waveform2, sample_rate2 = torchaudio.load(fileobj)
    
print(waveform2)    

tensor([[0.0183, 0.0180, 0.0180,  ..., 0.0018, 0.0019, 0.0032]])


## `4 : torchaudio.save()`

**Purpose:** This function saves a tensor as an audio file, written either to a path or to a file-like object.

**Input:** 

- `pathlike` (string): The path where audio file is to be saved.
- `Tensor`: Data to be saved
- `Sample Rate` : The sample rate of the audio file

**Output:** 

- `None` : Saves the tensor to the given path

In [12]:
def inspect_file(path):
    """Print a short report about the audio file at ``path``.

    The report shows the source path between two dashed rules, then the
    file size in bytes and the metadata returned by ``torchaudio.info``,
    and ends with an empty line.
    """
    rule = "-" * 10
    print(rule)
    print("Source:", path)
    print(rule)
    # Size and metadata are looked up only after the header has been printed
    size_in_bytes = os.path.getsize(path)
    print(f" - File size: {size_in_bytes} bytes")
    file_metadata = torchaudio.info(path)
    print(f" - {file_metadata}")
    print()

In [13]:
# Write the waveform to a .wav file inside a temporary directory and report on it;
# the directory and its contents are removed when the `with` block exits.
with tempfile.TemporaryDirectory() as tempdir:
    path = f"{tempdir}/save_example_default.wav"
    # No format argument is passed here; the recorded output reports 16-bit PCM_S
    torchaudio.save(path, waveform, sample_rate)
    inspect_file(path)

----------
Source: /var/folders/ty/vmyg3z293jsfb3yvlyrwvgg00000gp/T/tmpu1tr_pdf/save_example_default.wav
----------
 - File size: 108878 bytes
 - AudioMetaData(sample_rate=16000, num_frames=54400, num_channels=1, bits_per_sample=16, encoding=PCM_S)



In [14]:
# Formats exercised by the save loop; sph, amb, amr-nb and gsm are left disabled.
formats = ["flac", "vorbis"]

In [15]:
# Save the 8000 Hz sample once per entry in `formats` and inspect each result.
# NOTE: this rebinds `waveform` / `sample_rate`, which were loaded from SAMPLE_WAV earlier.
waveform, sample_rate = torchaudio.load(SAMPLE_WAV_8000)
with tempfile.TemporaryDirectory() as tempdir:
    # Named `audio_format` rather than `format` so the built-in format() is not
    # shadowed for the rest of the notebook.
    for audio_format in formats:
        path = f"{tempdir}/save_example.{audio_format}"
        torchaudio.save(path, waveform, sample_rate, format=audio_format)
        inspect_file(path)

----------
Source: /var/folders/ty/vmyg3z293jsfb3yvlyrwvgg00000gp/T/tmp4rjt1u3d/save_example.flac
----------
 - File size: 45264 bytes
 - AudioMetaData(sample_rate=8000, num_frames=27200, num_channels=1, bits_per_sample=16, encoding=FLAC)

----------
Source: /var/folders/ty/vmyg3z293jsfb3yvlyrwvgg00000gp/T/tmp4rjt1u3d/save_example.vorbis
----------
 - File size: 19253 bytes
 - AudioMetaData(sample_rate=8000, num_frames=26624, num_channels=2, bits_per_sample=0, encoding=VORBIS)



