
# Audio Tagging Toolkit
## attk.utils demo



[<< Back to README](https://github.com/hipstas/aapb-july-2017-demo/blob/master/README.md) \| [Forward to pg. 3 >>]()


In [None]:
## Suppresses text output from this cell
#%%capture

## Install the latest version of Audio Tagging Toolkit
#!pip install -U git+https://github.com/hipstas/audio-tagging-toolkit.git

## Or ...
#!pip install -U attk

In [None]:
import attk
import os
import random

In [None]:
## Download an MP3 audio file

!wget https://media.sas.upenn.edu/pennsound/authors/Armantrout/WPS1/Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3

media_path='./Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3'

In [None]:
## Create a temporary WAV copy of the MP3 and assign its pathname to a variable

wav_pathname = attk.temp_wav_path(media_path)

In [None]:
## View the pathname of our temporary file

wav_pathname

In [None]:
## Return file duration in seconds via ffprobe
## (accepts just about any audio or video format)

dur_secs = attk.duration(wav_pathname)

dur_secs

In [None]:
## Extract 12 Mel Frequency Cepstral Coefficients (0th coefficient removed)
## and display 2 bins' worth of values

mfccs = attk.get_mfccs(wav_pathname)

mfccs[:2]

In [None]:
## Extract MFCC + delta + delta delta features and display 2 bins' worth of values

mfcc_d = attk.get_mfccs_and_deltas(wav_pathname)

mfcc_d[:2]

## *Working with ML classifier output values*

In [None]:
## Apply a rolling average to a list of values
## default window size: 10

smoothed_vals = attk.smooth([1,2,3,4,3,2,3,4,19,3,2,3,4,3,2,1,1,1,8,1,1,1,2,3,3,4,4,4,4,2])

smoothed_vals

In [None]:
## Apply a rolling average to a list of binary classifier output values
## default window size: 10

smoothed_classes = attk.smooth([0.1, 0.01, 1.0, 0.7, 0.99, 0.2, 0.7, 0.9, 0.1, 0.1, 0.2, 0.1])

smoothed_classes

In [None]:
## Round values to the nearest whole number using list comprehension notation
## NOTE(review): round() here is the Python builtin. For plain Python floats on
## Python 3 it returns ints and rounds exact .5 ties to the nearest even integer
## (so 0.5 -> 0); on Python 2 it returns floats. Confirm which interpreter this
## notebook targets if the tie-breaking matters.

rounded_classes = [round(item) for item in smoothed_classes]

rounded_classes

In [None]:
## Convert a list of labels to a list of (start, end) pairs corresponding 
## to a specified label

range_pairs = attk.labels_to_ranges(rounded_classes, label=0)

range_pairs

In [None]:
## Determine the approximate location of vowel sounds in speech recordings
## bin size: 512

vowel_ranges = attk.get_vowel_segments(wav_pathname)

vowel_ranges

In [None]:
## Quickly excerpt WAV segments from media files using moviepy

media_path = 'Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3'

## Positional arguments: source file, start time, end time (both in seconds).
## out_dir='./' sends the excerpt to the current working directory.
## NOTE(review): wav_path is presumably the pathname of the excerpt that was
## written -- confirm against the attk.subclip implementation.
wav_path = attk.subclip(media_path,15,21,out_dir='./') # starting at 15 seconds, ending at 21 seconds

In [None]:
## Create a WAV excerpt without specifying a destination directory
## (writes excerpt to directory containing the specified media file)

attk.subclip(media_path, 0.5, 15.75)

In [None]:
## View files in the current working directory

!ls

In [None]:
## Move media files into a new directory

!mkdir test_dir
!mv Armantrout-Rae_06_Way_WPS1_NY_5-10-06* test_dir/

In [None]:
!ls test_dir/

In [None]:
## Recursively scan a given directory and return a list of pathnames for
## every media file present

dir_path="./test_dir"

media_paths = attk.find_media_paths(dir_path)

media_paths

In [None]:
## Excerpt a short clip (from 1.3 s to 3.25 s) from every media file in a directory

import random

## Keep only the formats we want to excerpt; the extension check is case-insensitive
media_paths = [item for item in attk.find_media_paths(dir_path)
               if item.lower().endswith(('.mp3', '.wav', '.mp4'))]

## Process the files in random order
random.shuffle(media_paths)

## Write each excerpt into the same directory that was scanned (dir_path)
for media_path in media_paths:
    attk.subclip(media_path, 1.3, 3.25, out_dir=dir_path)

In [None]:
!ls

In [None]:
## Be sure to delete your temp file when you're finished using it.
## wav_pathname holds the temporary WAV copy returned by attk.temp_wav_path().

os.remove(wav_pathname)