# Segment a WAV Dataset into Overlapping Frames of n Seconds

## GTzan Dataset

http://marsyasweb.appspot.com/download/data_sets/

The dataset consists of 1,000 audio tracks each 30 seconds long.

It contains 10 genres, each represented by 100 tracks.

The tracks are all 22,050Hz Mono 16-bit audio files in .wav format.


##### 8 May 2019
#### Alessandro L. Koerich

In [1]:
import numpy as np
import os
import soundfile as sf
import math

import matplotlib.pyplot as plt
import pylab as py
from scipy import fftpack
from scipy import signal
from pylab import *
import scipy.io.wavfile

ModuleNotFoundError: No module named 'soundfile'

In [None]:
# Path to the source WAV files, already split into folds.
# The segmentation cell further down reads the first two path components below
# this folder as the fold and the genre, i.e. <path2>/<fold>/<genre>/<file>.
path2='../Temp_Data/GTzan_3s'

# Path to the destination folder (segmented WAV files).
# Name appears to encode the window length in samples and the overlap in
# percent (Win_<length>_<overlap>) -- keep it in sync with the segmentation setup.
path3='GTzan_3f_Win_110250_75'

## Build a list of WAV files

In [None]:
# Build a sorted list of the WAV files found (recursively) under path2.
# - Only *.wav entries are kept, so stray files (hidden OS files, notes, ...)
#   are not handed to the audio reader later on.
# - os.walk yields entries in arbitrary order; sorting makes the order of the
#   list (and therefore the track numbering derived from it) reproducible.
file_list = []
for path, subdirs, files in os.walk( path2 ):
    for name in files:
        if name.lower().endswith('.wav'):
            file_list.append( os.path.join(path, name) )
file_list.sort()

# Number of files collected (kept in `i` as in the rest of the notebook)
i = len(file_list)
print("Files processed: "+str(i) )

In [None]:
# Display the collected file paths as the output of this notebook cell
file_list

## Check that every audio file is at least 5 seconds long

In [None]:
# This block makes sure that all of the audio files are at least 5 seconds long
# and reports length statistics (average, maximum, minimum).

# For a sample rate = 22,050 
#  1s = 22,050 x 1  =  22,050 samples 
#  5s = 22,050 x 5  = 110,250 samples
# 10s = 22,050 x 10 = 220,500 samples

# Minimum accepted length in samples (5s at 22,050 Hz)
min_samples = 110250

length     = list()
samplerate = None

for file in file_list:
    data, samplerate = sf.read( file )
    # Strictly shorter only: a file of exactly 110,250 samples IS 5s long
    if len(data) < min_samples:
        print("Audio with less than 5s: "+str(file) )
    length.append(len(data))

# Number of files read and total number of samples (used for the average length)
i   = len(length)
avg = sum(length)

print( "Files processed: "+str(i) )

if i > 0:
    # NOTE: durations are derived from the sample rate of the LAST file read,
    # i.e. they assume that every file shares the same sample rate.
    print( "Average file length: "+str(avg/i)+" samples   "+str(avg/i/samplerate)+"s   "+str(avg/i/samplerate/60)+"min" )

    print( "Max length: "+str(max(length))+" samples   "+str(max(length)/samplerate)+"s   "+str(max(length)/samplerate/60)+"min" )

    print( "Min length: "+str(min(length))+" samples   "+str(min(length)/samplerate)+"s   "+str(min(length)/samplerate/60)+"min" )
else:
    # Nothing to average: avoid the division by zero / max() of an empty list
    print( "No audio files found: length statistics skipped" )

## Sliding Window Setup

In [None]:
# window  = size of window (1,600 - 8,000 - 16,000 - 32,000, etc.)
# overlap = percentage of overlapping (0, 0.25, 0.50, 0.75, etc.)

# sample rate = 22,050
#  1s = 22,050 x 1 =  22,050 samples
#  5s = 22,050 x 5 = 110,250
# 10s = 22,050 x 10 = 220,500

def window(data, window, overlap):
    """Split a signal into fixed-length, overlapping frames.

    Parameters
    ----------
    data : sequence or numpy array
        Samples of the signal, indexed by time along the first axis.
    window : int
        Frame length, in samples.
    overlap : float
        Fraction of a frame shared with the next one (0, 0.25, 0.50, 0.75, ...).
        The hop between frame starts is ``int(window * (1 - overlap))`` samples.

    Returns
    -------
    list
        The frames, in order. Every frame holds exactly `window` samples:
        * signal at least one window long -> all frames that fit entirely
          inside the signal (slices of `data`); a trailing remainder shorter
          than one hop is not returned;
        * non-empty signal shorter than one window -> a single frame made of
          the signal followed by zeros;
        * empty signal -> an empty list.

    Raises
    ------
    ValueError
        If the hop is smaller than one sample (overlap too close to, or
        above, 1), since the frames would never advance.
    """
    # Hop in whole samples; the same integer is used for counting AND slicing
    # so that the number of frames always matches what the slices can deliver.
    step = int(window * (1 - overlap))
    if step < 1:
        raise ValueError("overlap leaves a hop of less than one sample")

    n_samples = len(data)
    if n_samples == 0:
        return []

    if n_samples < window:
        # Too short for a single frame: keep the samples and pad with zeros
        print("Warning: shorter than frame size-> zero padding")
        samples = np.asarray(data)
        padded = np.zeros((window,) + samples.shape[1:])
        padded[:n_samples] = samples
        return [padded]

    # Number of frames whose last sample still lies inside the signal
    n_frames = (n_samples - window) // step + 1
    return [data[k * step:k * step + window] for k in range(n_frames)]

## Energy of the Audio Signal

In [None]:
# Compute the signal energy
def energy(samples):
    """Return the sum of the squared samples divided by the number of samples."""
    squared = np.power(samples, 2.)
    total = np.sum(squared)
    count = float(len(samples))
    return total / count

## Write Metadata in a CSV File

In [None]:
# WRITE TO FILE METADATA
# Creates the metadata CSV file and writes its header row. `fl` and `writer`
# are intentionally left open: the segmentation cell appends one row per saved
# segment through `writer`.

import csv

filename="GTzan3f_Spltr_110_2Voc.csv"

# newline='' is what the csv module asks for: the writer emits its own line
# terminators, and without it some platforms produce an extra blank line
# after every row.
fl = open(filename, 'w', newline='')

writer = csv.writer(fl)

# Column names of the metadata table
values = ['track_id','frame_id','fold','genre']

writer.writerow(values)

# Push the header to disk right away instead of leaving it in the buffer
fl.flush()

## Segment Audio Files with the Sliding Window

### Each segment is saved on disk under a name that ends with its segment number (`<track id>_<segment number>.wav`).

### Segments with low energy (possibly silence) are discarded

In [None]:
# Audio files are segmented and each kept segment is saved on disk as
# <path3>/<fold>/<genre>/<track_id>_<frame_id>.wav; one metadata row per saved
# segment is appended to the CSV file.
# sample rate = 22,050
#  1s = 22,050 x 1 =  22,050
#  5s = 22,050 x 5 = 110,250
# 10s = 22050 x 10 = 220,500

# Split 30s audio samples into 5s segments with an overlapping of 75%
# 21 segments per audio sample
frame_size  = 110250
frame_over  = 0.75
track_id    = 1

# A segment is kept only if its energy is above this value (otherwise it is
# treated as silence and discarded)
energy_min  = 1e-7

# Rectangular (all-ones) window applied to every segment. It is identical for
# all segments, so it is built once. signal.windows.boxcar is the supported
# location of the function formerly reachable as signal.boxcar.
frame_win   = signal.windows.boxcar(frame_size)

for file in file_list:
    data , samplerate = sf.read( file )
    print ("--------------")
    print ("Sample Rate: "+str(samplerate)+"   Length: "+str(data.shape))
    print (str( file ) )

    # Source layout is <path2>/<fold>/<genre>/<file>: take fold and genre from
    # the path relative to path2 rather than from fixed character offsets, so
    # the cell keeps working when path2 changes.
    rel_parts = os.path.relpath(file, path2).split(os.sep)
    fold_id   = rel_parts[0]
    genre_id  = str(rel_parts[1])
    directory = path3+'/'+fold_id+'/'+genre_id

    # Segment numbering restarts for every track and only counts saved segments
    frame_id = 1

    windowed = window( data, frame_size, frame_over )

    for i, frame in enumerate(windowed):
        spch = frame * frame_win

        ##### Evaluate Energy
        ##### Only save a segment if it has enough energy
        seg_energy = energy(spch)

        if seg_energy > energy_min:

            seg_name_new = str(track_id)+'_'+str(frame_id)
            seg_path     = directory+'/'+seg_name_new+".wav"

            # Create the destination folder if it does not exist yet
            os.makedirs(directory, exist_ok=True)

            sf.write( seg_path, spch, samplerate )
            print( seg_path )

            # Write a line in .CSV file
            values = [str(track_id), str(frame_id), str(fold_id), str(genre_id) ]
            writer.writerow(values)

            frame_id += 1

        else:
            print("Energy = "+str(seg_energy*100)+"%   - Silence file: "+str(i) )

    track_id += 1

# Make sure every metadata row written above reaches the CSV file on disk
fl.flush()
        

In [None]:
# Print a completion message (last cell of the notebook)
print("Audio segmentation finished")