In [2]:
import soundfile as sf
from utility import remove_silence
import librosa
import scipy
import numpy as np
import math

In [16]:
audio_file = 'Quit_Playing_Games.wav'

data, fs = remove_silence(audio_file, 0.0001)

In [17]:
def lpc_to_lpcc(lpc):
	lpcc=[]
	order= lpc.size-1
	lpcc.append(math.log(order))
	lpcc.append(lpcc[0])
	for i in range(2,order+1):
		sum1=0
		for j in range(1,i):
			sum1+=j/i*lpc[i-j-1]*lpcc[j]
		c= -lpc[i-1]+sum1
		lpcc.append(c)
	return lpcc

# Function to compute feature for each frame
def vocal_features(frames, SpectralFlux_frames, order):
	features_frames = []
	for i in range(len(frames)):
		features = np.zeros(order+3)
		frame = frames[i]
		lpc = librosa.core.lpc(frame, order)
		lpcc = lpc_to_lpcc(lpc)
		zcr = 0.0
		for i in range(1,frame.shape[0]):
			if (frame[i]>=0 and frame[i-1]<0) or (frame[i]<0 and frame[i-1]>=0):
			 	zcr += 1
		zcr = zcr/frame.shape[0]
		sf = SpectralFlux_frames[i]
		features[0:order+1] = lpcc
		features[order+1] = zcr
		features[order+2] = sf
		features_frames.append(features)
	return np.array(features_frames)

def SpectralFlux(X, f_s):

    # difference spectrum (set first diff to zero)
    X = np.c_[X[:, 0], X]
    # X = np.concatenate(X[:,0],X, axis=1)
    afDeltaX = np.diff(X, 1, axis=1)

    # flux
    vsf = np.sqrt((afDeltaX**2).sum(axis=0)) / X.shape[0]

    return (vsf)





In [18]:
n_samples = data.shape[0]
# sf.write('no_silence.wav', data, fs)

# Divide audio into frames
# Frame size in ms
frame_size = 20.0
n_per_frame = int( (frame_size/1000)*fs )

frames_data = []
overlapping_rate = 0.5
order = 5

In [19]:
f,t,Sxx = scipy.signal.spectrogram(data, fs, nperseg=n_per_frame, noverlap= int(n_per_frame*(1-overlapping_rate)))
for i in range(0, n_samples, int(n_per_frame*(1-overlapping_rate))):
	if i+n_per_frame<=n_samples:
		frames_data.append( data[i:i+n_per_frame] )

SpectralFlux_frames = SpectralFlux(Sxx, fs)
features_frames = vocal_features(frames_data, SpectralFlux_frames, order)

import pandas as pd

df= pd.DataFrame(features_frames)

print (df)

              0         1         2         3         4         5         6  \
0      1.609438  1.609438  3.056758 -1.448251 -1.636682  1.599141  0.004535   
1      1.609438  1.609438  3.461422 -2.617050 -2.257196  4.928653  0.028345   
2      1.609438  1.609438  2.781913 -1.595387 -0.720001  1.793056  0.049887   
3      1.609438  1.609438  2.890841 -1.610522 -1.017001  1.805237  0.105442   
4      1.609438  1.609438  3.006982 -1.715605 -1.315439  2.030582  0.120181   
5      1.609438  1.609438  3.120608 -1.814827 -1.660770  2.303644  0.074830   
6      1.609438  1.609438  3.000270 -1.648583 -1.327125  1.837737  0.089569   
7      1.609438  1.609438  2.926538 -1.668915 -1.145684  2.060899  0.100907   
8      1.609438  1.609438  3.046860 -1.923688 -1.344745  2.747368  0.100907   
9      1.609438  1.609438  3.148871 -2.123923 -1.499889  3.261838  0.102041   
10     1.609438  1.609438  3.191856 -2.197610 -1.560036  3.438797  0.080499   
11     1.609438  1.609438  3.103828 -1.956769 -1.485

In [15]:
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8
count,23013.0,23013.0,23013.0,23013.0,23013.0,23013.0,23013.0,23013.0,23013.0
mean,1.609438,1.609438,2.787696,-1.461231,-1.038136,1.798613,0.092538,5.821116e-08,0.000351
std,5.593425e-13,5.593425e-13,0.368318,0.506962,0.674876,1.100723,0.060053,8.602865000000001e-23,0.053255
min,1.609438,1.609438,1.213893,-3.990885,-3.828761,-2.558464,0.002268,5.821116e-08,0.0
25%,1.609438,1.609438,2.532748,-1.800704,-1.512171,1.026391,0.052154,5.821116e-08,0.0
50%,1.609438,1.609438,2.82259,-1.441498,-1.051318,1.620834,0.080499,5.821116e-08,0.0
75%,1.609438,1.609438,3.052143,-1.118161,-0.53082,2.432456,0.11678,5.821116e-08,0.0
max,1.609438,1.609438,4.039024,0.637486,0.584447,8.99502,0.517007,5.821116e-08,8.078776


In [7]:
df2= df.drop([0,1],axis=1)
print (df2)

              2         3         4         5         6             7
0      3.056758 -1.448251 -1.636682  1.599141  0.004535  5.821116e-08
1      3.461422 -2.617050 -2.257196  4.928653  0.028345  5.821116e-08
2      2.781913 -1.595387 -0.720001  1.793056  0.049887  5.821116e-08
3      2.890841 -1.610522 -1.017001  1.805237  0.105442  5.821116e-08
4      3.006982 -1.715605 -1.315439  2.030582  0.120181  5.821116e-08
5      3.120608 -1.814827 -1.660770  2.303644  0.074830  5.821116e-08
6      3.000270 -1.648583 -1.327125  1.837737  0.089569  5.821116e-08
7      2.926538 -1.668915 -1.145684  2.060899  0.100907  5.821116e-08
8      3.046860 -1.923688 -1.344745  2.747368  0.100907  5.821116e-08
9      3.148871 -2.123923 -1.499889  3.261838  0.102041  5.821116e-08
10     3.191856 -2.197610 -1.560036  3.438797  0.080499  5.821116e-08
11     3.103828 -1.956769 -1.485861  2.800992  0.071429  5.821116e-08
12     3.043351 -1.804873 -1.363768  2.353128  0.065760  5.821116e-08
13     3.129201 -1.9

In [6]:
Ls=30
Tc=3
n_clusters= int((Ls)/Tc)

In [16]:
from sklearn.preprocessing import normalize
data_scaled = normalize(df2)
data_scaled = pd.DataFrame(data_scaled)

data_scaled.head()


Unnamed: 0,0,1,2,3,4,5
0,0.748513,-0.354636,-0.400777,0.391584,0.001111,1.425426e-08
1,0.498484,-0.376885,-0.325062,0.709782,0.004082,8.383065e-09
2,0.742961,-0.426077,-0.192289,0.478868,0.013323,1.554636e-08
3,0.740146,-0.412344,-0.260384,0.462198,0.026997,1.490389e-08
4,0.711658,-0.40603,-0.311323,0.480575,0.028443,1.377675e-08


In [29]:
from sklearn.cluster import AgglomerativeClustering
cluster = AgglomerativeClustering(n_clusters, affinity='euclidean', linkage='ward')  

In [30]:
clusters=cluster.fit_predict(data_scaled)

In [32]:
print (clusters.shape)

(23013,)


In [34]:
minCluster=[24000]*10
maxCluster=[0]*10

for i in range(clusters.shape[0]):
    minCluster[clusters[i]]= min(minCluster[clusters[i]],i)
    maxCluster[clusters[i]]= max(maxCluster[clusters[i]],i)

print (minCluster)
print (maxCluster)


[2, 4, 260, 26, 0, 61, 34, 1, 154, 62]
[22786, 23009, 22988, 22783, 22995, 22781, 22997, 23012, 22694, 22780]
