In [1]:
import numpy as np
import librosa 
import glob
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf


In [2]:
def load_spectogram(dirname):
  i=0;
  filename= 'temp'
  for name in glob.glob(dirname+'/*.npy'):
    spectog = np.load(name)
    name = name[12:len(name)-4]
    if(i==0):
      spec= np.empty((0,spectog.shape[0],spectog.shape[1]))
    #  name = name[12:len(name)-4]
      filename = name
      i=1
    elif(i==1):
      filename = np.vstack([filename,name])
    spectog=spectog.reshape((1,spectog.shape[0],spectog.shape[1]))
    spec=np.vstack([spec,spectog])
  #print(filename)
  return filename,spec

In [3]:
def meanfilt (x, k):
    """Apply a length-k mean filter to a 1D array x.
    Boundaries are extended by repeating endpoints.
    """
    
    import numpy as np
    x = np.reshape(x,(x.shape[1]))
    assert k % 2 == 1, "Median filter length must be odd."
    assert x.ndim == 1, "Input must be one-dimensional."
    
    k2 = (k - 1) // 2
    y = np.zeros ((len (x), k), dtype=x.dtype)
    y[:,k2] = x
    for i in range (k2):
        j = k2 - i
        y[j:,i] = x[:-j]
        y[:j,i] = x[0]
        y[:-j,-(i+1)] = x[j:]
        y[-j:,-(i+1)] = x[-1]
    return np.mean (y, axis=1)

In [4]:
def calc_threshold_and_rmse(spec):
  thres=np.empty((spec.shape[0],1))
  rmse = np.empty((0,spec[0].shape[1]))
  for i in range(0,spec.shape[0]):
      a=librosa.decompose.nn_filter(spec[i],aggregate=np.average)
      rmse_val = librosa.feature.rms(S=a,frame_length=1024, hop_length=512)
      rmse_val = meanfilt(rmse_val,11)
      rmse=np.vstack([rmse,rmse_val])
      max=np.max(rmse_val[15:rmse_val.shape[0]])
      min=np.min(rmse_val)
      thres[i] =  (0.965)*max+(0.035)*(min)
      thresarr = np.full((spec[0].shape[1],1),thres[i])
      #plt.figure()
      #plt.plot(rmse_val, label='RMS Energy')
      #plt.plot(thresarr)
  return thres,rmse

In [5]:
def calc_timestamp(thres,rmse,filename):
  time_stamp = np.empty((0,3))
  a = np.empty((1,3))
  names = np.empty((0,1))
  for i in range(0,rmse.shape[0]):
      j=0
      prev=rmse[j];
      j=1
      while j<rmse.shape[1]:
          if(rmse[i][j]<=thres[i]):
              index=i
              onset=j
              while (j<rmse.shape[1] and rmse[i][j]<=thres[i]):
                  j=j+1
              offset=j
              a[0][0]=index
              a[0][1]=onset
              a[0][2]=offset
              #print(filename[index],onset,offset,index)
              if(offset - onset >=25):
                time_stamp=np.vstack([time_stamp,a])
                names = np.vstack([names,filename[index]])
          if(j<rmse.shape[1] and rmse[i][j]>thres[i]):
              j=j+1;
  return time_stamp,names

In [6]:
import random as rd
class Kmeans:
    def __init__(self,X,lab,K):
        self.X=X
        self.Output={}
        self.Centroids=np.array([]).reshape(self.X.shape[1],0)
        self.K=K
        self.m=self.X.shape[0]
        self.lab=lab
        self.labb=[0,1]
    def kmeanspp(self,X,K):
        for i in range(K):
            rand=rd.randint(0,self.m-1)
            self.Centroids=np.c_[self.Centroids,X[rand]]
        return self.Centroids
    def fit(self,n_iter):
        self.Centroids=self.kmeanspp(self.X,self.K)
        Output={}
        for j in range(n_iter):
            EuclidianDistance=np.array([]).reshape(self.m,0)
            for k in range(self.K):
                tempDist=np.sum((self.X-self.Centroids[:,k])**2,axis=1)
                EuclidianDistance=np.c_[EuclidianDistance,tempDist]
            a = np.average(EuclidianDistance,axis=0)
            b = np.average(a)
            C=np.argmin(EuclidianDistance,axis=1)+1
            Y={}
            la={}
            for k in range(self.K):
                Y[k+1]=np.array([]).reshape(513,0)
                la[k+1]=np.array([]).reshape(1,0)
                
            for i in range(self.m):
                Y[C[i]]=np.c_[Y[C[i]],self.X[i]]
                la[C[i]]=np.append(la[C[i]],self.lab[i])
     
            for k in range(self.K):
                Y[k+1]=Y[k+1].T
                la[k+1]=la[k+1].T
                print(Y[k+1].shape)
                
            for k in range(self.K):
                self.Centroids[:,k] = np.mean(Y[k+1],axis=0)
            print('cluster1:',np.average(la[1]),'cluster2:',np.average(la[2]),'speech:0, music:1')
            plt.figure()
            color=['red','blue','green','cyan','magenta']
            labels=['cluster1','cluster2','cluster3','cluster4','cluster5']
            for k in range(self.K):
                plt.scatter(np.average(Y[k+1][:,1:250],axis=1),np.average(Y[k+1][:,250:513],axis=1),c=color[k],label=labels[k])
            plt.scatter(np.average(self.Centroids[1:250,:],axis=0),np.average(self.Centroids[250:513,:],axis=0),s=300,c='yellow',label='Centroids')
            plt.title('Clusters ')
            plt.xlabel('Average spectogram feature values (1:250)')
            plt.ylabel('Average spectogram feature values (250:513)')
            plt.legend()
            plt.show()
            if(np.average(la[1])>np.average(la[2])):
              self.labb[0]=1
              self.labb[1]=0
            else:
              self.labb[0]=0
              self.labb[1]=1
            self.Output=Y
            print(self.labb)
        self.saveweights(self.Centroids, self.labb, 'centroid.npy', 'labb.npy')      
    
    def saveweights(self,centroid, label ,centroid_fn, label_fn):
      assert centroid_fn[-4:]=='.npy'
      assert label_fn[-4:]=='.npy'  
      np.save(centroid_fn, centroid)
      np.save(label_fn, label)
      return

    def readweights(self,centroid_fn, label_fn):
      self.Centroids = np.load(centroid_fn) 
      a = np.load(label_fn)
      self.labb[0] = a[0]
      self.labb[1] = a[1]
      print(self.labb)
      return self.Centroids, self.labb
    
    def predict(self,X):
        EuclidianDistance=np.array([]).reshape(X.shape[0],0)
        for k in range(self.K):
            tempDist=np.sum((X-self.Centroids[:,k])**2,axis=1)
            EuclidianDistance=np.c_[EuclidianDistance,tempDist]
        a = np.average(EuclidianDistance,axis=0)
        b = np.average(a)
        ll = np.zeros((X.shape[0],))
        for i in range(0,EuclidianDistance.shape[0]):
          if(EuclidianDistance[i][0]<EuclidianDistance[i][1]):
            ll[i]=self.labb[0]
          else:
            ll[i]=self.labb[1]
        C=np.argmin(EuclidianDistance,axis=1)+1
        Y={}
        for k in range(self.K):
            Y[k+1]=np.array([]).reshape(513,0)
                
        for i in range(X.shape[0]):
            Y[C[i]]=np.c_[Y[C[i]],X[i]]
     
        for k in range(self.K):
            Y[k+1]=Y[k+1].T

        return Y,self.Centroids.T,EuclidianDistance,ll

In [7]:
def aggregate_label_kmeans(x,weight_speech,weight_music):
  count_s = 0
  count_m = 0
  for i in x:
    if(i==0):
      count_s=count_s+1
    if(i==1):
      count_m=count_m+1
  if(count_s*weight_speech > count_m*weight_music):
    return 0
  else:
    return 1

In [8]:
def label_timestamp_kmeans(spec,time_stamp,wt_speech,wt_music,kmeans):
  l = np.full((time_stamp.shape[0],1),0) 
  k=0
  for i in time_stamp:
    temp_spec = spec[int(i[0])][:,int(i[1]):int(i[2])]
    Output,Centroids,distance,labels = kmeans.predict(temp_spec.T)
    if(aggregate_label_kmeans(labels,wt_speech,wt_music)==0):
      l[k][0]=0
    else:
      l[k][0]=1
    k=k+1
  return l

In [9]:
if __name__=="__main__":
      #audio_data,spec,sr,music_data, speech_data, filename, onset, offset, label  = readDir('wav')
    filename,spec = load_spectogram('spectrogram')########
    thres,rmse = calc_threshold_and_rmse(spec)
    time_stamp,names = calc_timestamp(thres,rmse,filename)
  #speech_spec, music_spec, label, combine = combine_speech_music_spec(speech_data,music_data)
  #speech_spec, music_spec, label, combine = read_and_combine_speech_music_spec() ###############################

  ##############Kmeans################################
  #wt_speech,wt_music,kmeans = train_Kmeans(combine,label,2,30,speech_spec,music_spec)
    kmeans=Kmeans(spec[0].T,3,2)#################
    kmeans.readweights('Kmeans_weights/centroid.npy','Kmeans_weights/labb.npy')#####################
    wt_speech=np.load('Kmeans_weights/wt_speech.npy');####################
    wt_music=np.load('Kmeans_weights/wt_music.npy');##################
    l=label_timestamp_kmeans(spec,time_stamp,wt_speech,wt_music,kmeans)
    for i  in range(0,time_stamp.shape[0]):
        time_stamp[i,1]= ((time_stamp[i,1]-1)*512 + 1024)/16000
        time_stamp[i,2]= ((time_stamp[i,2]-1)*512 + 1024)/16000
    final_time_stamp_kmeans=np.hstack([time_stamp,l])
    k=0
    for i in range(0,l.shape[0]):
        if(l[i][0]==1):
            if(k==0):
                b='Speech'
                k=1
            else:
                b=np.vstack([b,'Speech'])
        else:
            if(k==0):
                b='Music'
                k=1
            else:
                b=np.vstack([b,'Music'])
    DF1 = pd.DataFrame(names,columns=['filename'])
    DF2 = pd.DataFrame(b,columns=['event'])
    DF3 = pd.DataFrame(final_time_stamp_kmeans[:,1],columns=['onset'])
    DF4 = pd.DataFrame(final_time_stamp_kmeans[:,2],columns=['offset'])
    frames = [DF1,DF2,DF3,DF4]
    result = pd.concat(frames,axis=1)
    #final = np.hstack([names.reshape((names.shape[0],)).T,final_time_stamp_NN[:,1].T,final_time_stamp_NN[:,2].T,b.reshape((b.shape[0],)).T])
    print(result)
    result.to_csv('Kmeans_event_detection.csv',index=False)

  ################NN####################################

[1, 0]
                filename   event  onset  offset
0                   S009  Speech  0.320   2.528
1                   S009  Speech  3.520   5.600
2                   S009  Speech  6.656   9.280
3           music_noisy8   Music  0.256   2.592
4           music_noisy8  Speech  4.128   7.136
..                   ...     ...    ...     ...
96          music_noisy3   Music  3.296   8.736
97   music+speech_noisy5   Music  0.224   2.016
98   music+speech_noisy5   Music  2.400   3.840
99   music+speech_noisy5   Music  4.576   6.336
100  music+speech_noisy5   Music  7.136   8.512

[101 rows x 4 columns]
