In [193]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
from scipy.io import wavfile


In [194]:
from google.colab import drive
drive.mount('/content/drive')
#files=glob.glob('/content/drive/My Drive/Data/emotion_classification/train/*.gif')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **import clean data**

In [195]:
samplerate, data = wavfile.read('/content/drive/My Drive/speech/speechFiles/clean.wav')

In [196]:
data.shape

(50000,)

In [197]:
freq=16
window_size=25
shift=10
new_data=[]
sample_ws=window_size*freq
sample_shift=shift*freq
for i in range(298):
  start=sample_shift*i
  end=start+sample_ws
  t=[]
  for j in range(start,end):
    t.append(data[j])
  new_data.append(t)
new_data=np.array(new_data)



# **Applying fourier transform on clean data**

In [198]:
import scipy
from scipy.fft import fft
clean_fft = scipy.fft.fft(new_data, n=256, axis=- 1)

In [199]:
clean_fft=np.delete(clean_fft,np.s_[128:],axis=1)

In [200]:
clean_fft.shape

(298, 128)

In [201]:
clean_fft=np.absolute(clean_fft)

In [202]:
clean_fft=np.log(clean_fft)


In [203]:
clean_fft

array([[6.12686918, 7.28144371, 5.93187779, ..., 2.04266071, 2.4049483 ,
        2.10012012],
       [6.65801105, 6.25178615, 5.45975573, ..., 1.17144881, 1.93212265,
        2.11231368],
       [5.44241771, 4.16271461, 3.94228034, ..., 1.48380558, 1.76626801,
        1.88496236],
       ...,
       [8.88544091, 8.9267673 , 9.15602101, ..., 6.48727899, 6.48970223,
        6.48789518],
       [9.39391115, 9.50173921, 9.58240528, ..., 6.35076434, 6.35518048,
        6.34611629],
       [6.87523209, 8.47187473, 9.32874946, ..., 6.32105638, 6.32385786,
        6.30811326]])

In [204]:
clean_cov=np.dot(clean_fft.T,clean_fft)



In [205]:
clean_cov

array([[19808.97012843, 20951.12415309, 21628.80714908, ...,
        13499.18981061, 13506.48454083, 13472.25365014],
       [20951.12415309, 22470.14951487, 23169.22547487, ...,
        14454.55070021, 14455.07383407, 14414.85861938],
       [21628.80714908, 23169.22547487, 24052.92971336, ...,
        14990.65746466, 14993.63621752, 14950.6135742 ],
       ...,
       [13499.18981061, 14454.55070021, 14990.65746466, ...,
         9830.78498293,  9819.46534003,  9800.85753942],
       [13506.48454083, 14455.07383407, 14993.63621752, ...,
         9819.46534003,  9817.68844518,  9795.45930878],
       [13472.25365014, 14414.85861938, 14950.6135742 , ...,
         9800.85753942,  9795.45930878,  9782.78568571]])

# **calculating whitening transform matrix**

In [206]:
val,vec=np.linalg.eigh(clean_cov)

In [207]:
val

array([3.03669252e-01, 4.15093930e-01, 5.40971748e-01, 7.88601662e-01,
       1.20070563e+00, 1.36867359e+00, 2.04580721e+00, 2.24755864e+00,
       2.89218856e+00, 3.64944451e+00, 4.84753136e+00, 8.73649163e+00,
       1.11493173e+01, 1.28100989e+01, 1.42460152e+01, 1.48862177e+01,
       1.64263292e+01, 1.68580205e+01, 1.81600370e+01, 2.00015677e+01,
       2.07669141e+01, 2.08755276e+01, 2.20174514e+01, 2.39255452e+01,
       2.45177845e+01, 2.62488692e+01, 2.76240207e+01, 2.87514011e+01,
       2.93479314e+01, 3.02967457e+01, 3.08361604e+01, 3.11572108e+01,
       3.27656013e+01, 3.50746725e+01, 3.53957537e+01, 3.70817916e+01,
       3.78189483e+01, 3.86433515e+01, 3.91816525e+01, 4.06406290e+01,
       4.14961499e+01, 4.25419794e+01, 4.40880772e+01, 4.61614795e+01,
       4.67462903e+01, 4.74800944e+01, 5.02065265e+01, 5.29072936e+01,
       5.35517793e+01, 5.42043103e+01, 5.66247802e+01, 5.76368361e+01,
       5.90020637e+01, 5.99773823e+01, 6.14739989e+01, 6.37369292e+01,
      

In [208]:
val1=np.power(val,-0.5)
L=np.diag(val1)
L



array([[1.81467807e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 1.55212541e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 1.35960486e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        1.67606012e-02, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 8.18945864e-03, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 7.02867553e-04]])

In [209]:
whitening_matrix=np.dot(L,vec.T)
whitening_matrix

array([[-7.57503278e-03,  5.56384496e-03,  3.37261900e-03, ...,
        -3.75475891e-01, -9.13378666e-02,  2.41386835e-01],
       [ 2.69346924e-03, -9.36052236e-03, -1.15031267e-02, ...,
        -3.32413852e-01,  1.47998007e-01,  4.68812247e-01],
       [ 7.02532124e-03, -3.32854348e-02,  3.91870821e-02, ...,
        -9.72662714e-03,  5.15595243e-02, -1.99061429e-01],
       ...,
       [ 2.88207660e-04, -4.21963570e-04,  1.32985881e-04, ...,
         3.83589970e-03,  3.76189087e-03,  3.94800836e-03],
       [-5.74616752e-04, -4.28594351e-04, -5.94290753e-04, ...,
         3.06839423e-04,  2.83058278e-04,  2.72727155e-04],
       [ 6.84568710e-05,  7.33879379e-05,  7.59957369e-05, ...,
         4.83354800e-05,  4.83335244e-05,  4.81903902e-05]])

In [210]:
whitening_matrix.shape

(128, 128)

# **import noisy wave and applying fourier transform**


In [211]:

samplerate, dataN = wavfile.read('/content/drive/My Drive/speech/speechFiles/noisy.wav')
new_dataN=[]

for i in range(298):
  start=sample_shift*i
  end=start+sample_ws
  t=[]
  for j in range(start,end):
    t.append(dataN[j])
  new_dataN.append(t)
new_dataN=np.array(new_dataN)

import scipy
from scipy.fft import fft

Noisy_fft = scipy.fft.fft(new_dataN, n=256, axis=- 1)
Noisy_fft=np.delete(Noisy_fft,np.s_[128:],axis=1)
Noisy_fft=np.absolute(Noisy_fft)
Noisy_fft=np.log(Noisy_fft)





In [212]:
print(Noisy_fft)

[[6.57228254 7.2689637  7.51343361 ... 4.08835662 6.089001   5.86758926]
 [6.50128967 6.74460944 6.16738372 ... 5.64418518 5.61202011 6.46444352]
 [5.81711116 5.92191086 7.46887439 ... 5.41718919 5.60109241 6.15282746]
 ...
 [9.02401079 8.60212118 9.05938871 ... 7.50551039 7.55369969 6.13319829]
 [9.43978404 9.67313641 9.49609749 ... 7.06316343 7.26396826 7.47753934]
 [6.60800063 8.53059936 9.39553144 ... 6.44949062 7.034915   7.1207715 ]]


# **Aplying whitening transform**

In [213]:
mean_clean=np.mean(clean_fft,axis=0)
mean_Noisy=np.mean(Noisy_fft,axis=0)

In [214]:
mean_Noisy.shape

(128,)

In [215]:
for i in range(clean_fft.shape[0]):
  clean_fft[i]=clean_fft[i]-mean_clean
for i in range(Noisy_fft.shape[0]):
  Noisy_fft[i]=Noisy_fft[i]-mean_Noisy

In [216]:
Noisy_fft

array([[-1.55370589, -1.46413313, -1.56220759, ..., -2.58472073,
        -0.5709525 , -0.82152015],
       [-1.62469876, -1.9884874 , -2.90825748, ..., -1.02889217,
        -1.04793339, -0.2246659 ],
       [-2.30887727, -2.81118598, -1.60676682, ..., -1.25588816,
        -1.05886109, -0.53628195],
       ...,
       [ 0.89802237, -0.13097566, -0.01625249, ...,  0.83243304,
         0.8937462 , -0.55591112],
       [ 1.31379561,  0.94003957,  0.42045629, ...,  0.39008608,
         0.60401476,  0.78842993],
       [-1.5179878 , -0.20249748,  0.31989024, ..., -0.22358673,
         0.3749615 ,  0.43166208]])

In [217]:
whitened_clean=np.dot(whitening_matrix,clean_fft.T)
whitened_noisy=np.dot(whitening_matrix,Noisy_fft.T)



In [218]:
print(whitened_clean)

[[-0.01997526  0.07904434 -0.02124915 ...  0.04558438  0.02965971
   0.05829295]
 [ 0.09803206 -0.0792565  -0.08459583 ... -0.01261663  0.0680168
   0.09738787]
 [ 0.08292263 -0.0237325  -0.15695839 ... -0.01014553  0.00025588
  -0.05178144]
 ...
 [-0.07393303 -0.11344556 -0.11825287 ...  0.04503419  0.04510261
   0.05126464]
 [-0.01432829 -0.02404592 -0.02131351 ... -0.03478547 -0.03005926
  -0.03843165]
 [-0.02200719 -0.02123205 -0.02056126 ...  0.00521129  0.00378292
   0.00400824]]


In [219]:
whitened_clean

array([[-0.01997526,  0.07904434, -0.02124915, ...,  0.04558438,
         0.02965971,  0.05829295],
       [ 0.09803206, -0.0792565 , -0.08459583, ..., -0.01261663,
         0.0680168 ,  0.09738787],
       [ 0.08292263, -0.0237325 , -0.15695839, ..., -0.01014553,
         0.00025588, -0.05178144],
       ...,
       [-0.07393303, -0.11344556, -0.11825287, ...,  0.04503419,
         0.04510261,  0.05126464],
       [-0.01432829, -0.02404592, -0.02131351, ..., -0.03478547,
        -0.03005926, -0.03843165],
       [-0.02200719, -0.02123205, -0.02056126, ...,  0.00521129,
         0.00378292,  0.00400824]])

In [220]:
whitened_noisy.shape

(128, 298)

# **Computing covariance matrix of whitened clean and Noisy data**

In [221]:
Wclean_cov=np.dot(whitened_clean,whitened_clean.T)
Wnoise_cov=np.dot(whitened_noisy,whitened_noisy.T)

In [222]:
Wclean_cov=Wclean_cov/298
Wnoise_cov=Wnoise_cov/298

In [223]:
Wclean_cov

array([[ 3.35570395e-03,  2.68352251e-09, -1.30472041e-08, ...,
         7.61660021e-08,  3.01320248e-08, -1.56740359e-06],
       [ 2.68352251e-09,  3.35569507e-03,  4.67874068e-08, ...,
        -2.73133872e-07, -1.08054465e-07,  5.62076254e-06],
       [-1.30472041e-08,  4.67874068e-08,  3.35547722e-03, ...,
         1.32794747e-06,  5.25349171e-07, -2.73275421e-05],
       ...,
       [ 7.61660021e-08, -2.73133872e-07,  1.32794747e-06, ...,
         3.34795246e-03, -3.06686210e-06,  1.59531618e-04],
       [ 3.01320248e-08, -1.08054465e-07,  5.25349171e-07, ...,
        -3.06686210e-06,  3.35449142e-03,  6.31122878e-05],
       [-1.56740359e-06,  5.62076254e-06, -2.73275421e-05, ...,
         1.59531618e-04,  6.31122878e-05,  7.27382541e-05]])

# **Calculating Average of Non diagonal elements of covariance matrix**

In [224]:
sum_clean=np.sum(np.absolute(Wclean_cov))-np.trace(Wclean_cov)
avg_clean=sum_clean/(128*128-128)
avg_clean

5.469742149290332e-07

In [225]:
sum_noisy=np.sum(np.absolute(Wnoise_cov))-np.trace(Wnoise_cov)
avg_noisy=sum_noisy/(128*128-128)
avg_noisy

0.0006737089234989374