In [None]:
import os
import librosa
import numpy as np
import pandas as pd
import soundfile as sf
import librosa.display
!pip install pydub
from pydub import AudioSegment
from scipy.io import wavfile
import IPython.display as ipd
from matplotlib import pyplot as plt
from sklearn.decomposition import FastICA
from scipy.signal import butter, lfilter

import warnings
warnings.filterwarnings('ignore')

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
# Counting total number of generated overlap chunks
#
# Walks the "Overlap Chunk Data" folder, whose sub-folders are named after the
# class label, and records one (path, label) pair per file found inside them.
chunk_root = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Overlap Chunk Data"

# Collect into plain lists and convert once at the end: calling np.hstack for
# every file copies the whole array each time, which is quadratic in the
# number of files.
path_list = []
label_list = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# (and therefore any row index used later) reproducible between runs.
for audio in sorted(os.listdir(chunk_root)):

  class_dir = os.path.join(chunk_root, audio)
  if not os.path.isdir(class_dir):
    # Skip stray files sitting next to the class folders.
    continue

  for file_name in sorted(os.listdir(class_dir)):
    path_list.append(os.path.join(class_dir, file_name))
    label_list.append(audio)

audio_path = np.array(path_list)
class_labels = np.array(label_list)

# .shape is a tuple such as (389778,); report the count itself.
print("Total Chunks = {}".format(audio_path.shape[0]))

Total Chunks = 389778


In [None]:
# Making new CSV file of audiopath and classlabels for chunk data
#
# Pairs every chunk path with its class label (one row per chunk), writes the
# table to Drive and previews the first rows.

path_labels = pd.DataFrame(
    np.column_stack((audio_path, class_labels)),
    columns=['AudioPath', 'ClassLabels'],
)
path_labels.to_csv(
    '/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_audioPath_classLabels.csv',
    index=False,
)
path_labels.head()

In [None]:
# Loading CSV file of overlap chunk data path and classlabels
#
# Reads back the path/label table written earlier so the rest of the notebook
# works from the saved file.

audioIndexCsv = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_audioPath_classLabels.csv"
audioPath_labels = pd.read_csv(audioIndexCsv)
print(audioPath_labels)

                                                AudioPath ClassLabels
0       /content/drive/MyDrive/Colab Notebooks/Fall 20...      aldfly
1       /content/drive/MyDrive/Colab Notebooks/Fall 20...      aldfly
2       /content/drive/MyDrive/Colab Notebooks/Fall 20...      aldfly
3       /content/drive/MyDrive/Colab Notebooks/Fall 20...      aldfly
4       /content/drive/MyDrive/Colab Notebooks/Fall 20...      aldfly
...                                                   ...         ...
389773  /content/drive/MyDrive/Colab Notebooks/Fall 20...      yetvir
389774  /content/drive/MyDrive/Colab Notebooks/Fall 20...      yetvir
389775  /content/drive/MyDrive/Colab Notebooks/Fall 20...      yetvir
389776  /content/drive/MyDrive/Colab Notebooks/Fall 20...      yetvir
389777  /content/drive/MyDrive/Colab Notebooks/Fall 20...      yetvir

[389778 rows x 2 columns]


In [None]:
# Functions for Bandpass Filtering to remove noise from the audios

def butter_bandpass(lowcut, highcut, fs, order=5):
  """Design a digital Butterworth band-pass filter.

  Both cut-off frequencies are given in the same units as ``fs``; they are
  divided by the Nyquist frequency (half of ``fs``) before being passed to
  ``scipy.signal.butter``.

  Args:
    lowcut: lower edge of the pass band.
    highcut: upper edge of the pass band.
    fs: sampling frequency of the signal to be filtered.
    order: order handed to ``butter`` (default 5).

  Returns:
    The ``(b, a)`` numerator / denominator coefficients returned by
    ``butter``.
  """
  nyquist = 0.5 * fs
  band_edges = [edge / nyquist for edge in (lowcut, highcut)]
  return butter(order, band_edges, btype='band')


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
  """Band-pass filter ``data`` with a Butterworth filter.

  The filter is designed and applied as second-order sections (SOS) rather
  than as a single ``(b, a)`` transfer function. A band-pass of order ``n``
  has a polynomial of degree ``2 * n``; SciPy's documentation warns that the
  ``(b, a)`` form becomes numerically sensitive at such orders and recommends
  the SOS representation for filtering.

  Args:
    data: array-like signal; filtering runs along the last axis.
    lowcut: lower edge of the pass band, in the same units as ``fs``.
    highcut: upper edge of the pass band, in the same units as ``fs``.
    fs: sampling frequency of ``data``.
    order: Butterworth order handed to ``scipy.signal.butter`` (default 5).

  Returns:
    The filtered signal, with the same shape as ``data``.

  Raises:
    ValueError: raised by ``scipy.signal.butter`` when the normalised
      cut-offs do not lie strictly between 0 and 1.
  """
  # Imported here so the function does not rely on the notebook's import
  # cell, which only brings in `butter` and `lfilter`.
  from scipy.signal import sosfilt

  # Same normalisation as butter_bandpass: cut-offs as a fraction of Nyquist.
  nyq = 0.5 * fs
  sos = butter(order, [lowcut / nyq, highcut / nyq], btype='band', output='sos')
  return sosfilt(sos, data)

**Extracting Statistical Features**

In [None]:
# Extract time-averaged statistical features from every audio chunk listed in
# `audioPath_labels` and append them to the features CSV, one batch of
# `noChunks` rows at a time.

errorFileId = []    # row ids (into audioPath_labels) whose processing raised

noChunks = 500      # number of audio chunks processed per saved batch
sr = 44100          # sampling rate passed to librosa.load for every chunk
lowcut = 1000.0     # band-pass lower cut-off, same units as sr
highcut = 2400.0    # band-pass upper cut-off, same units as sr

# First batch to process. The CSV is opened in append mode, so after an
# interrupted run set this to (rows already saved) // noChunks; restarting
# from 0 with an existing file would append duplicate rows.
startBatch = 0

statFeaturesCsv = '/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_StatFeatures.csv'

# Seeded so that repeated runs give the same ICA output.
ica = FastICA(n_components=1, random_state=0)


def extract_stat_features(signal, sr):
  """Return the time-averaged librosa features of one 1-D audio signal.

  Multi-row features (MFCC, chroma variants, spectral contrast, tonnetz) are
  averaged along the time axis and keep one value per row; single-row
  features are averaged to a scalar. Arguments are passed to librosa by
  keyword because librosa >= 0.10 no longer accepts them positionally.
  """
  return {
    'zcr': np.mean(librosa.feature.zero_crossing_rate(y=signal)),
    'mfcc': np.mean(librosa.feature.mfcc(y=signal, sr=sr), axis=1),
    'cs': np.mean(librosa.feature.chroma_stft(y=signal, sr=sr), axis=1),
    'rolloff': np.mean(librosa.feature.spectral_rolloff(y=signal, sr=sr)),
    'rmse': np.mean(librosa.feature.rms(y=signal)),
    'centroids': np.mean(librosa.feature.spectral_centroid(y=signal, sr=sr)),
    'contrast': np.mean(librosa.feature.spectral_contrast(y=signal, sr=sr), axis=1),
    'bandwidth': np.mean(librosa.feature.spectral_bandwidth(y=signal, sr=sr)),
    'tonnetz': np.mean(librosa.feature.tonnetz(y=signal, sr=sr), axis=1),
    'flatness': np.mean(librosa.feature.spectral_flatness(y=signal)),
    'cqt': np.mean(librosa.feature.chroma_cqt(y=signal, sr=sr), axis=1),
    'cens': np.mean(librosa.feature.chroma_cens(y=signal, sr=sr), axis=1),
  }


totalAudios = len(audioPath_labels)
# Ceiling division: the last, possibly shorter, batch must be processed too.
totalBatches = (totalAudios + noChunks - 1) // noChunks

print("Chunks of 500 :")

for i in range(startBatch, totalBatches):

  batchStart = i*noChunks
  batchEnd = min((i+1)*noChunks, totalAudios)
  batchSize = batchEnd - batchStart

  # Chunk Data and Labels
  chunkLabels = np.zeros(batchSize, dtype = 'U256')
  chunkPath = np.zeros(batchSize, dtype = 'U1024')

  # Feature Variables. Allocated per batch and pre-filled with NaN so that a
  # file which fails to process is saved as missing values instead of
  # silently inheriting the numbers left over from the previous batch.
  mfcc_final = np.full((batchSize, 20), np.nan)
  zcr_final = np.full(batchSize, np.nan)
  cs_final = np.full((batchSize, 12), np.nan)
  rolloff_final = np.full(batchSize, np.nan)
  RMSE_final = np.full(batchSize, np.nan)
  centroids_final = np.full(batchSize, np.nan)
  contrast_final = np.full((batchSize, 7), np.nan)
  bandwidth_final = np.full(batchSize, np.nan)
  tonnetz_final = np.full((batchSize, 6), np.nan)
  flatness_final = np.full(batchSize, np.nan)
  cqt_final = np.full((batchSize, 12), np.nan)
  cens_final = np.full((batchSize, 12), np.nan)

  featureArrays = {
    'zcr': zcr_final, 'mfcc': mfcc_final, 'cs': cs_final,
    'rolloff': rolloff_final, 'rmse': RMSE_final, 'centroids': centroids_final,
    'contrast': contrast_final, 'bandwidth': bandwidth_final,
    'tonnetz': tonnetz_final, 'flatness': flatness_final,
    'cqt': cqt_final, 'cens': cens_final,
  }

  for curChunk, fileId in enumerate(range(batchStart, batchEnd)):
    if (fileId%100 == 0 or fileId==(totalAudios-1)):
      print("Processed Audios : ", fileId)

    filePath = audioPath_labels["AudioPath"].iloc[fileId]

    # Recorded before processing so that a failed row is still identifiable
    # in the saved CSV.
    chunkLabels[curChunk] = audioPath_labels["ClassLabels"].iloc[fileId]
    chunkPath[curChunk] = filePath

    try:
      x1, sr1 = librosa.load(filePath, sr = sr)

      # Bandpass
      band_out = butter_bandpass_filter(x1, lowcut, highcut, sr, order=6)

      # ICA
      ica_out = np.asarray(ica.fit_transform(band_out.reshape(-1,1)))
      chunkData = ica_out.reshape(-1)

      # All features are computed first and stored afterwards, so an error
      # part-way through never leaves a half-filled row.
      features = extract_stat_features(chunkData, sr)
      for featureName, featureValue in features.items():
        featureArrays[featureName][curChunk] = featureValue

    except Exception as e:
      # Best effort: log the failure, remember the row id and carry on.
      print(e)
      errorFileId.append(fileId)
      print("File Path: " + str(filePath))

  #Saving file
  # Scalar features first, then the indexed columns, in the established
  # column order of the features CSV.
  # NOTE: flatness_final is computed but not written, which keeps the
  # existing 76-column layout of the file.
  featureColumns = {
    'ZeroCrossingsRate': zcr_final, 'SpectralRolloff': rolloff_final,
    'RMSEnergy': RMSE_final, 'SpectralCentroids': centroids_final,
    'SpectralBandwidth': bandwidth_final,
  }
  for prefix, matrix in (('MFCC', mfcc_final), ('CS', cs_final),
                         ('Ccqt', cqt_final), ('Ccens', cens_final),
                         ('SC', contrast_final), ('TZ', tonnetz_final)):
    for col in range(matrix.shape[1]):
      featureColumns[prefix + str(col)] = matrix[:, col]

  # Column names follow the data: labels under 'ClassLabels', paths under
  # 'AudioPath'.
  path_labels = pd.DataFrame({'ClassLabels': chunkLabels, 'AudioPath': chunkPath})
  path_labels = path_labels.assign(**featureColumns)

  # Append the batch; the header is written only when the file is created.
  path_labels.to_csv(statFeaturesCsv, mode = 'a', header = not os.path.exists(statFeaturesCsv), index=False)
  print("Saved : ", batchStart, " to ", batchEnd)

Chunks of 500 :
Processed Audios :  165500
Processed Audios :  165600
Processed Audios :  165700
Processed Audios :  165800
Processed Audios :  165900
Saved :  165500  to  166000
Processed Audios :  166000
Processed Audios :  166100
Processed Audios :  166200
Processed Audios :  166300
Processed Audios :  166400
Saved :  166000  to  166500
Processed Audios :  166500
Processed Audios :  166600
Processed Audios :  166700
Processed Audios :  166800
Processed Audios :  166900
Saved :  166500  to  167000
Processed Audios :  167000
Processed Audios :  167100
Processed Audios :  167200
Processed Audios :  167300
Processed Audios :  167400
Saved :  167000  to  167500
Processed Audios :  167500
Processed Audios :  167600
Processed Audios :  167700
Processed Audios :  167800
Processed Audios :  167900
Saved :  167500  to  168000
Processed Audios :  168000
Processed Audios :  168100
Processed Audios :  168200
Processed Audios :  168300
Processed Audios :  168400
Saved :  168000  to  168500
Proces

In [None]:
# Loading final CSV file of statistical features which contains ClassLabels, AudioPath and Features
#
# Reads the features table back from Drive and prints it as a check on the
# saved rows and columns.

statFeaturesCsv = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_StatFeatures.csv"
finalFile = pd.read_csv(statFeaturesCsv)
print(finalFile)

       ClassLabels  ...       TZ5
0           aldfly  ...  0.000774
1           aldfly  ...  0.001865
2           aldfly  ...  0.001195
3           aldfly  ...  0.005076
4           aldfly  ...  0.003674
...            ...  ...       ...
389773      yetvir  ...  0.010916
389774      yetvir  ...  0.004017
389775      yetvir  ... -0.000790
389776      yetvir  ... -0.000111
389777      yetvir  ...  0.002673

[389778 rows x 76 columns]
