## IMPORTING LIBRARIES

In [None]:
import numpy as np
import scipy as sc
import IPython.display as ipd
from scipy import signal
import librosa.display
import matplotlib.pyplot as plt
import scipy.io.wavfile as wavfile
import pandas as pd
import cv2
import sklearn
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import *
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier

## LOADING IN THE FILES

In [None]:
import librosa
import os
# Root folder that holds one sub-directory of recordings per sound class.
ROOT_DIR = '/Users/damilolaojedeji/Documents/Classes/Fall 2022/ML/Assignment2/'
# Class names; each one is also the name of a sub-directory under ROOT_DIR.
Folders=["Blender","Microwave","Music","Siren", "Vacuum"]

# Full path of every .wav recording, grouped by class folder.
# The extension is checked explicitly (case-insensitively) so that names which
# merely contain "wav" are not picked up, and the folder is kept in the path so
# that each entry can actually be opened.
Files = []
for folder in Folders:
    folder_path = os.path.join(ROOT_DIR, folder)
    # os.listdir returns entries in arbitrary order; sort for a stable listing.
    for file in sorted(os.listdir(folder_path)):
        if file.lower().endswith('.wav'):
            Files.append(os.path.join(folder_path, file))

## PREDICTION WITHOUT BINNING AND WINDOWING

In [None]:
FFT_SIZE = 2048  # samples per spectrogram segment; consecutive segments overlap by half

# One row per recording: the class label followed by three summary statistics
# of the log-power spectrogram. The column names say "freq", but the values are
# the max / mean / standard deviation of log power over the whole spectrogram.
feature_rows = []
for folder in Folders:
    for file_path in librosa.util.find_files(f"{ROOT_DIR}/{folder}", ext=['wav']):
        # sr=None keeps the file's native sampling rate.
        samples, sample_rate = librosa.load(file_path, sr=None, mono=True, offset=0.0, duration=None)
        trimmed, _ = librosa.effects.trim(samples)  # trim audio
        _, _, power = signal.spectrogram(trimmed, fs=sample_rate, nperseg=FFT_SIZE, noverlap=FFT_SIZE // 2)
        log_spectogram = np.log(power + .0001)  # .0001 keeps log() finite where the power is zero

        feature_rows.append([
            folder,
            np.max(log_spectogram),
            np.mean(log_spectogram),
            np.std(log_spectogram),
        ])

# Build the table once instead of enlarging a DataFrame one row at a time.
feature_df = pd.DataFrame(feature_rows, columns=['label', 'max_freq', 'mean_freq', 'std_freq'])

In [None]:
# Pull the class labels out of the feature table ...
feature_labels = feature_df['label']

# ... and rescale the three numeric columns with a RobustScaler.
# NOTE(review): the scaler is fitted on every row before the train/test split
# that follows, so the held-out rows influence the scaling statistics.
scaler = RobustScaler()
feature_data = scaler.fit_transform(feature_df[['max_freq', 'mean_freq', 'std_freq']])

In [None]:

# Seed shared by the split and the forest so that every re-run reports the same scores.
RANDOM_STATE = 42

# Hold out 30% of the recordings for the final test.
xtrain, xtest, ytrain, ytest = train_test_split(
    feature_data, feature_labels, test_size=0.30, random_state=RANDOM_STATE
)

#training the model
clf = RandomForestClassifier(random_state=RANDOM_STATE)
# cross_val_score fits clones of clf, so it neither needs nor alters the fit below.
cv_scores = cross_val_score(clf, xtrain, ytrain, cv=10)
print('Average Cross Validation Score from Training:', cv_scores.mean(), sep='\n', end='\n\n\n')
clf.fit(xtrain, ytrain)

#testing the model
ypred = clf.predict(xtest)
cm = confusion_matrix(ytest, ypred)
cr = classification_report(ytest, ypred)

print('Confusion Matrix:', cm, sep='\n', end='\n\n\n')
print('Test Statistics:', cr, sep='\n', end='\n\n\n')

print('Testing Accuracy:', accuracy_score(ytest, ypred))

## PREDICTION WITH BINNING BUT WITHOUT WINDOWING

In [None]:
FFT_SIZE = 2048      # samples per spectrogram segment; consecutive segments overlap by half
num_time_bins = 10   # columns of the binned spectrogram
num_freq_bins = 10   # rows of the binned spectrogram
Features = []        # not used by this cell; kept so the name stays defined

# One row per recording: the class label followed by the max / mean / standard
# deviation of the log-power spectrogram after it has been resized ("binned")
# to num_freq_bins x num_time_bins.
binned_rows = []
for folder in Folders:
    for file_path in librosa.util.find_files(f"{ROOT_DIR}/{folder}", ext=['wav']):
        # sr=None keeps the file's native sampling rate.
        samples, sample_rate = librosa.load(file_path, sr=None, mono=True, offset=0.0, duration=None)
        trimmed, _ = librosa.effects.trim(samples)
        _, _, power = signal.spectrogram(trimmed, fs=sample_rate, nperseg=FFT_SIZE, noverlap=FFT_SIZE // 2)

        pxx = np.log(power + .0001)  # .0001 keeps log() finite where the power is zero

        # Binning. cv2.resize takes dsize as (width, height) = (columns, rows).
        # The spectrogram has frequency on its rows and time on its columns, so
        # the time-bin count goes first to get a (num_freq_bins, num_time_bins) result.
        resized_pxx = cv2.resize(pxx, (num_time_bins, num_freq_bins))

        binned_rows.append([
            folder,
            np.max(resized_pxx),
            np.mean(resized_pxx),
            np.std(resized_pxx),
        ])

# Build the table once instead of enlarging a DataFrame one row at a time.
feature_df_bin = pd.DataFrame(binned_rows, columns=['label', 'max_freq', 'mean_freq', 'std_freq'])

In [None]:
# Take features and labels from the same table so the rows are guaranteed to
# line up, rather than borrowing the labels from a table built in another cell.
feature_data_bin = feature_df_bin[['max_freq', 'mean_freq', 'std_freq']]
feature_labels = feature_df_bin['label']

# Rescale the three numeric columns with a RobustScaler.
scaler = RobustScaler()
feature_data_bin = scaler.fit_transform(feature_data_bin)

In [None]:
# Seed shared by the split and the forest so that every re-run reports the same scores.
RANDOM_STATE = 42

# Hold out 30% of the recordings for the final test.
xtrain, xtest, ytrain, ytest = train_test_split(
    feature_data_bin, feature_labels, test_size=0.30, random_state=RANDOM_STATE
)

#training the model
clf = RandomForestClassifier(random_state=RANDOM_STATE)
# cross_val_score fits clones of clf, so it neither needs nor alters the fit below.
cv_scores = cross_val_score(clf, xtrain, ytrain, cv=10)
print('Average Cross Validation Score from Training:', cv_scores.mean(), sep='\n', end='\n\n\n')
clf.fit(xtrain, ytrain)

#testing the model
ypred = clf.predict(xtest)
cm = confusion_matrix(ytest, ypred)
cr = classification_report(ytest, ypred)

print('Confusion Matrix:', cm, sep='\n', end='\n\n\n')
print('Test Statistics:', cr, sep='\n', end='\n\n\n')
print('Testing Accuracy:', accuracy_score(ytest, ypred))

## PREDICTION WITH BINNING AND WINDOWING

In [None]:
no_of_window = 5  # number of half-overlapping windows per spectrogram

# Column labels. Each statistic name is repeated once per window and the names
# are grouped by statistic (all max, then all mean, then all std) so that they
# line up with the order in which the values are written into each row below.
# The names are deliberately left non-unique: selecting
# ['max_freq', 'mean_freq', 'std_freq'] from the table then returns every window column.
window_columns = ['label'] + [
    name for name in ('max_freq', 'mean_freq', 'std_freq') for _ in range(no_of_window)
]

window_rows = []
# loop through .wav files
for folder in Folders:
    for file_path in librosa.util.find_files(f"{ROOT_DIR}/{folder}", ext=['wav']):
        # sr=None keeps the file's native sampling rate.
        samples, sample_rate = librosa.load(file_path, sr=None, mono=True, offset=0.0, duration=None)
        trimmed, _ = librosa.effects.trim(samples)
        _, _, power = signal.spectrogram(trimmed, fs=sample_rate, nperseg=FFT_SIZE, noverlap=FFT_SIZE // 2)

        spec = np.log(power + .0001)  # .0001 keeps log() finite where the power is zero

        # len(spec) is the number of rows of the spectrogram, so the windows
        # below are slices along its first axis.
        # NOTE(review): scipy puts frequency on the first axis and segment times
        # on the last, so these are frequency bands, not time windows - confirm
        # that this is the intended windowing.
        L = len(spec)
        window_length = int(np.ceil(L / (no_of_window * 0.5 + 0.5)))

        # Window boundaries half a window apart, clamped to the array length.
        r = [int(min(L, i * window_length * 0.5)) for i in range(no_of_window + 2)]
        # Window i spans two consecutive half-steps, so neighbours overlap by 50%.
        windows = [spec[r[i]:r[i + 2], :] for i in range(no_of_window)]

        max_freq = [np.max(window) for window in windows]
        mean_freq = [np.mean(window) for window in windows]
        std_freq = [np.std(window) for window in windows]

        window_rows.append([folder] + max_freq + mean_freq + std_freq)

# Build the table once instead of enlarging a DataFrame one row at a time.
feature_wind = pd.DataFrame(window_rows, columns=window_columns)

In [None]:
# Take features and labels from the same table so the rows are guaranteed to line up.
# The three statistic names are repeated once per window in feature_wind, so
# this selection returns every window column.
feature_data_wind = feature_wind[['max_freq', 'mean_freq', 'std_freq']]
feature_labels = feature_wind['label']

# Apply the scaler, as the other feature sets in this notebook do (it was
# previously created here but never used).
scaler = RobustScaler()
feature_data_wind = scaler.fit_transform(feature_data_wind.to_numpy(dtype=float))

In [None]:
# Seed shared by the split and the forest so that every re-run reports the same scores.
RANDOM_STATE = 42

# Hold out 30% of the recordings for the final test.
xtrain, xtest, ytrain, ytest = train_test_split(
    feature_data_wind, feature_labels, test_size=0.30, random_state=RANDOM_STATE
)

#training the model
clf = RandomForestClassifier(random_state=RANDOM_STATE)
# cross_val_score fits clones of clf, so it neither needs nor alters the fit below.
cv_scores = cross_val_score(clf, xtrain, ytrain, cv=10)
print('Average Cross Validation Score from Training:', cv_scores.mean(), sep='\n', end='\n\n\n')
clf.fit(xtrain, ytrain)

#testing the model
ypred = clf.predict(xtest)
cm = confusion_matrix(ytest, ypred)
cr = classification_report(ytest, ypred)

print('Confusion Matrix:', cm, sep='\n', end='\n\n\n')
print('Test Statistics:', cr, sep='\n', end='\n\n\n')

#This is what we will be grading (>95 expected)
print('Testing Accuracy:', accuracy_score(ytest, ypred))

## MFCCs (domain-specific feature)

In [None]:
# Example recording used for the visualisations below. It is derived from
# ROOT_DIR so the data location is configured in one place only.
full_path = os.path.join(ROOT_DIR, "Siren", "Siren1.wav")

In [None]:
# Decode the example clip with librosa's default loader settings, then embed
# an audio player for it as the cell output.
wave_x, sampling_rate = librosa.load(full_path)
ipd.Audio(data=wave_x, rate=sampling_rate)

In [None]:
# Draw the waveform of the example clip on a wide figure with a light-grey background.
fig, ax = plt.subplots(figsize=(15, 4), facecolor=(.9, .9, .9))
librosa.display.waveshow(wave_x, sr=sampling_rate, color='green', ax=ax)

In [None]:
# MFCC matrix of the example clip. The audio is passed by keyword because
# current librosa releases accept `y` as a keyword-only argument.
mfccs = librosa.feature.mfcc(y=wave_x, sr=sampling_rate)

In [None]:
# Show the MFCC matrix as a heat-map with time along the x axis.
librosa.display.specshow(mfccs, x_axis='time', sr=sampling_rate)

## Chroma - Domain specific feature 

In [None]:
hop_length = 1024  # samples between successive chroma frames

# Chromagram of the example clip. The audio is passed by keyword because
# current librosa releases accept `y` as a keyword-only argument.
chromagram = librosa.feature.chroma_stft(y=wave_x, sr=sampling_rate, hop_length=hop_length)

In [None]:
# Show the chromagram as a heat-map: time on the x axis, pitch class on the y axis.
fig, ax = plt.subplots(figsize=(15, 5))
librosa.display.specshow(
    chromagram,
    x_axis='time',
    y_axis='chroma',
    hop_length=hop_length,
    cmap='coolwarm',
    ax=ax,
)

In [None]:
FFT_SIZE = 2048

hop_length = 1024  # samples between successive chroma frames

# One row per recording: the class label followed by the max / mean / standard
# deviation of the recording's chromagram (the domain-specific feature).
chroma_rows = []
for folder in Folders:
    for file_path in librosa.util.find_files(f"{ROOT_DIR}/{folder}", ext=['wav']):
        # sr=None keeps the file's native sampling rate.
        samples, sample_rate = librosa.load(file_path, sr=None, mono=True, offset=0.0, duration=None)
        trimmed, _ = librosa.effects.trim(samples)

        # Audio and sampling rate are passed by keyword because current
        # librosa releases accept them as keyword-only arguments.
        chromagram = librosa.feature.chroma_stft(y=trimmed, sr=sample_rate, hop_length=hop_length)

        # NOTE(review): chroma_stft normalises each frame by default, so the
        # overall maximum is presumably the same for most clips - confirm that
        # max_freq carries any signal here.
        chroma_rows.append([
            folder,
            np.max(chromagram),
            np.mean(chromagram),
            np.std(chromagram),
        ])

# Build the table once. The previous row-by-row append indexed with
# len(feature_ds_bin), a name that was never defined, and so raised NameError.
feature_ds = pd.DataFrame(chroma_rows, columns=['label', 'max_freq', 'mean_freq', 'std_freq'])
# Legacy alias so that any cell still reading `feature_ds_bin` keeps working.
feature_ds_bin = feature_ds
    

In [None]:
# Read from `feature_ds`, the chroma feature table; `feature_ds_bin` was never
# assigned anywhere. Features and labels come from the same table so the rows
# are guaranteed to line up.
feature_data_ds = feature_ds[['max_freq', 'mean_freq', 'std_freq']]
feature_labels = feature_ds['label']

# Rescale the three numeric columns with a RobustScaler.
scaler = RobustScaler()
feature_data_ds = scaler.fit_transform(feature_data_ds)

In [None]:
# Seed shared by the split and the forest so that every re-run reports the same scores.
RANDOM_STATE = 42

# Hold out 30% of the recordings for the final test.
xtrain, xtest, ytrain, ytest = train_test_split(
    feature_data_ds, feature_labels, test_size=0.30, random_state=RANDOM_STATE
)

#training the model
clf = RandomForestClassifier(random_state=RANDOM_STATE)
# cross_val_score fits clones of clf, so it neither needs nor alters the fit below.
cv_scores = cross_val_score(clf, xtrain, ytrain, cv=10)
print('Average Cross Validation Score from Training:', cv_scores.mean(), sep='\n', end='\n\n\n')
clf.fit(xtrain, ytrain)

#testing the model
ypred = clf.predict(xtest)
cm = confusion_matrix(ytest, ypred)
cr = classification_report(ytest, ypred)

print('Confusion Matrix:', cm, sep='\n', end='\n\n\n')
print('Test Statistics:', cr, sep='\n', end='\n\n\n')

print('Testing Accuracy:', accuracy_score(ytest, ypred))

## PREDICTION WITH WINDOWING (DOMAIN SPECIFIC)

In [None]:
no_of_window = 5  # number of half-overlapping windows per chromagram

# Column labels. Each statistic name is repeated once per window and the names
# are grouped by statistic (all max, then all mean, then all std) so that they
# line up with the order in which the values are written into each row below.
# The names are deliberately left non-unique: selecting
# ['max_freq', 'mean_freq', 'std_freq'] from the table then returns every window column.
window_columns = ['label'] + [
    name for name in ('max_freq', 'mean_freq', 'std_freq') for _ in range(no_of_window)
]

window_rows = []
# loop through .wav files
for folder in Folders:
    for file_path in librosa.util.find_files(f"{ROOT_DIR}/{folder}", ext=['wav']):
        # sr=None keeps the file's native sampling rate.
        samples, sample_rate = librosa.load(file_path, sr=None, mono=True, offset=0.0, duration=None)
        trimmed, _ = librosa.effects.trim(samples)
        # Audio and sampling rate are passed by keyword because current
        # librosa releases accept them as keyword-only arguments.
        chromagram = librosa.feature.chroma_stft(y=trimmed, sr=sample_rate, hop_length=hop_length)

        # len(chromagram) is the number of rows of the chromagram, so the
        # windows below are slices along its first axis.
        # NOTE(review): that axis presumably indexes chroma bins rather than
        # time frames - confirm that this is the intended windowing.
        L = len(chromagram)
        window_length = int(np.ceil(L / (no_of_window * 0.5 + 0.5)))

        # Window boundaries half a window apart, clamped to the array length.
        r = [int(min(L, i * window_length * 0.5)) for i in range(no_of_window + 2)]
        # Window i spans two consecutive half-steps, so neighbours overlap by 50%.
        windows = [chromagram[r[i]:r[i + 2], :] for i in range(no_of_window)]

        max_freq = [np.max(window) for window in windows]
        mean_freq = [np.mean(window) for window in windows]
        std_freq = [np.std(window) for window in windows]

        window_rows.append([folder] + max_freq + mean_freq + std_freq)

# Build the table once instead of enlarging a DataFrame one row at a time.
feature_wind_ds = pd.DataFrame(window_rows, columns=window_columns)

In [None]:
# Take features and labels from the same table so the rows are guaranteed to line up.
# The three statistic names are repeated once per window in feature_wind_ds,
# so this selection returns every window column.
feature_data_wind_ds = feature_wind_ds[['max_freq', 'mean_freq', 'std_freq']]
feature_labels = feature_wind_ds['label']

# Rescale the numeric columns with a RobustScaler.
scaler = RobustScaler()
feature_data_wind_ds = scaler.fit_transform(feature_data_wind_ds)

In [None]:
# Seed shared by the split and the forest so that every re-run reports the same scores.
RANDOM_STATE = 42

# Hold out 30% of the recordings for the final test.
xtrain, xtest, ytrain, ytest = train_test_split(
    feature_data_wind_ds, feature_labels, test_size=0.30, random_state=RANDOM_STATE
)

#training the model
clf = RandomForestClassifier(random_state=RANDOM_STATE)
# cross_val_score fits clones of clf, so it neither needs nor alters the fit below.
cv_scores = cross_val_score(clf, xtrain, ytrain, cv=10)
print('Average Cross Validation Score from Training:', cv_scores.mean(), sep='\n', end='\n\n\n')
clf.fit(xtrain, ytrain)

#testing the model
ypred = clf.predict(xtest)
cm = confusion_matrix(ytest, ypred)
cr = classification_report(ytest, ypred)

print('Confusion Matrix:', cm, sep='\n', end='\n\n\n')
print('Test Statistics:', cr, sep='\n', end='\n\n\n')

#This is what we will be grading (>95 expected)
print('Testing Accuracy:', accuracy_score(ytest, ypred))