In [14]:
import os
import librosa
import numpy as np
import pandas as pd

# Path to audio files
audio_folder_path = r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/HW5'

# Prepare list to hold features and labels
features_list = []
labels = []  # Use if you have label data

for filename in os.listdir(audio_folder_path):
    if filename.endswith(".wav"):
        file_path = os.path.join(audio_folder_path, filename)
        
        # Load audio
        y, sr = librosa.load(file_path)
        
        # Extract features
        mfcc = librosa.feature.mfcc(y=y, sr=48000, n_fft = 2048, hop_length = 512, n_mfcc=13).mean(axis=1)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr, fmin=50.0, n_bands=4).mean(axis=1)

        # Flatten and combine features
        features = np.hstack([mfcc, chroma, spectral_contrast])
        features_list.append(features)

# Convert to DataFrame
columns = [f'mfcc_{i}' for i in range(13)] + [f'chroma_{i}' for i in range(12)] + [f'spectral_{i}' for i in range(5)]
df = pd.DataFrame(features_list, columns=columns)


In [15]:
df

Unnamed: 0,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,...,chroma_7,chroma_8,chroma_9,chroma_10,chroma_11,spectral_0,spectral_1,spectral_2,spectral_3,spectral_4
0,-552.902161,22.684290,-26.931173,-5.886051,-28.016903,-19.979425,-2.863987,-8.894402,-1.455967,2.749157,...,0.367262,0.319500,0.371089,0.379138,0.332331,7.377384,6.472880,8.960050,20.336298,40.529993
1,-538.385864,3.364012,-45.456249,-1.516048,-29.618446,-16.022963,-9.452320,-13.803437,2.730068,-0.744813,...,0.405108,0.381930,0.427309,0.371818,0.380971,8.324263,6.740893,8.547026,19.657679,41.672362
2,-526.573303,14.465881,-38.190113,-13.814740,-20.021784,-21.203564,-5.581099,-9.846246,2.299063,5.428998,...,0.460022,0.438687,0.408278,0.399060,0.439300,9.494720,8.275795,8.957377,20.333466,40.323618
3,-594.845032,2.142517,-53.766155,-7.876544,-28.994806,-20.061617,-5.327751,-9.434718,1.806913,-6.161159,...,0.311888,0.414934,0.409194,0.394784,0.394787,7.631609,10.046579,11.584344,21.341090,43.669252
4,-679.459839,24.994795,-37.267689,-13.739269,-25.516190,-19.146444,-7.709716,-19.823643,2.750490,-4.100357,...,0.352170,0.304213,0.228761,0.207045,0.267535,7.854300,9.519986,11.527567,24.415997,42.067634
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1007,-569.816223,54.689980,-32.596516,-6.012739,-22.450716,-23.354284,-1.420913,-8.425967,-9.155874,-6.566686,...,0.261869,0.332699,0.351286,0.329142,0.339389,9.692666,8.498748,17.162215,16.187645,37.995366
1008,-465.234863,41.713882,-34.157848,7.870391,-11.479133,-15.336500,-7.497872,-1.839165,-5.219193,-1.413557,...,0.429174,0.379016,0.355517,0.303318,0.345956,5.456435,8.728136,20.079830,19.521328,41.828095
1009,-474.916138,49.511570,-36.862991,3.358233,-27.952831,-20.018675,-9.314782,-5.227192,-4.729958,-3.421104,...,0.409252,0.426818,0.388537,0.325702,0.329496,9.360326,7.895346,19.588188,18.735982,43.837756
1010,-585.095276,63.431713,-22.549849,8.111218,-2.510392,-16.009018,-0.022727,-3.471063,1.438526,-1.987492,...,0.352884,0.336896,0.275761,0.245892,0.345393,12.468977,7.993261,20.103376,19.948378,43.616341


In [16]:
df.to_excel('audio_features_new.xlsx',index=False)

In [17]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Load your data
audio_data = pd.read_excel(r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/audio_features_new.xlsx')

# Initialize the MinMaxScaler to normalize features between 0 and 1
scaler = MinMaxScaler()

# Apply the scaler to the entire dataset, assuming all columns are features to normalize
normalized_data = pd.DataFrame(scaler.fit_transform(audio_data), columns=audio_data.columns)

# Display the first few rows of the normalized data
print(normalized_data.head())
normalized_data.to_excel('normalized_data_audio_features_new.xlsx',index=False)

     mfcc_0    mfcc_1    mfcc_2    mfcc_3    mfcc_4    mfcc_5    mfcc_6  \
0  0.501790  0.347786  0.552677  0.497981  0.290662  0.394305  0.507790   
1  0.539027  0.167942  0.265533  0.583828  0.251786  0.502527  0.332049   
2  0.569329  0.271285  0.378160  0.342223  0.484737  0.360820  0.435312   
3  0.394198  0.156571  0.136727  0.458878  0.266925  0.392056  0.442070   
4  0.177144  0.369294  0.392458  0.343706  0.351365  0.417089  0.378532   

     mfcc_7    mfcc_8    mfcc_9  ...  chroma_7  chroma_8  chroma_9  chroma_10  \
0  0.472511  0.420933  0.526178  ...  0.508408  0.395585  0.512943   0.641122   
1  0.321971  0.518917  0.438774  ...  0.584684  0.525889  0.648158   0.623660   
2  0.443322  0.508828  0.593217  ...  0.695359  0.644351  0.602388   0.688650   
3  0.455942  0.497308  0.303279  ...  0.396806  0.594774  0.604590   0.678450   
4  0.137356  0.519395  0.354832  ...  0.477990  0.363677  0.170628   0.230569   

   chroma_11  spectral_0  spectral_1  spectral_2  spectral_3  

In [18]:
import os
import librosa
import numpy as np
import pandas as pd

# Path to audio files
audio_folder_path = r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/HW5'

# Define the noise reduction function
def reduce_noise(y, sr, noise_reduction_factor=0.1, n_iter=3):
    """Reduce noise in an audio signal using spectral gating."""
    for _ in range(n_iter):
        stft = librosa.stft(y)
        stft_magnitude, stft_phase = librosa.magphase(stft)
        noise_magnitude = np.mean(stft_magnitude[:, :int(sr * 0.1)], axis=1)
        mask = stft_magnitude > noise_reduction_factor * noise_magnitude[:, np.newaxis]
        stft_magnitude_denoised = stft_magnitude * mask
        stft_denoised = stft_magnitude_denoised * stft_phase
        y = librosa.istft(stft_denoised)
    return y

# Prepare list to hold features and labels
features_list = []
labels = []  # Use if you have label data

# Loop through each audio file in the folder
for filename in os.listdir(audio_folder_path):
    if filename.endswith(".wav"):
        file_path = os.path.join(audio_folder_path, filename)
        
        # Load audio
        y, sr = librosa.load(file_path)
        
        # Apply noise reduction
        y_denoised = reduce_noise(y, sr, noise_reduction_factor=10, n_iter=10)
        
        # Extract features from the denoised audio
        mfcc = librosa.feature.mfcc(y=y, sr=48000, n_fft = 2048, hop_length = 512, n_mfcc=13).mean(axis=1)
        chroma = librosa.feature.chroma_stft(y=y_denoised, sr=sr).mean(axis=1)
        spectral_contrast = librosa.feature.spectral_contrast(y=y_denoised, sr=sr, fmin=50.0, n_bands=4).mean(axis=1)

        # Flatten and combine features
        features = np.hstack([mfcc, chroma, spectral_contrast])
        features_list.append(features)

# Define the columns for the DataFrame


In [19]:
columns = [f'mfcc_{i}' for i in range(13)] + [f'chroma_{i}' for i in range(12)] + [f'spectral_{i}' for i in range(5)]

# Create DataFrame from features list
df_denoised = pd.DataFrame(features_list, columns=columns)

# Optional: Add labels if available
# df_denoised['label'] = labels  # Uncomment if you have labels

# Display the DataFrame
print(df_denoised.head())


       mfcc_0     mfcc_1     mfcc_2     mfcc_3     mfcc_4     mfcc_5  \
0 -552.902161  22.684290 -26.931173  -5.886051 -28.016903 -19.979425   
1 -538.385864   3.364012 -45.456249  -1.516048 -29.618446 -16.022963   
2 -526.573303  14.465881 -38.190113 -13.814740 -20.021784 -21.203564   
3 -594.845032   2.142517 -53.766155  -7.876544 -28.994806 -20.061617   
4 -679.459839  24.994795 -37.267689 -13.739269 -25.516190 -19.146444   

     mfcc_6     mfcc_7    mfcc_8    mfcc_9  ...  chroma_7  chroma_8  chroma_9  \
0 -2.863987  -8.894402 -1.455967  2.749157  ...  0.103294  0.154927  0.178822   
1 -9.452320 -13.803437  2.730068 -0.744813  ...  0.105264  0.164167  0.136279   
2 -5.581099  -9.846246  2.299063  5.428998  ...  0.150639  0.150526  0.130194   
3 -5.327751  -9.434718  1.806913 -6.161159  ...  0.137698  0.111277  0.108554   
4 -7.709716 -19.823643  2.750490 -4.100357  ...  0.093830  0.107793  0.114558   

   chroma_10  chroma_11  spectral_0  spectral_1  spectral_2  spectral_3  \
0   0

In [20]:
df_denoised

Unnamed: 0,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,...,chroma_7,chroma_8,chroma_9,chroma_10,chroma_11,spectral_0,spectral_1,spectral_2,spectral_3,spectral_4
0,-552.902161,22.684290,-26.931173,-5.886051,-28.016903,-19.979425,-2.863987,-8.894402,-1.455967,2.749157,...,0.103294,0.154927,0.178822,0.112214,0.092344,9.649834,9.787258,11.776335,18.467273,54.341964
1,-538.385864,3.364012,-45.456249,-1.516048,-29.618446,-16.022963,-9.452320,-13.803437,2.730068,-0.744813,...,0.105264,0.164167,0.136279,0.104068,0.145885,16.002632,16.826487,18.287921,25.051705,56.806426
2,-526.573303,14.465881,-38.190113,-13.814740,-20.021784,-21.203564,-5.581099,-9.846246,2.299063,5.428998,...,0.150639,0.150526,0.130194,0.122418,0.167309,11.684674,11.756954,12.298429,18.399465,49.575316
3,-594.845032,2.142517,-53.766155,-7.876544,-28.994806,-20.061617,-5.327751,-9.434718,1.806913,-6.161159,...,0.137698,0.111277,0.108554,0.131246,0.136753,13.577765,14.165599,17.192221,22.818943,58.017287
4,-679.459839,24.994795,-37.267689,-13.739269,-25.516190,-19.146444,-7.709716,-19.823643,2.750490,-4.100357,...,0.093830,0.107793,0.114558,0.038500,0.018780,14.502537,15.154721,17.343883,23.453260,52.053555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1007,-569.816223,54.689980,-32.596516,-6.012739,-22.450716,-23.354284,-1.420913,-8.425967,-9.155874,-6.566686,...,0.100944,0.084431,0.127163,0.139535,0.139127,12.467353,14.147658,16.021328,21.899673,50.073371
1008,-465.234863,41.713882,-34.157848,7.870391,-11.479133,-15.336500,-7.497872,-1.839165,-5.219193,-1.413557,...,0.146903,0.133481,0.135249,0.104018,0.082359,9.820819,10.975427,13.842488,19.226346,47.173344
1009,-474.916138,49.511570,-36.862991,3.358233,-27.952831,-20.018675,-9.314782,-5.227192,-4.729958,-3.421104,...,0.173764,0.160167,0.196852,0.180476,0.111880,5.956495,6.606280,9.391815,16.016407,47.947227
1010,-585.095276,63.431713,-22.549849,8.111218,-2.510392,-16.009018,-0.022727,-3.471063,1.438526,-1.987492,...,0.107039,0.108733,0.127104,0.093078,0.108751,16.373722,16.736260,19.232318,24.589999,54.562115


In [21]:
df_denoised.to_excel('denoised_audio_features_new.xlsx',index=False)

In [22]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Load your data
audio_data = pd.read_excel(r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/denoised_audio_features_new.xlsx')

# Initialize the MinMaxScaler to normalize features between 0 and 1
scaler = MinMaxScaler()

# Apply the scaler to the entire dataset, assuming all columns are features to normalize
normalized_data = pd.DataFrame(scaler.fit_transform(audio_data), columns=audio_data.columns)

# Display the first few rows of the normalized data
print(normalized_data.head())
normalized_data.to_excel('normalized_data_denoised_audio_features_new.xlsx',index=False)

     mfcc_0    mfcc_1    mfcc_2    mfcc_3    mfcc_4    mfcc_5    mfcc_6  \
0  0.501790  0.347786  0.552677  0.497981  0.290662  0.394305  0.507790   
1  0.539027  0.167942  0.265533  0.583828  0.251786  0.502527  0.332049   
2  0.569329  0.271285  0.378160  0.342223  0.484737  0.360820  0.435312   
3  0.394198  0.156571  0.136727  0.458878  0.266925  0.392056  0.442070   
4  0.177144  0.369294  0.392458  0.343706  0.351365  0.417089  0.378532   

     mfcc_7    mfcc_8    mfcc_9  ...  chroma_7  chroma_8  chroma_9  chroma_10  \
0  0.472511  0.420933  0.526178  ...  0.293725  0.538264  0.614917   0.400940   
1  0.321971  0.518917  0.438774  ...  0.303862  0.586421  0.427586   0.361876   
2  0.443322  0.508828  0.593217  ...  0.537264  0.515330  0.400796   0.449872   
3  0.455942  0.497308  0.303279  ...  0.470699  0.310796  0.305509   0.492205   
4  0.137356  0.519395  0.354832  ...  0.245048  0.292636  0.331943   0.047447   

   chroma_11  spectral_0  spectral_1  spectral_2  spectral_3  

In [None]:
import pandas as pd

# Load the Excel file

df = pd.read_excel(r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/normalized_data_audio_features_new.xlsx')

# Mapping of numbers to emotions (RAVDESS)
emotion_map = {
    '01': 'Neutral',
    '02': 'Calm',
    '03': 'Happy',
    '04': 'Sad',
    '05': 'Angry',
    '06': 'Fearful',
    '07': 'Disgust',
    '08': 'Surprised'
}

# Extract the third segment from the filenames and map to corresponding emotion
df['Emotion'] = df['Filename'].apply(lambda x: emotion_map[x.split('-')[2]])

# Save the updated DataFrame back to a new Excel file
output_file_path = 'audio_features.xlsx'
df.to_excel(output_file_path, index=False)

print(f"Emotion column added and saved to {output_file_path}")


In [None]:
import pandas as pd

# Load the Excel file

df = pd.read_excel(r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/normalized_data_denoised_audio_features_new.xlsx')

# Mapping of numbers to emotions (RAVDESS)
emotion_map = {
    '01': 'Neutral',
    '02': 'Calm',
    '03': 'Happy',
    '04': 'Sad',
    '05': 'Angry',
    '06': 'Fearful',
    '07': 'Disgust',
    '08': 'Surprised'
}

# Extract the third segment from the filenames and map to corresponding emotion
df['Emotion'] = df['Filename'].apply(lambda x: emotion_map[x.split('-')[2]])

# Save the updated DataFrame back to a new Excel file
output_file_path = 'audio_features.xlsx'
df.to_excel(output_file_path, index=False)

print(f"Emotion column added and saved to {output_file_path}")


In [1]:
import os
import librosa
import numpy as np
import pandas as pd

# Path to audio files
audio_folder_path = r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/HW5'

# Prepare list to hold features and labels
features_list = []
labels = []  # Use if you have label data

for filename in os.listdir(audio_folder_path):
    if filename.endswith(".wav"):
        file_path = os.path.join(audio_folder_path, filename)
        
        # Load audio
        y, sr = librosa.load(file_path)
        
        # Extract features
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr, fmin=50.0, n_bands=4).mean(axis=1)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr).mean(axis=1)
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr).mean(axis=1)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr).mean(axis=1)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y).mean(axis=1)
        rmse = librosa.feature.rms(y=y).mean(axis=1)
        
        # Flatten and combine features
        features = np.hstack([
            mfcc, 
            chroma, 
            spectral_contrast, 
            spectral_centroid, 
            spectral_rolloff, 
            spectral_bandwidth, 
            zero_crossing_rate, 
            rmse
        ])
        features_list.append(features)

# Define column names
columns = (
    [f'mfcc_{i}' for i in range(13)] + 
    [f'chroma_{i}' for i in range(12)] + 
    [f'spectral_contrast_{i}' for i in range(5)] + 
    ['spectral_centroid'] +
    ['spectral_rolloff'] +
    ['spectral_bandwidth'] +
    ['zero_crossing_rate'] +
    ['rmse']
)

# Convert to DataFrame
df = pd.DataFrame(features_list, columns=columns)

# Display the DataFrame
print(df)


          mfcc_0     mfcc_1     mfcc_2     mfcc_3     mfcc_4     mfcc_5  \
0    -520.757202  31.718325 -15.408394   8.540115  -5.274519 -20.149332   
1    -502.678162  22.472540 -36.139492   5.790903 -13.418743 -16.888086   
2    -487.463043  28.752426 -22.923437  -7.314599  -5.733365 -18.679493   
3    -554.687134  24.070612 -40.613613  -1.101729 -11.908947 -22.207151   
4    -644.078064  39.806393 -18.152361  -0.948223 -10.447104 -21.480946   
...          ...        ...        ...        ...        ...        ...   
1007 -544.508911  63.956226 -16.198565   6.550264   2.414217 -19.109722   
1008 -435.683624  53.949280 -24.301699   8.696988   4.521314  -2.897031   
1009 -449.334137  61.431705 -23.652437  16.012161  -2.791314 -10.863356   
1010 -563.189636  71.033630 -10.488116   8.577755  14.603802  -4.307268   
1011 -564.855652  73.521988 -15.576706   9.413851  12.736660  -3.563011   

         mfcc_6     mfcc_7     mfcc_8     mfcc_9  ...  spectral_contrast_0  \
0    -11.566627 -12.8

In [2]:
df.to_excel('all_audio_features.xlsx',index=False)

In [3]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Load your data
audio_data = pd.read_excel(r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/all_audio_features.xlsx')

# Initialize the MinMaxScaler to normalize features between 0 and 1
scaler = MinMaxScaler()

# Apply the scaler to the entire dataset, assuming all columns are features to normalize
normalized_data = pd.DataFrame(scaler.fit_transform(audio_data), columns=audio_data.columns)

# Display the first few rows of the normalized data
print(normalized_data.head())
normalized_data.to_excel('normalized_all_audio_features.xlsx',index=False)

     mfcc_0    mfcc_1    mfcc_2    mfcc_3    mfcc_4    mfcc_5    mfcc_6  \
0  0.520798  0.314601  0.567006  0.556730  0.449990  0.386864  0.408413   
1  0.565741  0.213821  0.258528  0.489382  0.287073  0.455012  0.567220   
2  0.603564  0.282272  0.455182  0.168331  0.440811  0.417578  0.488601   
3  0.436451  0.231240  0.191954  0.320530  0.317275  0.343863  0.376472   
4  0.214232  0.402761  0.526176  0.324291  0.346518  0.359038  0.617533   

     mfcc_7    mfcc_8    mfcc_9  ...  spectral_contrast_0  \
0  0.452502  0.661194  0.726649  ...             0.229675   
1  0.317241  0.655257  0.540304  ...             0.335735   
2  0.436252  0.451015  0.679550  ...             0.466837   
3  0.134821  0.800600  0.747741  ...             0.258150   
4  0.143922  0.647930  0.640614  ...             0.283094   

   spectral_contrast_1  spectral_contrast_2  spectral_contrast_3  \
0             0.170665             0.094187             0.438583   
1             0.207721             0.069940   

In [5]:
import pandas as pd

# Load the Excel file

df = pd.read_excel(r'/Users/dj/Academics/ML in Biomedical Signal/Assignment5/all_audio_features.xlsx')

# Mapping of numbers to emotions (RAVDESS)
emotion_map = {
    '01': 'Neutral',
    '02': 'Calm',
    '03': 'Happy',
    '04': 'Sad',
    '05': 'Angry',
    '06': 'Fearful',
    '07': 'Disgust',
    '08': 'Surprised'
}

# Extract the third segment from the filenames and map to corresponding emotion
df['Emotion'] = df['Filename'].apply(lambda x: emotion_map[x.split('-')[2]])

# Save the updated DataFrame back to a new Excel file
output_file_path = 'labeled_audio_features.xlsx'
df.to_excel(output_file_path, index=False)

print(f"Emotion column added and saved to {output_file_path}")


Emotion column added and saved to labeled_audio_features.xlsx
