In [1]:
import os
import librosa
import numpy as np
import pandas as pd

# Paths
audio_folder = "TRAIN"
output_csv = "audio_embeddings.csv"

# Function to extract audio embeddings (e.g., MFCCs)
def extract_audio_features(audio_path, n_mfcc=13):
    try:
        # Load the audio file
        y, sr = librosa.load(audio_path, sr=None)
        
        # Extract MFCC features
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        
        # Aggregate MFCCs (e.g., take mean and std across time axis)
        mfcc_mean = np.mean(mfcc, axis=1)
        mfcc_std = np.std(mfcc, axis=1)
        
        # Combine mean and std into a single embedding
        embedding = np.concatenate([mfcc_mean, mfcc_std])
        
        return embedding
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None

# Process each audio file and save embeddings
embeddings = []
file_names = []

for file_name in os.listdir(audio_folder):
    if file_name.endswith('.wav'):
        audio_path = os.path.join(audio_folder, file_name)
        embedding = extract_audio_features(audio_path)
        
        if embedding is not None:
            embeddings.append(embedding)
            file_names.append(file_name)

# Convert to DataFrame for saving
embeddings_df = pd.DataFrame(embeddings)
embeddings_df.insert(0, "Filename", file_names)  # Add filenames as the first column

# Save to CSV
embeddings_df.to_csv(output_csv, index=False)

#print(f"Audio embeddings saved to {output_csv}")


In [3]:
# Load the data
train_data = pd.read_csv('TRAIN.csv')
audio_embeddings = pd.read_csv('audio_embeddings.csv')

In [4]:
train_data.head()

Unnamed: 0,Filename,Class
0,346.wav,Negative
1,163.wav,Neutral
2,288.wav,Negative
3,279.wav,Negative
4,244.wav,Negative


In [5]:
audio_embeddings.head()

Unnamed: 0,Filename,0,1,2,3,4,5,6,7,8,...,16,17,18,19,20,21,22,23,24,25
0,28.wav,-580.8789,115.22364,15.318973,31.279287,17.988214,5.256765,-5.390493,6.711666,2.77342,...,26.235401,20.91696,12.164499,15.651846,9.862374,8.668188,8.708022,7.595624,10.063101,9.008025
1,30.wav,-580.95056,115.4417,15.418888,31.41444,17.996742,5.303187,-5.38305,6.772942,2.772726,...,26.121923,20.833368,12.171346,15.737186,9.787956,8.637162,8.751245,7.612542,10.058934,9.00079
2,36.wav,-572.7886,111.368324,10.172045,26.96257,13.83109,3.151526,-5.602227,5.504691,-2.48891,...,18.766754,13.804071,12.640682,12.723204,10.60043,9.021395,9.876261,10.437687,9.282254,8.263548
3,9.wav,-598.7486,109.433556,20.036419,24.895813,19.323107,9.383337,-7.663574,4.774885,1.692682,...,19.933426,18.268124,12.46065,15.253509,9.825088,9.455122,9.935232,6.739247,9.231262,11.57796
4,16.wav,-547.26984,108.96307,16.37278,28.410503,11.1287,-2.526328,-4.537299,6.266095,-2.979559,...,19.858885,17.156792,12.323198,14.942352,13.315815,8.843205,8.530368,10.291992,8.748182,9.09133


In [6]:
# Merge the class information into the audio embeddings
merged_data = pd.merge(audio_embeddings, train_data, on='Filename', how='inner')
merged_data.head()

Unnamed: 0,Filename,0,1,2,3,4,5,6,7,8,...,17,18,19,20,21,22,23,24,25,Class
0,28.wav,-580.8789,115.22364,15.318973,31.279287,17.988214,5.256765,-5.390493,6.711666,2.77342,...,20.91696,12.164499,15.651846,9.862374,8.668188,8.708022,7.595624,10.063101,9.008025,Positive
1,30.wav,-580.95056,115.4417,15.418888,31.41444,17.996742,5.303187,-5.38305,6.772942,2.772726,...,20.833368,12.171346,15.737186,9.787956,8.637162,8.751245,7.612542,10.058934,9.00079,Positive
2,36.wav,-572.7886,111.368324,10.172045,26.96257,13.83109,3.151526,-5.602227,5.504691,-2.48891,...,13.804071,12.640682,12.723204,10.60043,9.021395,9.876261,10.437687,9.282254,8.263548,Positive
3,9.wav,-598.7486,109.433556,20.036419,24.895813,19.323107,9.383337,-7.663574,4.774885,1.692682,...,18.268124,12.46065,15.253509,9.825088,9.455122,9.935232,6.739247,9.231262,11.57796,Positive
4,16.wav,-547.26984,108.96307,16.37278,28.410503,11.1287,-2.526328,-4.537299,6.266095,-2.979559,...,17.156792,12.323198,14.942352,13.315815,8.843205,8.530368,10.291992,8.748182,9.09133,Positive


In [7]:
# Save to CSV
merged_data.to_csv('updated_audio_embeddings.csv', index=False)