REDUCING SAMPLE SIZE

In [36]:

import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.io import wavfile
from scipy.signal import find_peaks
from pathlib import Path

def audioread(audio_file):
    sample_rate, data = wavfile.read(audio_file)
    return sample_rate, data

# Becuase the .wav values are between -32768 and 32767, we need to output 
# our normalize them to be between -1 and 1
def normalize(data):
    return data.astype(np.float32) / 32728

def find_peaks_with_time(y, time, min_peak_distance_sec):
    # calculate distance in number of samples
    change_in_time = time[1] - time[0]
    print(change_in_time)
    min_peak_distance_samples = int(min_peak_distance_sec / change_in_time)
    print(min_peak_distance_samples)

    # Find peaks
    peak_indices, _ = find_peaks(y, distance=min_peak_distance_samples)

    # map indices to time and amplitudes
    peak_times = time[peak_indices]
    peak_values = y[peak_indices]

    return peak_values, peak_times

def create_dataset(audio_dir):
    # First extract all the .wav files from the audios directory
    path = Path(audio_dir)
    wav_files = [str(file) for file in (path.glob('*.wav'))]

    Fs = 44100

    dataset = []
    threshold = 0.1  # Threshold for peak detection

    # Now do the same process from above
    for files in wav_files:
        extracted_Fs, extracted_data = audioread(files) 

        # Checking if the sampling rate is correct (44.1 KHz)
        if(extracted_Fs != Fs):
            print(f"Sampling rate of .wav must be 44.1 KHz, Given: {extracted_Fs}")
            continue
            
        normalized_data = normalize(extracted_data[:,1])
        dataset.append(normalized_data)

    # Combine all into one np array
    try: 
        data_array = np.concatenate(dataset)
    except ValueError:
        print("Could not concatenate: arrays may have differing shapes.")
        return None

        
    time_vector = np.arange(0,len(data_array) / Fs, 1 / Fs) 

    peak_values, peak_times = find_peaks_with_time(data_array, time_vector, min_peak_distance_sec=0.5)

    filtered_peaks = np.where((peak_values > threshold) & (peak_values < 1))[0]
    filtered_peak_times = peak_times[filtered_peaks]
    #peak_index = [np.argmin(np.abs(time_vector - loc)) for loc in filtered_peak_times]
    # peak_index = (filtered_peak_times * Fs).astype(int)
    peak_index = np.searchsorted(time_vector, filtered_peak_times)

    taps = [data_array[i:i+101] for i in peak_index]

    taps_matrix = np.vstack(taps)
    return taps_matrix





# Directory containing your .wav files
audio_dir = '/mnt/c/Users/mrgen/Downloads/PCA_summer/audios' # adjust this if needed

audio_dataset = create_dataset(audio_dir)
print(audio_dataset[0])

# #print(audio_dataset)
# Placeholder lists for features and labels
mfcc_features = []
labels = []

# Settings for MFCC extraction
sample_rate = 44100
n_mfcc = 13

# Walk through files and extract MFCCs
# for filename in os.listdir(audio_dir):
#     if filename.endswith('.wav'):
#         filepath = os.path.join(audio_dir, filename)
#         y, sr = librosa.load(filepath, sr=sample_rate)
#         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        
#         # Average MFCCs across time frames
#         mfcc_mean = np.mean(mfcc, axis=1)
#         mfcc_features.append(mfcc_mean)

#         # Add label based on filename (e.g., contains 'good' or 'bad')
#         if 'good' in filename.lower():
#             labels.append(1)
#         elif 'bad' in filename.lower():
#             labels.append(0)
#         else:
#             labels.append(-1)  # unknown

mfcc = librosa.feature.mfcc(y=audio_dataset, sr=sample_rate, n_mfcc=n_mfcc)
print(mfcc[0])
mfcc_mean = np.mean(mfcc, axis=1)
mfcc_features.append(mfcc_mean)

#print(np.shape(mfcc_features))
#print(mfcc_features)
#print(labels)


2.2675736961451248e-05
22050
[ 0.54702395  0.11638352 -0.385572   -0.23383647  0.12820826  0.18299316
  0.02517722 -0.1112503  -0.11097531 -0.00143608 -0.16117698 -0.28345758
  0.0062943   0.3803471   0.26038867 -0.0059582  -0.08610364  0.01127475
 -0.01539966 -0.14250794  0.00883036  0.29647398  0.5333048   0.43537644
 -0.06853459 -0.42581275 -0.15518822  0.00773038 -0.17077121 -0.14046077
  0.02642997  0.16649352  0.27636886  0.11448912 -0.25       -0.2982156
  0.02368003  0.12875825 -0.01628575  0.01341359  0.07837326  0.13282205
  0.12454168  0.03321315  0.02325226  0.14681618  0.15414935 -0.11070032
 -0.27319115 -0.21003422 -0.11363359 -0.02532999  0.12713884  0.24107797
  0.169549   -0.01530799 -0.19649841 -0.31566244 -0.29213518 -0.10636152
  0.06389025  0.19185407  0.26549134  0.23927523  0.17046566  0.02719384
 -0.1309582  -0.20762038 -0.17086287 -0.04201296  0.11577243  0.15943535
  0.10504767  0.01292471 -0.07653996 -0.10584209 -0.12011122 -0.12243339
 -0.0918174  -0.0252688

In [25]:
# Convert to DataFrame
X = np.array(mfcc_features)
y = np.array(labels)

# Remove unknown labels if any
valid_indices = y != -1
X = X[valid_indices]
y = y[valid_indices]

# Standardize (zero mean, unit variance)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X, y)

# PCA
pca = PCA(n_components=6)
X_pca = pca.fit_transform(X_scaled)
print(X_pca)

# Plot
plt.figure(figsize=(8,6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='coolwarm', edgecolor='k')
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.title("PCA of MFCC Features")
plt.colorbar(label="Label (Good = 1, Bad = 0)")
filenames = [os.path.basename(f) for f in os.listdir(audio_dir) if f.endswith('.wav') and valid_indices[os.listdir(audio_dir).index(f)]]
for i, filename in enumerate(filenames):
    plt.text(X_pca[i, 0] + 0.01, X_pca[i, 1] + 0.01, filename, fontsize=8)

plt.tight_layout()
plt.grid(True)
plt.show()

# Print explained variance
print("Explained variance:", pca.explained_variance_ratio_)
print("Cumulative:", np.cumsum(pca.explained_variance_ratio_))


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (9,) + inhomogeneous part.