<a href="https://colab.research.google.com/github/ajay47847/pcg_classification/blob/main/Untitled61.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive.
# `google.colab` is imported here, so this cell is expected to run inside a
# Colab runtime; the audio folder used later in this notebook lives under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import os
from scipy.io import wavfile
from scipy.signal import butter, filtfilt, welch
import matplotlib.pyplot as plt

def load_audio_files(file_list, folder_path):
    """Load WAV files and trim them all to the length of the shortest one.

    Args:
        file_list: Names of the WAV files to read, relative to ``folder_path``.
        folder_path: Directory that contains the files.

    Returns:
        A tuple ``(audio, sample_rate)``. ``audio`` is a NumPy array with one
        entry per file, each truncated to the shortest recording's length.
        ``sample_rate`` is the rate reported for the last file read.

    Raises:
        ValueError: If ``file_list`` is empty (there would be no sample rate
            to return and nothing to stack).
    """
    import warnings

    file_list = list(file_list)
    if not file_list:
        raise ValueError("file_list is empty: no audio files to load")

    signals = []
    rates = []
    for file_name in file_list:
        rate, samples = wavfile.read(os.path.join(folder_path, file_name))
        rates.append(rate)
        signals.append(samples)

    # Only one sample rate is returned, so mixed rates would make every
    # downstream time/frequency computation wrong for some of the files.
    if len(set(rates)) > 1:
        warnings.warn(
            f"Audio files have differing sample rates {sorted(set(rates))}; "
            f"returning the rate of the last file ({rates[-1]})."
        )

    # Trim all audio files to the minimum length so they stack into one array.
    min_length = min(len(samples) for samples in signals)
    trimmed = [samples[:min_length] for samples in signals]
    return np.array(trimmed), rates[-1]

def apply_filter(data, sample_rate, filter_type, cutoff_freqs, order=1):
    """Apply a zero-phase Butterworth filter along the last axis of ``data``.

    Args:
        data: Signal, or array of signals, to filter.
        sample_rate: Sampling frequency of ``data`` in Hz.
        filter_type: Value forwarded to ``scipy.signal.butter`` as ``btype``
            (e.g. ``'band'``, ``'low'``, ``'high'``).
        cutoff_freqs: Cutoff frequency in Hz. Either a single number (for
            low/high-pass) or a sequence such as ``[low, high]`` for band
            filters.
        order: Butterworth filter order. Defaults to 1, the value that was
            previously hard-coded.

    Returns:
        The filtered data, as returned by ``scipy.signal.filtfilt``.
    """
    nyquist = 0.5 * sample_rate
    # np.asarray keeps a scalar cutoff scalar (0-d) and a sequence a 1-D
    # array, so both forms can be normalised with the same expression.
    normalized_cutoff = np.asarray(cutoff_freqs, dtype=float) / nyquist
    b, a = butter(N=order, Wn=normalized_cutoff, btype=filter_type)
    # filtfilt runs the filter forward and backward, so no phase shift is added.
    return filtfilt(b, a, data)

def z_score_normalization(audio_data):
    """Standardise each signal to zero mean and unit standard deviation.

    Args:
        audio_data: Iterable of 1-D signals (for example the rows of a 2-D
            array).

    Returns:
        A NumPy array holding one normalised signal per input signal. A
        signal whose standard deviation is zero (constant signal) is only
        mean-centred, which yields all zeros instead of NaN.
    """
    normalized_audio_data = []
    for signal in audio_data:
        centered = signal - np.mean(signal)
        std = np.std(signal)
        # Dividing by a zero std would produce NaN/inf and poison every
        # later step (PSD, band features), so constant signals stay centred.
        normalized_audio_data.append(centered / std if std > 0 else centered)
    return np.array(normalized_audio_data)

def compute_psd(audio_data, sample_rate, nperseg=None):
    """Estimate the power spectral density of each signal with Welch's method.

    Args:
        audio_data: Iterable of 1-D signals.
        sample_rate: Sampling frequency of the signals in Hz.
        nperseg: Optional segment length forwarded to ``scipy.signal.welch``.
            ``None`` (the default) keeps SciPy's own default, which is what
            this function used before the parameter existed.

    Returns:
        A tuple ``(frequencies, psd_array)``. ``frequencies`` is the frequency
        axis returned by ``welch`` for the last signal; ``psd_array`` stacks
        one PSD per signal.

    Raises:
        ValueError: If ``audio_data`` contains no signals, because no
            frequency axis can be produced in that case.
    """
    frequencies = None
    psd_list = []
    for signal in audio_data:
        frequencies, psd = welch(signal, sample_rate, nperseg=nperseg)
        psd_list.append(psd)
    if not psd_list:
        raise ValueError("audio_data is empty: cannot compute a PSD")
    return frequencies, np.array(psd_list)

# Load the Excel file and select relevant columns
df = pd.read_excel('/content/PMEA_37_12_2181_OnlineAppendix.xlsx', sheet_name='SUAHSDB')
df = df[['Record name', 'Diagnosis', 'Type (-1=normal 1=abnormal)']]

# Extract 10 normal and 10 abnormal records
normal_records = df[df['Type (-1=normal 1=abnormal)'] == -1].head(10)
abnormal_records = df[df['Type (-1=normal 1=abnormal)'] == 1].head(10)

# Get the list of normal and abnormal record names
normal_record_names = normal_records['Record name'].tolist()
abnormal_record_names = abnormal_records['Record name'].tolist()

# Define the folder path containing the audio files
folder_path = '/content/drive/MyDrive/training-f'

# Get a list of all .wav files in the folder.
# os.listdir returns entries in arbitrary order; sorting keeps the signal
# order (and therefore the feature column order) reproducible between runs.
audio_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.wav'))

# Filter the audio files based on normal and abnormal record names.
# Compare the file stem for equality rather than testing for a substring, so
# that one record name cannot accidentally select a longer, unrelated file name.
normal_record_set = {str(record).strip() for record in normal_record_names}
abnormal_record_set = {str(record).strip() for record in abnormal_record_names}
normal_audio_files = [file for file in audio_files if os.path.splitext(file)[0] in normal_record_set]
abnormal_audio_files = [file for file in audio_files if os.path.splitext(file)[0] in abnormal_record_set]

# Load normal and abnormal audio data
normal_audio_data, sample_rate1 = load_audio_files(normal_audio_files, folder_path)
abnormal_audio_data, sample_rate2 = load_audio_files(abnormal_audio_files, folder_path)

# A single `frequencies` axis is shared by both groups further down, which is
# only valid when both groups were recorded at the same sample rate.
if sample_rate1 != sample_rate2:
    raise ValueError(
        f"Normal ({sample_rate1} Hz) and abnormal ({sample_rate2} Hz) recordings "
        "have different sample rates; their PSDs cannot share one frequency axis."
    )

# Extract 2.5 seconds of audio
duration = 2.5
num_samples1 = int(duration * sample_rate1)
num_samples2 = int(duration * sample_rate2)
normal_audio_data = normal_audio_data[:, :num_samples1]
abnormal_audio_data = abnormal_audio_data[:, :num_samples2]

# Apply bandpass filter to the audio data
normal_audio_data_filtered = apply_filter(normal_audio_data, sample_rate1, 'band', [25, 600])
abnormal_audio_data_filtered = apply_filter(abnormal_audio_data, sample_rate2, 'band', [25, 600])

# Apply z-score normalization
normal_audio_data_norm = z_score_normalization(normal_audio_data_filtered)
abnormal_audio_data_norm = z_score_normalization(abnormal_audio_data_filtered)

# Compute PSD for normal and abnormal audio data
frequencies, normal_psd_array = compute_psd(normal_audio_data_norm, sample_rate1)
frequencies, abnormal_psd_array = compute_psd(abnormal_audio_data_norm, sample_rate2)

def slice_psd_and_compute_features(psd_array, frequencies, start_freq, stop_freq, increment, overlap):
    """Compute per-band mean and standard deviation of PSD values.

    The range ``[start_freq, stop_freq)`` is covered by bands of width
    ``increment`` whose lower edges advance by ``increment - overlap``. A band
    that would extend past ``stop_freq`` is clipped to end at ``stop_freq``.

    Args:
        psd_array: 2-D array indexed as ``[signal, frequency_bin]``.
        frequencies: 1-D array giving the frequency of each PSD bin.
        start_freq: Lower edge of the first band (integer).
        stop_freq: Upper limit of the analysed range (integer, exclusive).
        increment: Width of each band (integer).
        overlap: Overlap between consecutive bands (integer); must be smaller
            than ``increment``.

    Returns:
        Array of shape ``(n_bands, 2, n_signals)``: for every band, index 0
        holds the per-signal mean and index 1 the per-signal standard
        deviation of the PSD inside the band.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``increment``, since
            the bands would then never advance.
    """
    step = increment - overlap
    if step <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than increment ({increment})"
        )

    features_list = []
    for low in range(start_freq, stop_freq, step):
        # Clip the final band(s) so they never reach beyond stop_freq.
        high = min(low + increment, stop_freq)
        freq_mask = (frequencies >= low) & (frequencies < high)
        sliced_psd = psd_array[:, freq_mask]
        features_list.append((np.mean(sliced_psd, axis=1), np.std(sliced_psd, axis=1)))
    return np.array(features_list)

# Define slicing parameters
start_freq = 250
stop_freq = 600
increment = 50
overlap = 25

# Compute features for normal and abnormal PSD data
normal_feature_matrix = slice_psd_and_compute_features(normal_psd_array, frequencies, start_freq, stop_freq, increment, overlap)
abnormal_feature_matrix = slice_psd_and_compute_features(abnormal_psd_array, frequencies, start_freq, stop_freq, increment, overlap)

# Extract mean and std features (each is indexed as [band, signal])
normal_mean_features = np.array([feature[0] for feature in normal_feature_matrix])
normal_std_features = np.array([feature[1] for feature in normal_feature_matrix])
abnormal_mean_features = np.array([feature[0] for feature in abnormal_feature_matrix])
abnormal_std_features = np.array([feature[1] for feature in abnormal_feature_matrix])

# Define frequency range labels.
# The upper edge is clipped to stop_freq exactly as the feature computation
# does, so the last row is labelled with the band it really covers
# (e.g. "575-600" rather than "575-625").
frequency_ranges = [
    f"{low}-{min(low + increment, stop_freq)}"
    for low in range(start_freq, stop_freq, increment - overlap)
]

# Create DataFrame for normal features: one row per band, one mean column and
# one std column per signal
normal_features_df = pd.DataFrame(
    data=np.hstack((normal_mean_features, normal_std_features)),
    index=frequency_ranges,
    columns=[f"Normal Mean {i+1}" for i in range(normal_mean_features.shape[1])] + [f"Normal Std {i+1}" for i in range(normal_std_features.shape[1])]
)

# Create DataFrame for abnormal features
abnormal_features_df = pd.DataFrame(
    data=np.hstack((abnormal_mean_features, abnormal_std_features)),
    index=frequency_ranges,
    columns=[f"Abnormal Mean {i+1}" for i in range(abnormal_mean_features.shape[1])] + [f"Abnormal Std {i+1}" for i in range(abnormal_std_features.shape[1])]
)

# Combine normal and abnormal feature DataFrames side by side
combined_features_df = pd.concat([normal_features_df, abnormal_features_df], axis=1)

# Display the combined DataFrame
print(combined_features_df.to_string())


         Normal Mean 1  Normal Mean 2  Normal Mean 3  Normal Mean 4  Normal Mean 5  Normal Mean 6  Normal Mean 7  Normal Mean 8  Normal Mean 9  Normal Mean 10  Normal Std 1  Normal Std 2  Normal Std 3  Normal Std 4  Normal Std 5  Normal Std 6  Normal Std 7  Normal Std 8  Normal Std 9  Normal Std 10  Abnormal Mean 1  Abnormal Mean 2  Abnormal Mean 3  Abnormal Mean 4  Abnormal Mean 5  Abnormal Mean 6  Abnormal Mean 7  Abnormal Mean 8  Abnormal Mean 9  Abnormal Mean 10  Abnormal Std 1  Abnormal Std 2  Abnormal Std 3  Abnormal Std 4  Abnormal Std 5  Abnormal Std 6  Abnormal Std 7  Abnormal Std 8  Abnormal Std 9  Abnormal Std 10
250-300       0.001035       0.000423       0.000875       0.001325       0.001183       0.000412       0.001351       0.000665       0.000423        0.000951      0.000342  9.375176e-05      0.000407      0.000271      0.000283      0.000094      0.000164      0.000087      0.000138       0.000257         0.000588         0.000840         0.000346         0.000439 