<a href="https://colab.research.google.com/github/ajay47847/pcg_featuresExtraction/blob/main/wav2vacfeature.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Attach Google Drive to the Colab VM; the audio folder used later in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [16]:
import pandas as pd
import numpy as np
import os
from scipy.io import wavfile
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2Model

# Function to load audio files from a specified folder
def load_audio_files(file_list, folder_path):
    """Read every WAV file named in ``file_list`` from ``folder_path``.

    Parameters
    ----------
    file_list : iterable of str
        File names (not full paths), loaded in the order given.
    folder_path : str
        Directory that contains the files.

    Returns
    -------
    audio_data : list
        One sample array per file, exactly as returned by
        ``scipy.io.wavfile.read``.
    record_names : list of str
        The file names, in the same order as ``audio_data``.
    sr : int or None
        Sample rate of the *last* file read, or ``None`` when ``file_list``
        is empty. Rates of earlier files are not compared against it.
    """
    audio_data = []
    record_names = []
    # Pre-bind so an empty file_list returns cleanly instead of raising
    # UnboundLocalError at the return statement.
    sr = None
    for file_name in file_list:
        file_path = os.path.join(folder_path, file_name)
        sr, y = wavfile.read(file_path)  # (sample rate, samples)
        audio_data.append(y)
        record_names.append(file_name)
    return audio_data, record_names, sr

# Function to normalize audio data using z-score
def z_score_normalize_audio_data(audio_data):
    """Standardize each signal to zero mean and unit standard deviation.

    Every signal is normalized independently using its own mean and
    (population) standard deviation. A signal whose standard deviation is
    exactly zero is only mean-centred, which avoids dividing by zero.

    Parameters
    ----------
    audio_data : iterable
        Signals to normalize, one array-like per item.

    Returns
    -------
    list
        Normalized signals, in the same order as the input.
    """
    def _standardize(samples):
        centred = samples - np.mean(samples)
        spread = np.std(samples)
        # Constant signal: nothing to scale by, so return it centred only.
        return centred if spread == 0 else centred / spread

    return [_standardize(signal) for signal in audio_data]

# Function to extract features using Wav2Vec2.0
def extract_features_wav2vec(audio_data, processor, model, device, sampling_rate=16000):
    """Turn each signal into one feature vector by averaging the model's
    ``last_hidden_state``.

    Parameters
    ----------
    audio_data : iterable
        One waveform per item; each is handed to ``processor`` unchanged.
    processor : callable
        Called as ``processor(signal, return_tensors='pt',
        sampling_rate=...)``; the result must expose ``input_values``.
    model : callable
        Called on the input values; the result must expose
        ``last_hidden_state``.
    device : torch.device or str
        Device the input tensor is moved to before the forward pass.
    sampling_rate : int, default 16000
        Sampling rate reported to ``processor``. It is only forwarded; this
        function does not resample, so the caller must make sure the
        waveforms really are at this rate.

    Returns
    -------
    numpy.ndarray
        The per-signal feature vectors stacked with ``np.array``.
    """
    features = []
    for signal in audio_data:
        inputs = processor(
            signal, return_tensors='pt', sampling_rate=sampling_rate
        ).input_values.to(device)
        # Pure inference: skip autograd bookkeeping.
        with torch.no_grad():
            hidden = model(inputs).last_hidden_state
        # assumes hidden is (1, frames, hidden_size) — TODO confirm.
        # squeeze(0) drops the leading axis, mean(dim=0) averages over the next.
        features.append(hidden.squeeze(0).mean(dim=0).cpu().numpy())
    return np.array(features)

# Load the Excel file and select relevant columns
df = pd.read_excel('/content/PMEA_37_12_2181_OnlineAppendix.xlsx', sheet_name='SUAHSDB')
df = df[['Record name', 'Type (-1=normal 1=abnormal)']]

# Define the folder path containing the audio files
folder_path = '/content/drive/MyDrive/training-f'

# Get a list of all .wav files in the folder.
# os.listdir returns names in arbitrary order; sort so runs are reproducible.
audio_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.wav'))

# Keep only files whose stem is exactly a record name from the sheet.
# The label lookup further down uses .loc with the file stems, so a file that
# merely *contains* a record name would raise KeyError there; dropna() also
# keeps blank spreadsheet rows from breaking the membership test.
known_records = set(df['Record name'].dropna())
audio_files_filtered = [
    file for file in audio_files
    if os.path.splitext(file)[0] in known_records
]
if not audio_files_filtered:
    raise FileNotFoundError(
        f"No .wav file in {folder_path!r} matches a 'Record name' from the spreadsheet"
    )

# Load audio data
# NOTE(review): sample_rate is the rate of the last file read and is not used
# below, while extract_features_wav2vec tells the processor 16000. Confirm the
# recordings are 16 kHz or resample them before feature extraction.
audio_data, record_names_loaded, sample_rate = load_audio_files(audio_files_filtered, folder_path)

# Remove file extensions for comparison
record_names_loaded_no_ext = [os.path.splitext(name)[0] for name in record_names_loaded]

# Z-normalize the audio data
audio_data_norm = z_score_normalize_audio_data(audio_data)

# Load Wav2Vec2.0 model and processor
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Extract features using Wav2Vec2.0
features = extract_features_wav2vec(audio_data_norm, processor, model, device)

# Create a DataFrame for features with record names
df_features = pd.DataFrame(features)

# Look up each loaded record's label, in the same order as the feature rows
labels_by_record = df.set_index('Record name')['Type (-1=normal 1=abnormal)']

# Add 'Record Name' and 'Type (-1=normal 1=abnormal)' as the two leading columns
df_features.insert(0, 'Record Name', record_names_loaded)
df_features.insert(1, 'Type (-1=normal 1=abnormal)', labels_by_record.loc[record_names_loaded_no_ext].values)

# Sort DataFrame by 'Record Name' in ascending order and reset index
df_features_sorted = df_features.sort_values(by='Record Name').reset_index(drop=True)

# Print the sorted DataFrame (full dump: every row and every feature column)
print(df_features_sorted.to_string())


Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


    Record Name  Type (-1=normal 1=abnormal)         0         1         2         3         4         5         6         7         8         9        10        11        12        13        14        15        16        17        18        19        20        21        22        23        24        25        26        27        28        29        30        31        32        33        34        35        36        37        38        39        40        41        42        43        44        45        46        47        48        49        50        51        52        53        54        55        56        57        58        59        60        61        62        63        64        65        66        67        68        69        70        71        72        73        74        75        76        77        78        79        80        81        82        83        84        85        86        87        88        89            90        91        92        93        94  

In [13]:
import pandas as pd
import numpy as np
import os
from scipy.io import wavfile
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2Model

# Function to load audio files from a specified folder
def load_audio_files(file_list, folder_path):
    """Read every WAV file named in ``file_list`` from ``folder_path``.

    Parameters
    ----------
    file_list : iterable of str
        File names (not full paths), loaded in the order given.
    folder_path : str
        Directory that contains the files.

    Returns
    -------
    audio_data : list
        One sample array per file, exactly as returned by
        ``scipy.io.wavfile.read``.
    record_names : list of str
        The file names, in the same order as ``audio_data``.
    sr : int or None
        Sample rate of the *last* file read, or ``None`` when ``file_list``
        is empty. Rates of earlier files are not compared against it.
    """
    audio_data = []
    record_names = []
    # Pre-bind so an empty file_list returns cleanly instead of raising
    # UnboundLocalError at the return statement.
    sr = None
    for file_name in file_list:
        file_path = os.path.join(folder_path, file_name)
        sr, y = wavfile.read(file_path)  # (sample rate, samples)
        audio_data.append(y)
        record_names.append(file_name)
    return audio_data, record_names, sr

# Function to normalize audio data using z-score
def z_score_normalize_audio_data(audio_data):
    """Standardize each signal to zero mean and unit standard deviation.

    Every signal is normalized independently using its own mean and
    (population) standard deviation. A signal whose standard deviation is
    exactly zero is only mean-centred, which avoids dividing by zero.

    Parameters
    ----------
    audio_data : iterable
        Signals to normalize, one array-like per item.

    Returns
    -------
    list
        Normalized signals, in the same order as the input.
    """
    def _standardize(samples):
        centred = samples - np.mean(samples)
        spread = np.std(samples)
        # Constant signal: nothing to scale by, so return it centred only.
        return centred if spread == 0 else centred / spread

    return [_standardize(signal) for signal in audio_data]

# Function to extract features using Wav2Vec2.0
def extract_features_wav2vec(audio_data, processor, model, device, sampling_rate=16000):
    """Turn each signal into one feature vector by averaging the model's
    ``last_hidden_state``.

    Parameters
    ----------
    audio_data : iterable
        One waveform per item; each is handed to ``processor`` unchanged.
    processor : callable
        Called as ``processor(signal, return_tensors='pt',
        sampling_rate=...)``; the result must expose ``input_values``.
    model : callable
        Called on the input values; the result must expose
        ``last_hidden_state``.
    device : torch.device or str
        Device the input tensor is moved to before the forward pass.
    sampling_rate : int, default 16000
        Sampling rate reported to ``processor``. It is only forwarded; this
        function does not resample, so the caller must make sure the
        waveforms really are at this rate.

    Returns
    -------
    numpy.ndarray
        The per-signal feature vectors stacked with ``np.array``.
    """
    features = []
    for signal in audio_data:
        inputs = processor(
            signal, return_tensors='pt', sampling_rate=sampling_rate
        ).input_values.to(device)
        # Pure inference: skip autograd bookkeeping.
        with torch.no_grad():
            hidden = model(inputs).last_hidden_state
        # assumes hidden is (1, frames, hidden_size) — TODO confirm.
        # squeeze(0) drops the leading axis, mean(dim=0) averages over the next.
        features.append(hidden.squeeze(0).mean(dim=0).cpu().numpy())
    return np.array(features)

# Load the Excel file and select relevant columns
df = pd.read_excel('/content/PMEA_37_12_2181_OnlineAppendix.xlsx', sheet_name='SUAHSDB')
df = df[['Record name', 'Type (-1=normal 1=abnormal)']]

# Define the folder path containing the audio files
folder_path = '/content/drive/MyDrive/training-f'

# Get a list of all .wav files in the folder.
# os.listdir returns names in arbitrary order; sort so runs are reproducible.
audio_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.wav'))

# Keep only files whose stem is exactly a record name from the sheet.
# The label lookup further down uses .loc with the file stems, so a file that
# merely *contains* a record name would raise KeyError there; dropna() also
# keeps blank spreadsheet rows from breaking the membership test.
known_records = set(df['Record name'].dropna())
audio_files_filtered = [
    file for file in audio_files
    if os.path.splitext(file)[0] in known_records
]
if not audio_files_filtered:
    raise FileNotFoundError(
        f"No .wav file in {folder_path!r} matches a 'Record name' from the spreadsheet"
    )

# Load audio data
# NOTE(review): sample_rate is the rate of the last file read and is not used
# below, while extract_features_wav2vec tells the processor 16000. Confirm the
# recordings are 16 kHz or resample them before feature extraction.
audio_data, record_names_loaded, sample_rate = load_audio_files(audio_files_filtered, folder_path)

# Remove file extensions for comparison
record_names_loaded_no_ext = [os.path.splitext(name)[0] for name in record_names_loaded]

# Z-normalize the audio data
audio_data_norm = z_score_normalize_audio_data(audio_data)

# Load Wav2Vec2.0 model and processor
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Extract features using Wav2Vec2.0
features = extract_features_wav2vec(audio_data_norm, processor, model, device)

# Look up each loaded record's label, in the same order as the feature rows
labels_by_record = df.set_index('Record name')['Type (-1=normal 1=abnormal)']

# Create DataFrame for features; record names and types go in as trailing columns
df_features = pd.DataFrame(features)
df_features['Record Name'] = record_names_loaded
df_features['Type (-1=normal 1=abnormal)'] = labels_by_record.loc[record_names_loaded_no_ext].values

# Sort DataFrame by 'Record Name' in ascending order and reset index
df_features_sorted = df_features.sort_values(by='Record Name').reset_index(drop=True)

# Print the sorted DataFrame (full dump: every row and every feature column)
print(df_features_sorted.to_string())


Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


            0         1         2         3         4         5         6         7         8         9        10        11        12        13        14        15        16        17        18        19        20        21        22        23        24        25        26        27        28        29        30        31        32        33        34        35        36        37        38        39        40        41        42        43        44        45        46        47        48        49        50        51        52        53        54        55        56        57        58        59        60        61        62        63        64        65        66        67        68        69        70        71        72        73        74        75        76        77        78        79        80        81        82        83        84        85        86        87        88        89            90        91        92        93        94        95        96        97        98   