In [1]:
# Spectral Feature Extraction
# Additional Documentation: https://librosa.org/doc/main/feature.html#spectral-features

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers
import tensorflow as tf
from parselmouth.praat import call
import parselmouth
import statistics
import random
import soundfile as sf 
from parselmouth.praat import call
from scipy.stats.mstats import zscore
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.io import wavfile
import librosa
from librosa import feature
import csv
from tempfile import mktemp




In [2]:
def extract_audio_files(folder_path):
    """Return the paths of the WAV files located directly inside a folder.

    Args:
        folder_path: Directory to scan. Sub-directories are not searched.

    Returns:
        list[str]: ``folder_path`` joined with each matching file name,
        sorted by file name so the result is identical on every run.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be read.
    """
    audio_files = []
    # os.listdir yields entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(folder_path)):
        # Case-insensitive match so ".WAV" recordings are not silently skipped.
        if not filename.lower().endswith(".wav"):
            continue
        path = os.path.join(folder_path, filename)
        # Skip directories that merely have a name ending in ".wav".
        if os.path.isfile(path):
            audio_files.append(path)
    return audio_files

# Folders holding the concussion and no-concussion recordings
# (raw strings, so the Windows backslashes need no escaping)
concussion_folder = r"C:\USF\Semester2\SmartAndConnectedHealth\FinalProject\Concussion"
no_concussion_folder = r"C:\USF\Semester2\SmartAndConnectedHealth\FinalProject\NoConcussion"

# Collect the .wav paths of each folder, concussion folder first
concussion_files, no_concussion_files = (
    extract_audio_files(folder)
    for folder in (concussion_folder, no_concussion_folder)
)

In [3]:
# Per-file feature extraction
def get_feature_vector(audio_file):
    """Summarise one audio file as a flat list of 21 scalar features.

    The file is loaded at its native sampling rate (``sr=None``) and every
    librosa feature is reduced to a scalar, except the MFCCs, which are
    reduced to one mean value per coefficient.

    Args:
        audio_file: Path of the audio file, passed to ``librosa.load``.

    Returns:
        list: ``[spectral_centroid, spectral_bandwidth, spectral_rolloff,
        zero_crossing_rate, <13 MFCC means>, lpc, spectral_contrast,
        chroma_stft, tempo]``.
    """
    y, sr = librosa.load(audio_file, sr=None)

    # Spectral descriptors, each averaged over the whole feature array.
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    # Mean along axis 1 keeps one value per coefficient (13 values in total).
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13), axis=1)

    # Spread (standard deviation) of the order-16 LPC coefficients.
    lpc = np.std(librosa.lpc(y, order=16))

    # Additional features, again averaged over the whole feature array.
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr))
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))

    # librosa 0.10 moved tempo estimation to librosa.feature.tempo and
    # deprecated librosa.beat.tempo (scheduled for removal in 1.0). Prefer the
    # new location and fall back to the old one only on older releases.
    estimate_tempo = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
    tempo = estimate_tempo(y=y, sr=sr)[0]

    # Order must match the column header used when writing the CSV.
    feature_vector = [spectral_centroid, spectral_bandwidth, spectral_rolloff,
                      zero_crossing_rate, *mfccs, lpc, spectral_contrast,
                      chroma_stft, tempo]
    return feature_vector

In [4]:
# Batch feature extraction: one feature vector per audio file
def extract_features(audio_files):
    """Apply ``get_feature_vector`` to every path in ``audio_files``.

    Args:
        audio_files: Iterable of audio file paths.

    Returns:
        list: One feature vector per input path, in input order.
    """
    return [get_feature_vector(path) for path in audio_files]

In [5]:
# Data Augmentation: add noise, pitch shifting, and tempo shifting
def _write_temp_wav(samples, sr):
    """Write ``samples`` to a newly created temporary .wav file and return its path."""
    import tempfile  # local import: the file-level import only exposes ``mktemp``

    # mkstemp actually creates the file, so no other process can claim the
    # name first (the race that makes the deprecated mktemp unsafe).
    fd, path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)  # soundfile reopens the file by path
    sf.write(path, samples, sr)
    return path


def augment_data(audio_files, noise_level=0.005, pitch_step_range=(-3, 3),
                 stretch_rate=1.2):
    """Create three augmented copies of every audio file.

    For each input file, in this order, the function writes:
      1. the signal plus Gaussian noise scaled by ``noise_level``;
      2. the signal pitch-shifted by a random whole number of steps;
      3. the signal time-stretched by ``stretch_rate``.

    Args:
        audio_files: Iterable of audio file paths readable by ``librosa.load``.
        noise_level: Multiplier applied to the standard-normal noise.
        pitch_step_range: Inclusive ``(min, max)`` bounds for the random
            ``n_steps`` passed to ``librosa.effects.pitch_shift``.
        stretch_rate: ``rate`` passed to ``librosa.effects.time_stretch``.

    Returns:
        list[str]: Paths of the temporary .wav files, three per input file.
        The files are NOT deleted here; the caller should remove them once
        they are no longer needed.
    """
    augmented_files = []
    min_steps, max_steps = pitch_step_range
    for file in audio_files:
        y, sr = librosa.load(file, sr=None)  # keep the native sampling rate

        # Add noise
        y_noise = y + noise_level * np.random.randn(len(y))
        augmented_files.append(_write_temp_wav(y_noise, sr))

        # Pitch shifting
        # NOTE(review): the draw can be 0, which presumably yields a
        # near-copy of the original recording - confirm this is acceptable.
        n_steps = random.randint(min_steps, max_steps)
        y_pitch_shift = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
        augmented_files.append(_write_temp_wav(y_pitch_shift, sr))

        # Tempo shifting
        y_tempo_shift = librosa.effects.time_stretch(y, rate=stretch_rate)
        augmented_files.append(_write_temp_wav(y_tempo_shift, sr))
    return augmented_files

In [6]:
# Concussion class: features of the original recordings, then of their augmented copies
concussion_features = extract_features(concussion_files)
augmented_concussion_files = augment_data(concussion_files)
augmented_concussion_features = extract_features(augmented_concussion_files)

# No-concussion class: the same two steps
no_concussion_features = extract_features(no_concussion_files)
augmented_no_concussion_files = augment_data(no_concussion_files)
augmented_no_concussion_features = extract_features(augmented_no_concussion_files)

# Stack the rows class by class: all concussion samples first, then all no-concussion samples
all_features = [
    *concussion_features,
    *augmented_concussion_features,
    *no_concussion_features,
    *augmented_no_concussion_features,
]

# Labels follow the row order above: 1 = concussion, 0 = no concussion
all_labels = (
    [1] * len(concussion_features + augmented_concussion_features)
    + [0] * len(no_concussion_features + augmented_no_concussion_features)
)


In [7]:
# Column names for the features CSV, in the order produced by get_feature_vector
header = [
    'SpectralCentroid', 'SpectralBandwidth', 'SpectralRolloff', 'ZeroCrossingRate',
    *[f'MFCC{index}' for index in range(1, 14)],  # MFCC1 ... MFCC13
    'LPC', 'SpectralContrast', 'ChromaSTFT', 'Tempo',
]

In [8]:
# Features file: the header row followed by one row per sample
with open('concussion_classification_features.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([header, *all_features])

# Labels file: a single 'Label' column, one row per sample in the same order
with open('concussion_classification_labels.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([['Label'], *([label] for label in all_labels)])