In [18]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
import os
from tensorflow import keras
from keras.models import load_model

# Load dataset (Ensure you have preprocessed audio files and labels)
def load_audio_data(data_dir):
    X, y = [], []
    for label in ["Autism", "Non autism"]:
        class_dir = os.path.join(data_dir, label)
        for file in os.listdir(class_dir):
            file_path = os.path.join(class_dir, file)
            y_audio, sr = librosa.load(file_path, sr=22050, mono=True)
            mfcc = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=40)
            mfcc_scaled = np.mean(mfcc.T, axis=0)
            X.append(mfcc_scaled)
            y.append(0 if label == "Autism" else 1)
    return np.array(X), np.array(y)

# Load Data
data_dir = "D://autism early sathyabhama//autism//audio datasets//"  # Update with your dataset path
X, y = load_audio_data(data_dir)

# Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build Model
model = Sequential([
    Dense(128, activation='relu', input_shape=(40,)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax')
])

# Compile Model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train Model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=16, callbacks=[early_stopping])

# Save Model
model.save("audio_classification_model.h5")

# Load pre-trained model
model_path = "audio_classification_model.h5"
audio_model = load_model(model_path)

# Define class labels
audio_labels = {0: 'Autism', 1: 'Non autism'}

# Function to process and predict audio
def process_and_predict_audio(audio_path):
    try:
        y, sr = librosa.load(audio_path, sr=22050, mono=True)  # Load audio file
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)  # Extract MFCC features
        mfcc_scaled = np.mean(mfcc.T, axis=0)  # Scale features
        mfcc_scaled = np.expand_dims(mfcc_scaled, axis=0)  # Add batch dimension

        prediction = audio_model.predict(mfcc_scaled)
        predicted_class = int(np.argmax(prediction, axis=-1)[0])
        result = audio_labels[predicted_class]
        
        return result
    except Exception as e:
        print(f"Error processing audio: {e}")
        return None

# Select audio file
print("Select an audio file...")
audio_path = "03-01-01-01-01-01-01.wav"  # Replace with actual file path for testing
if os.path.exists(audio_path):
    print(f"Processing: {audio_path}")
    prediction_result = process_and_predict_audio(audio_path)
    print(f"Predicted Classification: {prediction_result}")

    # Display waveform
    y, sr = librosa.load(audio_path, sr=22050)
    plt.figure(figsize=(10, 4))
    librosa.display.waveshow(y, sr=sr)
    plt.title("Audio Waveform")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.show()
else:
    print("No file selected.")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 60ms/step - accuracy: 0.4981 - loss: 17.4154 - val_accuracy: 0.5417 - val_loss: 7.7925
Epoch 2/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.5382 - loss: 15.4147 - val_accuracy: 0.5833 - val_loss: 1.2102
Epoch 3/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5836 - loss: 8.3755 - val_accuracy: 0.4583 - val_loss: 6.0754
Epoch 4/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.5112 - loss: 16.3854 - val_accuracy: 1.0000 - val_loss: 0.0728
Epoch 5/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.4997 - loss: 10.8929 - val_accuracy: 0.5417 - val_loss: 5.7020
Epoch 6/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.4881 - loss: 9.6966 - val_accuracy: 0.5417 - val_loss: 1.9266
Epoch 7/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



Select an audio file...
No file selected.
