In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define paths
# Folder that is scanned for the raw .m4a recordings.
# NOTE(review): the doubled "s" in "recordingss" looks like a typo — confirm it
# matches the folder name on disk before changing it.
RECORDINGS_FOLDER = "recordingss"
# Folder where the extracted MFCC arrays are stored as .npy files.
FEATURES_FOLDER = "features"
# File the trained model is written to with joblib.dump.
MODEL_FILE = "cnn_model.pkl"


In [None]:
# Ensure the features folder exists. exist_ok=True turns the call into a no-op
# when the folder is already there, so no separate existence check (which could
# race with another process creating the folder) is needed.
os.makedirs(FEATURES_FOLDER, exist_ok=True)

# Turn one audio file into a fixed-size MFCC tensor
def extract_mfcc_features(audio_file, fixed_length=20, max_time_steps=50):
    """Load an audio file and return its MFCC matrix with a fixed shape.

    Args:
        audio_file: Path of the audio file handed to ``librosa.load``.
        fixed_length: Number of MFCC coefficients (forwarded as ``n_mfcc``).
        max_time_steps: Number of time frames to keep. Shorter inputs are
            zero-padded on the right, longer ones are cut off.

    Returns:
        Array of shape ``(fixed_length, max_time_steps, 1)``; the trailing
        axis is the single channel expected by the CNN input.
    """
    # sr=None asks librosa to keep the file's own sampling rate
    signal, sample_rate = librosa.load(audio_file, sr=None)
    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=fixed_length)

    # Positive -> frames to add on the right, negative -> frames to drop
    missing = max_time_steps - coeffs.shape[1]
    if missing > 0:
        coeffs = np.pad(coeffs, ((0, 0), (0, missing)), mode='constant')
    elif missing < 0:
        coeffs = coeffs[:, :max_time_steps]

    return coeffs[..., np.newaxis]  # append the channel axis


In [None]:
# Extract MFCC features for every .m4a recording and store each as a .npy file
for audio_file in os.listdir(RECORDINGS_FOLDER):
    if audio_file.endswith('.m4a'):
        mfcc = extract_mfcc_features(os.path.join(RECORDINGS_FOLDER, audio_file))
        # splitext swaps only the trailing extension; str.replace would also
        # rewrite a ".m4a" that happens to appear earlier in the file name.
        feature_name = os.path.splitext(audio_file)[0] + '.npy'
        np.save(os.path.join(FEATURES_FOLDER, feature_name), mfcc)

# Function to load and process MFCC features
def load_features_and_labels():
    """Load the stored MFCC arrays and build the matching label vector.

    Files in FEATURES_FOLDER whose names start with "aut_" are labelled 1 and
    files starting with "split-" are labelled 0. Only ``.npy`` files are read,
    and names are sorted so the sample order (and therefore any seeded
    train/test split downstream) does not depend on directory listing order.

    Returns:
        tuple: ``(X, y)`` where ``X`` stacks the loaded arrays along a new
        first axis (label-1 samples first) and ``y`` is a 1-D float array of
        ones followed by zeros.

    Raises:
        ValueError: If either class has no feature files.
    """
    # List the folder once; sorting makes the result reproducible.
    feature_files = sorted(f for f in os.listdir(FEATURES_FOLDER) if f.endswith(".npy"))
    autistic_files = [f for f in feature_files if f.startswith("aut_")]
    non_autistic_files = [f for f in feature_files if f.startswith("split-")]

    # Fail early with a readable message instead of an obscure stacking error
    # (or a silently malformed array) when one class is missing.
    if not autistic_files or not non_autistic_files:
        raise ValueError(
            f"Need at least one 'aut_' and one 'split-' .npy file in "
            f"{FEATURES_FOLDER!r}; found {len(autistic_files)} and "
            f"{len(non_autistic_files)}."
        )

    autistic_data = [np.load(os.path.join(FEATURES_FOLDER, f)) for f in autistic_files]
    non_autistic_data = [np.load(os.path.join(FEATURES_FOLDER, f)) for f in non_autistic_files]

    X = np.stack(autistic_data + non_autistic_data)
    y = np.concatenate((np.ones(len(autistic_data)), np.zeros(len(non_autistic_data))))

    return X, y


  y, sr = librosa.load(audio_file, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(audio_file, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(audio_file, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(audio_file, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(audio_file, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(au

In [None]:
# Load dataset: feature tensors plus integer class labels
X, y = load_features_and_labels()
# Make sure X is an ndarray and one-hot encode the two classes
X, y = np.array(X), to_categorical(y, num_classes=2)


In [None]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# CNN Model
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and batch
# shuffling are as repeatable between runs as the backend allows.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input with an explicit Input layer. Passing input_shape= to
    # the first Conv2D makes Keras emit a UserWarning for Sequential models.
    tf.keras.Input(shape=(20, 50, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # Two softmax outputs, matching the two-column one-hot labels
    Dense(2, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
# NOTE: the test split is also passed as validation data, so the val_* values
# logged per epoch are computed on the same samples as the final metrics below.
model.fit(X_train, y_train, epochs=30, batch_size=16, validation_data=(X_test, y_test))

# Evaluate model
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)  # predicted class index per sample
y_test_classes = np.argmax(y_test, axis=1)  # undo the one-hot encoding

accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision = precision_score(y_test_classes, y_pred_classes)
recall = recall_score(y_test_classes, y_pred_classes)
f1 = f1_score(y_test_classes, y_pred_classes)

print(f"🎯 CNN Model Accuracy: {accuracy:.2f}")
print(f"🎯 Precision: {precision:.2f}")
print(f"🎯 Recall: {recall:.2f}")
print(f"🎯 F1-score: {f1:.2f}")

# Save model as .pkl
joblib.dump(model, MODEL_FILE)
print(f"✅ Model saved as {MODEL_FILE}")

# Also save in Keras' own HDF5 format. tf.keras.models.load_model(), which the
# inference cell of this notebook uses, reads Keras-written files and cannot
# open a joblib pickle.
keras_model_file = os.path.splitext(MODEL_FILE)[0] + ".h5"
model.save(keras_model_file)
print(f"✅ Model also saved as {keras_model_file}")


Epoch 1/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 198ms/step - accuracy: 0.5133 - loss: 5.0997 - val_accuracy: 0.5455 - val_loss: 6.1657
Epoch 2/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.5933 - loss: 7.2663 - val_accuracy: 0.5455 - val_loss: 4.4081
Epoch 3/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step - accuracy: 0.7567 - loss: 4.8858 - val_accuracy: 0.7273 - val_loss: 1.4814
Epoch 4/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.7983 - loss: 2.2765 - val_accuracy: 0.9091 - val_loss: 1.5578
Epoch 5/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - accuracy: 0.8783 - loss: 2.0369 - val_accuracy: 0.8182 - val_loss: 0.7857
Epoch 6/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step - accuracy: 0.8783 - loss: 0.9451 - val_accuracy: 0.8182 - val_loss: 2.3677
Epoch 7/30
[1m2/2[0m [32m━━━━━━━━━━━━━━



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
🎯 CNN Model Accuracy: 0.91
🎯 Precision: 0.86
🎯 Recall: 1.00
🎯 F1-score: 0.92
✅ Model saved as cnn_model.pkl


In [None]:
from google.colab import files

# Ask Colab to send the pickled model to the browser as a download. The literal
# matches the MODEL_FILE value set in the path-definition cell.
files.download("cnn_model.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Evaluate model: turn softmax outputs and one-hot labels back into class indices
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_test_classes = y_test.argmax(axis=1)

# Score the predictions with each metric in turn
accuracy, precision, recall, f1 = (
    score_fn(y_test_classes, y_pred_classes)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"🎯 CNN Model Accuracy: {accuracy:.2f}")
print(f"🎯 Precision: {precision:.2f}")
print(f"🎯 Recall: {recall:.2f}")
print(f"🎯 F1-score: {f1:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
🎯 CNN Model Accuracy: 0.91
🎯 Precision: 0.86
🎯 Recall: 1.00
🎯 F1-score: 0.92


In [None]:
import librosa
import numpy as np
import tensorflow as tf
from google.colab import files

# Path of the Keras model file to load. This rebinds MODEL_FILE, which earlier
# in the notebook pointed at the joblib pickle "cnn_model.pkl".
MODEL_FILE = "cnn_model.h5"

# Load trained CNN model
# NOTE(review): load_model expects a file written by Keras (model.save), not a
# joblib pickle — confirm cnn_model.h5 exists before running this cell.
model = tf.keras.models.load_model(MODEL_FILE)

# Function to extract MFCC for a single test file
def extract_mfcc_features(audio_file, fixed_length=20, max_time_steps=50):
    """Return a batched, fixed-size MFCC tensor for one audio file.

    This definition replaces the earlier ``extract_mfcc_features`` in the
    notebook namespace. It accepts the same parameters, but additionally adds
    a leading batch axis so the result can be passed straight to
    ``model.predict``.

    Args:
        audio_file: Path of the audio file handed to ``librosa.load``.
        fixed_length: Number of MFCC coefficients (forwarded as ``n_mfcc``).
        max_time_steps: Number of time frames to keep. Shorter inputs are
            zero-padded on the right, longer ones are truncated. Defaults to
            50, the width the model was built for.

    Returns:
        Array of shape ``(1, fixed_length, max_time_steps, 1)``.
    """
    # sr=None asks librosa to keep the file's own sampling rate
    y, sr = librosa.load(audio_file, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=fixed_length)

    # Ensure the time axis is always exactly max_time_steps frames long
    n_frames = mfcc.shape[1]
    if n_frames < max_time_steps:
        pad_width = max_time_steps - n_frames
        mfcc = np.pad(mfcc, ((0, 0), (0, pad_width)), mode='constant')
    elif n_frames > max_time_steps:
        mfcc = mfcc[:, :max_time_steps]

    # Add batch axis in front and channel axis at the end
    return np.expand_dims(mfcc, axis=(0, -1))

# Upload a test file
print("📂 Upload an .m4a file for testing")
uploaded = files.upload()
# If nothing was uploaded (e.g. the dialog was dismissed) the mapping is empty;
# fail with a readable message instead of an IndexError on the lookup below.
if not uploaded:
    raise ValueError("No file was uploaded; run the cell again and choose an .m4a file.")
# Only the first uploaded file is classified.
test_file = next(iter(uploaded))

# Run inference
mfcc_features = extract_mfcc_features(test_file)
prediction = model.predict(mfcc_features)
# Class index 1 is the "aut_" class, index 0 the "split-" class
label = "Autistic" if np.argmax(prediction) == 1 else "Non-Autistic"

print(f"🧠 Prediction: {label}")




📂 Upload an .m4a file for testing


Saving non_imdad.m4a to non_imdad.m4a


  y, sr = librosa.load(audio_file, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
🧠 Prediction: Non-Autistic
