In [7]:
import os
import librosa
import numpy as np
from scipy.io import wavfile
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score,classification_report

In [8]:
def extract_features(file_path, n_mfcc=23):
    """Return the time-averaged MFCC vector of a WAV file.

    Parameters
    ----------
    file_path : str
        Path to a WAV file readable by ``scipy.io.wavfile.read``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 23.

    Returns
    -------
    numpy.ndarray
        The MFCC matrix averaged over its frame axis (``axis=1``), i.e. one
        value per coefficient.
    """
    sample_rate, audio_data = wavfile.read(file_path)

    # Samples are only cast to float, not rescaled, so features keep the
    # file's native PCM amplitude range.
    samples = audio_data.astype(float)

    # wavfile.read yields (n_samples, n_channels) for multi-channel files,
    # while librosa expects time on the last axis. Down-mix to mono so stereo
    # recordings are analysed along time instead of across channels.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    mfcc = librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc)
    return np.mean(mfcc, axis=1)


In [9]:
def load_data(data_dir):
    """Build feature and label arrays from a directory of class folders.

    Every sub-directory of ``data_dir`` is treated as one class: its name is
    used as the label for each ``.wav`` file found directly inside it.
    Entries of ``data_dir`` that are not directories, and files without a
    ``.wav`` extension, are ignored.

    Parameters
    ----------
    data_dir : str
        Directory whose sub-directories each hold the WAV files of one class.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)`` where ``features[i]`` is the result of
        ``extract_features`` for the i-th file and ``labels[i]`` is the name
        of the folder it came from.
    """
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order. Sorting makes the sample
    # order deterministic, so a seeded train/test split is reproducible.
    for label in sorted(os.listdir(data_dir)):
        label_path = os.path.join(data_dir, label)
        if not os.path.isdir(label_path):
            continue

        for filename in sorted(os.listdir(label_path)):
            # Match the extension case-insensitively so ".WAV" is not dropped.
            if not filename.lower().endswith(".wav"):
                continue
            features.append(extract_features(os.path.join(label_path, filename)))
            labels.append(label)

    return np.array(features), np.array(labels)
             

In [10]:
from sklearn.metrics import confusion_matrix


def evaluate_model(X_train,X_test,y_train,y_test):
    """Fit a fixed set of classifiers and return the most accurate one.

    Logistic regression, an SVC, a 5-nearest-neighbours classifier and a
    two-hidden-layer MLP are each fitted on the training split. For every
    model the test accuracy, confusion matrix and classification report are
    printed.

    NOTE(review): the returned model is chosen by its accuracy on the same
    test split that is reported, so that accuracy is an optimistic estimate.

    Parameters
    ----------
    X_train, X_test : array-like
        Training and test feature matrices.
    y_train, y_test : array-like
        Training and test labels.

    Returns
    -------
    estimator
        The fitted classifier with the highest test accuracy (the first one
        in the list above when several tie).
    """
    candidates = (
        LogisticRegression(max_iter=500, random_state=0),
        SVC(random_state=0),
        KNeighborsClassifier(n_neighbors=5),
        MLPClassifier(hidden_layer_sizes=(10, 8), max_iter=500, random_state=0),
    )

    scores = []
    for model in candidates:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

        score = accuracy_score(y_test, predictions)
        scores.append(score)

        # Per-model report: headline accuracy first, then the breakdown.
        print(f"For {model}:")
        print(f"Accuracy {score*100 :.2f}%")
        print(confusion_matrix(y_test, predictions))
        print(classification_report(y_test, predictions))
        print()

    winner_index = np.argmax(np.array(scores))
    return candidates[winner_index]


In [11]:
if __name__ == "__main__":
    # Train and compare the classifiers, then classify one held-out recording.
    data_dir="Audios" #B:\scream\Audios
    features, label_names = load_data(data_dir)
    if len(features) == 0:
        raise ValueError(f"No .wav files found in the class folders of {data_dir!r}")

    # Encode the folder names as integers through an explicit mapping, so an
    # unexpected class folder fails with a clear message instead of an opaque
    # string-to-int conversion error.
    label_ids = {'negative': 0, 'positive': 1}
    unexpected = sorted(set(label_names.tolist()) - set(label_ids))
    if unexpected:
        raise ValueError(
            f"Unexpected label folder(s) in {data_dir!r}: {unexpected}; "
            f"expected only {sorted(label_ids)}"
        )
    labels = np.array([label_ids[name] for name in label_names.tolist()], dtype=int)

    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

    scaler = StandardScaler()  # x = (x - mean_of_x) / std_of_x

    # Fit the scaler on the training split only, then reuse its statistics
    # for the test split and for the single prediction below.
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    best_model = evaluate_model(X_train, X_test, y_train, y_test)

    print("Best Model :", best_model)

    # Classify one recording that is outside the training directory.
    file_path = "single_prediction/133034__robinhood76__03023-cartoon-little-laughter.wav"
    data = extract_features(file_path)
    data = scaler.transform(data.reshape(1, -1))
    print(best_model.predict(data))

  sample_rate, audio_data= wavfile.read(file_path)


For LogisticRegression(max_iter=500, random_state=0):
Accuracy 100.00%
[[23  0]
 [ 0 20]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        20

    accuracy                           1.00        43
   macro avg       1.00      1.00      1.00        43
weighted avg       1.00      1.00      1.00        43


For SVC(random_state=0):
Accuracy 100.00%
[[23  0]
 [ 0 20]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        20

    accuracy                           1.00        43
   macro avg       1.00      1.00      1.00        43
weighted avg       1.00      1.00      1.00        43


For KNeighborsClassifier():
Accuracy 97.67%
[[23  0]
 [ 1 19]]
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        23
           1       1.

In [12]:
# Inspect the element type of the `labels` array (first entry).
type(labels[0])

numpy.int64