In [None]:
import os
import librosa
import soundfile as sf

def preprocess_audio(file_path, target_sr=16000):
    """Load an audio file and bring it to ``target_sr`` if needed.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        target_sr: Desired sampling rate in Hz (default 16000).

    Returns:
        Tuple ``(audio, target_sr)``: the samples, resampled only when the
        file's native rate differs from ``target_sr``, and the target rate.
    """
    # sr=None asks librosa to keep the file's native sampling rate.
    samples, native_sr = librosa.load(file_path, sr=None)
    if native_sr == target_sr:
        # Already at the requested rate; hand the samples back untouched.
        return samples, target_sr
    resampled = librosa.resample(y=samples, orig_sr=native_sr, target_sr=target_sr)
    return resampled, target_sr

def preprocess_and_save(input_folder, output_folder, target_sr=16000):
    """Resample every WAV file under ``input_folder`` and mirror it to ``output_folder``.

    The input is expected to contain one sub-directory per actor; each
    sub-directory is recreated under ``output_folder`` and every ``.wav`` file
    inside it (extension matched case-insensitively) is passed through
    ``preprocess_audio`` and written with ``sf.write`` under the same file name.
    Entries of ``input_folder`` that are not directories are ignored.

    Args:
        input_folder: Root directory holding the per-actor sub-directories.
        output_folder: Root directory for the processed copies; created if missing.
        target_sr: Sampling rate in Hz forwarded to ``preprocess_audio``.

    Returns:
        None. Progress is reported with one printed line per written file.
    """
    # exist_ok avoids the check-then-create race and makes re-runs harmless.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for a stable run order.
    for actor in sorted(os.listdir(input_folder)):
        actor_path = os.path.join(input_folder, actor)
        if not os.path.isdir(actor_path):
            continue

        output_actor_path = os.path.join(output_folder, actor)
        os.makedirs(output_actor_path, exist_ok=True)

        for file_name in sorted(os.listdir(actor_path)):
            # Compare on the lower-cased name so '.WAV' files are not skipped.
            if not file_name.lower().endswith('.wav'):
                continue
            file_path = os.path.join(actor_path, file_name)
            audio, sr = preprocess_audio(file_path, target_sr)
            output_file_path = os.path.join(output_actor_path, file_name)
            sf.write(output_file_path, audio, sr)
            print(f"Processed and saved: {output_file_path}")

# Source dataset root and destination for the resampled copies.
# NOTE(review): these are absolute Windows paths tied to one machine; adjust
# them (or derive them from a configurable base directory) before re-running.
input_folder = 'D:\\ai ml prac\\actual\\data set\\archive'
output_folder = 'D:\\ai ml prac\\actual\\data set\\processed audio'

# Resample everything under input_folder using the default target_sr (16000).
preprocess_and_save(input_folder, output_folder)


Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-01-01-01-01-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-01-01-01-02-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-01-01-02-01-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-01-01-02-02-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-02-01-01-01-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-02-01-01-02-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-02-01-02-01-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-02-01-02-02-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-02-02-01-01-01.wav
Processed and saved: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-02-02-01-

In [None]:
import os
import numpy as np
import librosa
import pickle
import re  

def extract_mfcc(file_path, n_mfcc=13, sr=16000):
    """Compute a fixed-length MFCC summary vector for one audio file.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from librosa (default 13).
        sr: Sampling rate passed to ``librosa.load`` (default 16000).

    Returns:
        The MFCC matrix averaged over axis 0 of its transpose, i.e. one mean
        value per row of the matrix returned by ``librosa.feature.mfcc``.
    """
    signal, rate = librosa.load(file_path, sr=sr)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
    # Collapse the second axis of the MFCC matrix so every file yields a
    # vector of the same length regardless of its duration.
    return np.mean(coefficients.T, axis=0)

def extract_features(input_folder, output_file):
    """Extract MFCC features for every actor's WAV files and pickle them.

    Each sub-directory of ``input_folder`` whose name contains a number is
    treated as one actor. The first run of digits in the name is the actor id:
    even ids are labelled ``'female'`` and odd ids ``'male'``
    (NOTE(review): presumably the dataset's actor-numbering convention —
    confirm). Directories without digits and non-directory entries are skipped.

    Actors and files are visited in sorted order so the pickled sample order —
    and therefore any seeded split performed on it later — is reproducible
    across machines; ``os.listdir`` alone gives no ordering guarantee.

    Args:
        input_folder: Root directory holding the per-actor sub-directories.
        output_file: Path of the pickle file to write.

    Returns:
        None. ``(features, labels)`` — two parallel lists — is pickled to
        ``output_file``; progress is printed per actor.
    """
    features = []
    labels = []

    for actor in sorted(os.listdir(input_folder)):
        actor_path = os.path.join(input_folder, actor)
        if not os.path.isdir(actor_path):
            continue

        match = re.search(r'\d+', actor)
        if not match:
            # No numeric id in the directory name, so no label can be derived.
            continue

        actor_id = int(match.group())
        label = 'female' if actor_id % 2 == 0 else 'male'
        print(f"Processing actor {actor} with label {label}")

        for file_name in sorted(os.listdir(actor_path)):
            # Compare on the lower-cased name so '.WAV' files are not skipped.
            if not file_name.lower().endswith('.wav'):
                continue
            file_path = os.path.join(actor_path, file_name)
            features.append(extract_mfcc(file_path))
            labels.append(label)

    with open(output_file, 'wb') as f:
        pickle.dump((features, labels), f)
    print(f"Features and labels saved to {output_file}")

# Read the resampled audio written by the preprocessing cell and store the
# extracted (features, labels) pair next to the dataset.
# NOTE(review): absolute Windows paths tied to one machine; this also rebinds
# the kernel-global `input_folder` set by the earlier cell.
input_folder = 'D:\\ai ml prac\\actual\\data set\\processed audio'
output_file = 'D:\\ai ml prac\\actual\\data set\\features_labels.pkl'

extract_features(input_folder, output_file)


Processing actor Actor_01 with label male
Processing actor Actor_02 with label female
Processing actor Actor_03 with label male
Processing actor Actor_04 with label female
Processing actor Actor_05 with label male
Processing actor Actor_06 with label female
Processing actor Actor_07 with label male
Processing actor Actor_08 with label female
Processing actor Actor_09 with label male
Processing actor Actor_10 with label female
Processing actor Actor_11 with label male
Processing actor Actor_12 with label female
Processing actor Actor_13 with label male
Processing actor Actor_14 with label female
Processing actor Actor_15 with label male
Processing actor Actor_16 with label female
Processing actor Actor_17 with label male
Processing actor Actor_18 with label female
Processing actor Actor_19 with label male
Processing actor Actor_20 with label female
Processing actor Actor_21 with label male
Processing actor Actor_22 with label female
Processing actor Actor_23 with label male
Processing a

In [None]:
import pickle
from collections import Counter

# Reload the pickled (features, labels) pair written by extract_features.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('D:\\ai ml prac\\actual\\data set\\features_labels.pkl', 'rb') as f:
    features, labels = pickle.load(f)

# Sanity check: count how many samples carry each label.
label_counts = Counter(labels)
print("Label distribution:", label_counts)


Label distribution: Counter({'male': 720, 'female': 720})


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Stack the feature vectors and labels (bound by an earlier cell) into arrays.
X = np.array(features)
y = np.array(labels)

# Hold out 20% of the samples for evaluation; random_state fixes the shuffle.
# NOTE(review): the split is per sample, not per actor, so recordings of the
# same speaker can land in both train and test. The reported score may
# overstate accuracy on unseen speakers — consider a group-wise split keyed
# on the actor, and `stratify=y` to keep the classes balanced. TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear-kernel support vector classifier trained on the raw feature vectors.
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# Overall accuracy plus per-class precision/recall/F1 on the held-out samples.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)


Accuracy: 0.97
Classification Report:
              precision    recall  f1-score   support

      female       0.99      0.95      0.97       148
        male       0.95      0.99      0.97       140

    accuracy                           0.97       288
   macro avg       0.97      0.97      0.97       288
weighted avg       0.97      0.97      0.97       288



In [None]:
import joblib
# Persist the fitted classifier `clf` (bound by the training cell) to a file
# in the current working directory so the prediction cell can reload it.
joblib.dump(clf, 'gender_classifier_model.pkl')


['gender_classifier_model.pkl']

In [None]:
# Sample recording used by the cells below.
# NOTE(review): absolute Windows path tied to one machine.
file_path = r'D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-01-01-01-01-01.wav'


In [None]:
import os

# Confirm the sample path (bound by an earlier cell as `file_path`) exists
# before it is used for prediction.
if os.path.exists(file_path):
    print(f"The file exists: {file_path}")
else:
    print(f"The file does not exist: {file_path}")


The file exists: D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-01-01-01-01-01.wav


In [None]:
import os
import numpy as np
import librosa
import joblib

# Reload the classifier from the current working directory; this rebinds the
# kernel-global `clf` that predict_gender reads.
# NOTE: joblib.load unpickles the file — only load model files you trust.
clf = joblib.load('gender_classifier_model.pkl')

def extract_mfcc(file_path, n_mfcc=13, sr=16000):
    """Best-effort MFCC summary vector for one audio file.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from librosa (default 13).
        sr: Sampling rate passed to ``librosa.load`` (default 16000).

    Returns:
        The MFCC matrix averaged over axis 0 of its transpose, or ``None`` if
        loading or feature extraction raised; the error is printed, not raised.
    """
    try:
        signal, rate = librosa.load(file_path, sr=sr)
        coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
        averaged = np.mean(coefficients.T, axis=0)
    except Exception as e:
        # Deliberately broad: any failure is reported and signalled with None
        # so the caller can decide what to do.
        print(f"Error processing file {file_path}: {e}")
        return None
    return averaged

def predict_gender(audio_file):
    """Predict the label of one audio file with the module-level ``clf``.

    Args:
        audio_file: Path to the audio file to classify.

    Returns:
        The first element of ``clf.predict`` for the file's MFCC vector, or
        the string ``"Error"`` when ``extract_mfcc`` returned ``None``.

    Raises:
        FileNotFoundError: If ``audio_file`` is not an existing regular file.
    """
    if os.path.isfile(audio_file):
        feature_vector = extract_mfcc(audio_file)
        if feature_vector is None:
            # Feature extraction failed and has already reported why.
            return "Error"
        # predict expects a 2-D batch, so wrap the single vector in a list.
        predicted = clf.predict([feature_vector])
        return predicted[0]
    raise FileNotFoundError(f"The file {audio_file} does not exist.")

# Demo: classify one recording from the processed dataset.
# NOTE(review): absolute Windows path tied to one machine.
file_path = r'D:\ai ml prac\actual\data set\processed audio\Actor_01\03-01-01-01-01-01-01.wav'
# predict_gender raises FileNotFoundError for a missing file, so check first
# and print a message instead.
if os.path.exists(file_path):
    result = predict_gender(file_path)
    print(f'The predicted gender is: {result}')
else:
    print(f"The file does not exist: {file_path}")


The predicted gender is: male
