In [29]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [30]:
# Read the dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='voice.csv')

In [31]:
# Every column except the last goes into the feature matrix; the last column
# is kept separately as the target vector.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

In [32]:
# Learn the label vocabulary from y, then replace y with its encoded form.
# `le` is kept so predictions can be mapped back with le.inverse_transform.
le = LabelEncoder().fit(y)
y = le.transform(y)

In [33]:
# Scale the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [34]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [35]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.base import BaseEstimator, ClassifierMixin

In [36]:
# Define the neural network
def build_nn_model(input_dim=None):
    """Build and compile the feed-forward binary classifier.

    The network is three ReLU Dense layers (256, 128, 64 units), each followed
    by Dropout(0.5), and a single sigmoid output unit. It is compiled with the
    Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. When omitted, it is read from the
        notebook-level ``X_train`` (``X_train.shape[1]``), which therefore
        must already exist at call time.

    Returns
    -------
    The compiled, untrained Sequential model.
    """
    if input_dim is None:
        # Backward-compatible default: derive the width from the global
        # training matrix, exactly as the parameterless version did.
        input_dim = X_train.shape[1]

    model = Sequential([
        Dense(256, activation='relu', input_shape=(input_dim,)),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')  # Use sigmoid for binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [37]:
# Seed the Python, NumPy and TensorFlow random generators before the network
# is created. Without this the weight initialisation (and later dropout and
# batch shuffling) differ on every run, while the other stochastic steps in
# this notebook are pinned with random_state=42.
tf.keras.utils.set_random_seed(42)
nn_model = build_nn_model()

In [38]:
class KerasClassifier(ClassifierMixin, BaseEstimator):
    """Thin scikit-learn adapter around an already-compiled binary Keras model.

    The wrapped model is expected to emit one sigmoid probability per sample
    (the probability of class 1), so ``y`` passed to ``fit`` should be encoded
    as 0/1.

    ``ClassifierMixin`` is listed before ``BaseEstimator`` as scikit-learn
    requires for mixins; with the reverse order the classifier-specific
    behaviour of the mixin can be shadowed by ``BaseEstimator``.

    Parameters
    ----------
    model : compiled Keras model exposing ``fit`` and ``predict``.
    epochs : int, default=50
        Number of training epochs forwarded to ``model.fit``.
    batch_size : int, default=32
        Mini-batch size forwarded to ``model.fit``.
    """

    def __init__(self, model, epochs=50, batch_size=32):
        # Parameters are stored unmodified so get_params()/clone() work.
        self.model = model
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self, X, y):
        """Train the wrapped model on ``(X, y)`` and return ``self``."""
        # Fitted classifiers are expected to expose the labels seen in training.
        self.classes_ = np.unique(y)
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
        return self

    def _positive_proba(self, X):
        """Return the model output as an ``(n_samples, 1)`` column of P(class 1)."""
        return np.asarray(self.model.predict(X)).reshape(-1, 1)

    def predict(self, X):
        """Return hard 0/1 labels as a 1-D int32 array (probability > 0.5 -> 1)."""
        # ravel() gives the (n_samples,) shape that scikit-learn metrics and
        # meta-estimators expect, instead of an (n_samples, 1) column.
        return (self._positive_proba(X) > 0.5).astype("int32").ravel()

    def predict_proba(self, X):
        """Return an ``(n_samples, 2)`` array with columns ``[P(0), P(1)]``."""
        # Run the network once and reuse the result for both columns.
        positive = self._positive_proba(X)
        return np.hstack([1 - positive, positive])

In [39]:
# Two tree-based ensembles with the same size and seed, created unfitted.
rf_model, gb_model = (
    RandomForestClassifier(random_state=42, n_estimators=100),
    GradientBoostingClassifier(random_state=42, n_estimators=100),
)

In [40]:
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score


In [41]:
# Wrap the compiled network so it can be used as a scikit-learn estimator;
# epochs and batch_size keep their constructor defaults.
keras_clf = KerasClassifier(nn_model)


In [42]:
# Soft voting: the ensemble combines the members' predicted probabilities.
ensemble_model = VotingClassifier(
    voting='soft',
    estimators=[('nn', keras_clf), ('rf', rf_model), ('gb', gb_model)],
)


In [43]:
# Train every member of the ensemble on the training split.
ensemble_model.fit(X=X_train, y=y_train)



In [44]:
# Score the fitted ensemble on the held-out split and report accuracy.
y_pred = ensemble_model.predict(X=X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Test Accuracy: {test_accuracy:.4f}')

Test Accuracy: 0.9842


In [45]:
import pickle
# Persist the fitted ensemble to disk.
# NOTE: only `ensemble_model` is written; the fitted `scaler` and label
# encoder `le` are not part of this file, so whoever loads it must apply
# the same preprocessing separately.
with open('gender_recognition_model.pkl', mode='wb') as model_file:
    pickle.dump(ensemble_model, model_file)