In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

csv_location = "exrc06p02_voice.csv"

# Load input CSV which contains data related to human voices into pandas.DataFrame
df = pd.read_csv(csv_location)

# # Get basic information about data
# print(df.info())  # prints concise summary about DataFrame's structure
# print(df.head())  # prints first five rows - default

# Extract Feature (X) and Target (y)
X = df.iloc[:, :-1]   # Features (all rows and columns except last column)
y = df.iloc[:, -1]    # Target (all rows with last column Only)

# # Lets print info related to datatypes for Features and Target
# # Regression models works only with numeric data
# print(X.info())   # All Features are numeric
# print(y.info())   # Target value is non-numeric

# Lets print what diff values on Target ie ´label´ column 
# print(y.value_counts())   # values are either male or female

# Map ´label´ column ie Target to binary: male -> 0, female -> 1
y = y.map({"male": 0, "female": 1})

# print(y.info())         # confirm the datatype again
# print(y.value_counts()) # confirm the values and distribution for Target ie ´label´ column 

# Split train/test set 70/30 %
X_train, X_test, y_train, y_test = train_test_split(
    X, 
    y, 
    train_size=0.7, # to split data as 70% for training and rest 30% for testing
    stratify=y,     # to keep the same class ratio in training and test sets
    random_state=42 # to ensure same rows go to train and test sets in every run for consistency purpose
)

# Create SVM model (Support Vector Classifier)
model = SVC(
    kernel="rbf",             # use RBF kernel to learn non-linear decision boundaries
    class_weight="balanced",  # adjust importance of classes to handle imbalanced data
    random_state=42           # ensure reproducible and consistent results
)

# Train the SVM model
model.fit(X_train, y_train)

# Prediction with test data
y_pred = model.predict(X_test)

# Evaluate model's Accuracy Score and Confusion Matrix
acc_score = accuracy_score(y_test, y_pred)      
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy Score with SVM Model: {acc_score:.4f}")
print(f"\nConfusion Matrix with SVM Model:\n{conf_matrix}")

Accuracy Score with SVM Model: 0.6614

Confusion Matrix with SVM Model:
[[377  99]
 [223 252]]
