In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv("titanic.csv")


In [2]:
# Drop irrelevant columns
data = data.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1)

# Handle missing values
data["Age"].fillna(data["Age"].mean(), inplace=True)
data["Embarked"].fillna(data["Embarked"].mode()[0], inplace=True)

# Convert categorical variables to numerical
data["Sex"] = data["Sex"].map({"male": 0, "female": 1})
data["Embarked"] = data["Embarked"].map({"S": 0, "C": 1, "Q": 2})

# Split the data into features (X) and target (y)
X = data.drop("Survived", axis=1)
y = data["Survived"]


In [3]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Create kNN model
knn_model = KNeighborsClassifier()

# Create SVM model
svm_model = SVC()


In [4]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Perform k-fold cross-validation for kNN
kfold = 10
knn_scores = cross_val_score(knn_model, X, y, cv=kfold)
knn_avg_accuracy = knn_scores.mean()

# Perform stratified k-fold cross-validation for SVM
stratified_kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
svm_scores = cross_val_score(svm_model, X, y, cv=stratified_kfold)
svm_avg_accuracy = svm_scores.mean()

# Print the average accuracy scores
print("kNN Average Accuracy:", knn_avg_accuracy)
print("SVM Average Accuracy:", svm_avg_accuracy)


kNN Average Accuracy: 0.6959300873907617
SVM Average Accuracy: 0.6789637952559302
