In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml

# Load the Titanic dataset
titanic = fetch_openml(name='titanic', version=1, as_frame=True)
X = titanic.data
y = titanic.target

# Pre-processing steps
# For simplicity, we will drop non-numeric columns and handle missing values
X = X.drop(['name', 'cabin', 'embarked', 'boat', 'body', 'home.dest', 'ticket'], axis=1)
X['sex'] = X['sex'].map({'male': 0, 'female': 1})
X = X.apply(pd.to_numeric, errors='coerce')  # Convert non-numeric values to NaN
X = SimpleImputer(strategy='mean').fit_transform(X)  # Impute missing values with mean

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define kNN and SVM models
knn_model = KNeighborsClassifier()
svm_model = SVC()

# Combine models with pipelines for simplicity
knn_pipeline = make_pipeline(knn_model)
svm_pipeline = make_pipeline(svm_model)

# Define k-fold and stratified k-fold cross-validation techniques
k_fold = 5
stratified_k_fold = StratifiedKFold(n_splits=k_fold)

# Evaluate kNN model with k-fold cross-validation
knn_cross_val_scores = cross_val_score(knn_pipeline, X_train, y_train, cv=k_fold)
knn_avg_accuracy = knn_cross_val_scores.mean()
print(f'kNN Average Accuracy (k-fold): {knn_avg_accuracy}')

# Evaluate SVM model with stratified k-fold cross-validation
svm_cross_val_scores = cross_val_score(svm_pipeline, X_train, y_train, cv=stratified_k_fold)
svm_avg_accuracy = svm_cross_val_scores.mean()
print(f'SVM Average Accuracy (stratified k-fold): {svm_avg_accuracy}')


  warn(


kNN Average Accuracy (k-fold): 0.7908133971291866
SVM Average Accuracy (stratified k-fold): 0.8022328548644339
