In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Load seaborn's "titanic" example dataset into a DataFrame.
# NOTE(review): per the seaborn docs this loader fetches from the online
# example-data repository on first use, so a fresh run needs network access.
df = sns.load_dataset(name='titanic')

In [None]:
# Impute gaps in every numeric column with that column's own median.
# The frame is updated column by column, so `df` itself is modified here.
numeric_column_names = df.select_dtypes(include='number').columns
for column_name in numeric_column_names:
    column_median = df[column_name].median()
    df[column_name] = df[column_name].fillna(column_median)

In [None]:
# Impute gaps in every category/object column with its most frequent value.
# mode() can return several values on a tie; the first one is used.
categorical_column_names = df.select_dtypes(include=['category', 'object']).columns
for column_name in categorical_column_names:
    most_frequent = df[column_name].mode()[0]
    df[column_name] = df[column_name].fillna(most_frequent)

In [None]:
# Separate features and target.
# In seaborn's titanic dataset the 'alive' column is just the yes/no text form
# of 'survived'. Keeping it among the features leaks the label into the model
# and makes any downstream score meaningless, so it is removed together with
# the target. errors='ignore' keeps this cell working on a frame that has no
# 'alive' column; a missing 'survived' still fails loudly on the next line.
X = df.drop(columns=['survived', 'alive'], errors='ignore')
y = df['survived']

In [None]:
# Partition the feature columns by dtype: category/object columns are the ones
# that get one-hot encoded, numeric columns are used as they are.
# NOTE(review): a column whose dtype is neither (e.g. bool) lands in neither
# list and is therefore left out of the feature matrix - confirm this is intended.
numeric_cols = X.select_dtypes(include='number').columns
categorical_cols = X.select_dtypes(include=['category', 'object']).columns

In [None]:
# One-hot encode categorical columns.
# drop='first' omits the first category of each feature from the output.
# A dense array is requested because the result is stacked with NumPy data.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old name (passing it raises TypeError), so try the current
# keyword first and fall back to the legacy one on older releases.
try:
    encoder = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(drop='first', sparse=False)
X_cat = encoder.fit_transform(X[categorical_cols])

In [None]:
# Ask the fitted encoder for the names of the columns it produced, using the
# original categorical column names as the input feature names.
encoded_cat_names = encoder.get_feature_names_out(input_features=categorical_cols)

In [None]:
# Build the full feature matrix: numeric columns first, then the one-hot
# columns, stacked side by side.
numeric_values = X[numeric_cols].to_numpy()
X_combined = np.hstack((numeric_values, X_cat))

In [None]:
# Column labels for the combined matrix: numeric names followed by the
# encoder-generated names, then show them.
final_feature_names = [*numeric_cols, *encoded_cat_names]
print("\nFinal Features After Encoding:", final_feature_names, sep="\n")

In [None]:
# Standardize every column of the combined matrix (numeric and one-hot alike).
# NOTE(review): the scaler is fitted on the full matrix; if a train/test split
# follows, its statistics will include the test rows - confirm this is acceptable.
scaler = StandardScaler().fit(X_combined)
X_scaled = scaler.transform(X_combined)