In [25]:
import numpy as np
import pandas as pd

# Load the Titanic dataset
titanic_df = pd.read_csv('Titanic-Dataset.csv')


# Preprocess the data
# Fill missing values
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df['Age'].median())
titanic_df['Embarked'] = titanic_df['Embarked'].fillna(titanic_df['Embarked'].mode()[0])
titanic_df['Fare'] = titanic_df['Fare'].fillna(titanic_df['Fare'].median())

# Convert categorical features to numerical
titanic_df['Sex'] = titanic_df['Sex'].map({'male': 0, 'female': 1})
titanic_df['Embarked'] = titanic_df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

# Select features and target variable
X = titanic_df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']].values
y = titanic_df['Survived'].values

# Handle NaN values in mean and std calculations
mean_X = np.nanmean(X, axis=0)
std_X = np.nanstd(X, axis=0)
mean_X = np.nan_to_num(mean_X, nan=0.0)
std_X = np.nan_to_num(std_X, nan=1.0)

# Normalize features
X_norm = (X - mean_X) / (std_X + 1e-8)  # Adding a small epsilon to avoid division by zero

# Define SVM class
class SVM:
    def __init__(self, C=1.0, max_iter=100, tol=1e-3):
        self.C = C  # Regularization parameter
        self.max_iter = max_iter  # Maximum number of iterations
        self.tol = tol  # Tolerance for stopping criteria
        self.w = None  # Weight vector
        self.b = 0  # Bias term
        self.alpha = None  # Lagrange multipliers
        self.support_vectors = None  # Support vectors

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # Initialize Lagrange multipliers
        self.alpha = np.zeros(n_samples)

        # SMO algorithm for training
        for _ in range(self.max_iter):
            alpha_prev = np.copy(self.alpha)

            for i in range(n_samples):
                # Randomly select second Lagrange multiplier
                j = np.random.randint(0, n_samples)
                while j == i:
                    j = np.random.randint(0, n_samples)

                # Compute kernel function (linear kernel)
                K_ij = np.dot(X[i], X[j])

                # Compute SVM decision function for samples i and j
                eta = 2.0 * K_ij - K_ij - K_ij

                # Update Lagrange multipliers
                self.alpha[j] += y[j] * (y[i] * (alpha_prev[i] - self.alpha[i]) - eta)
                self.alpha[j] = max(0, min(self.alpha[j], self.C))

                self.alpha[i] += y[i] * y[j] * (alpha_prev[j] - self.alpha[j])

            # Compute weight vector and bias
            self.w = np.dot(self.alpha * y, X)
            support_vector_indices = np.where(self.alpha > 1e-5)[0]
            self.b = np.mean(y[support_vector_indices] - np.dot(X[support_vector_indices], self.w))

            # Check for convergence
            diff = np.linalg.norm(self.alpha - alpha_prev)
            if diff < self.tol:
                break

        # Store support vectors
        self.support_vectors = X[support_vector_indices]

    def predict(self, X):
        return np.sign(np.dot(X, self.w) + self.b)

# Instantiate and train the SVM model
svm_model = SVM()
svm_model.fit(X_norm, y)

# Make predictions on a new data point (for demonstration)
# Here, we handle missing values in the new data point before making predictions
new_data_point = np.array([[1, 0, 30, 1, 0, 50, 1]])  # Example new data point
new_data_point_cleaned = np.nan_to_num((new_data_point - mean_X) / (std_X + 1e-8))
prediction = svm_model.predict(new_data_point_cleaned)
print("Survival prediction:", prediction)


Survival prediction: [nan]


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
