In [2]:
# =========================================
# Titanic Survival Prediction using SVM (Colab)
# =========================================

# Problem Statement:
# Predict whether a passenger survived (1) or not (0) on the Titanic
# using features like Pclass, Sex, Age, SibSp, Parch, Fare, Embarked.

# =========================================
# Step 1: Import Libraries
# =========================================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# =========================================
# Step 2: Load Dataset
# =========================================
# The CSV must already be present in the working directory
# (in Colab: upload train.csv before running this cell).
csv_path = "train.csv"
data = pd.read_csv(csv_path)
# Quick look at the first rows; only rendered by the notebook when this is
# the last expression of a cell.
data.head()

# =========================================
# Step 3: Data Preprocessing
# =========================================
# 3.1 Impute missing values: median for the numeric columns, most frequent
#     value for the port of embarkation.
# NOTE(review): these statistics are computed over every row of `data`,
# i.e. before any train/test split takes place.
age_median = data['Age'].median()
fare_median = data['Fare'].median()
embarked_mode = data['Embarked'].mode()[0]

data['Age'] = data['Age'].fillna(age_median)
data['Embarked'] = data['Embarked'].fillna(embarked_mode)
data['Fare'] = data['Fare'].fillna(fare_median)

# 3.2 Turn the text categories into integer codes. The fitted encoders are
#     kept so the identical mapping can be applied to new passengers later.
#     LabelEncoder numbers classes in sorted order, so (assuming the usual
#     Titanic values) female -> 0, male -> 1 and C -> 0, Q -> 1, S -> 2.
sex_encoder = LabelEncoder().fit(data['Sex'])
embarked_encoder = LabelEncoder().fit(data['Embarked'])

data['Sex'] = sex_encoder.transform(data['Sex'])
data['Embarked'] = embarked_encoder.transform(data['Embarked'])

# 3.3 Feature matrix (column order matters for scaling/prediction) and target
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
X, y = data[features], data['Survived']

# =========================================
# Step 4: Split dataset into training and testing, then scale
# =========================================
# Split BEFORE scaling: fitting the scaler on all rows would let the
# held-out rows influence the mean/std used to transform the training data
# (data leakage), which makes the reported test metrics optimistic.
# The row partition depends only on the number of rows and random_state,
# so it is the same partition as splitting the pre-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: learn mean/std from the training rows only, then apply
# those same statistics to the test rows (and later to any new sample).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so code that still refers to the fully scaled matrix keeps working;
# it now uses the training-set statistics as well.
X_scaled = scaler.transform(X)

# =========================================
# Step 5: Train SVM Classifier
# =========================================
# Hyperparameters:
#   kernel='rbf'      -> non-linear (Gaussian) kernel
#   C=1.0             -> regularization strength (smaller C = stronger regularization)
#   gamma='scale'     -> kernel coefficient derived automatically from the data
#   probability=True  -> enables predict_proba; scikit-learn calibrates it with
#                        an internal cross-validation, which slows down fit()
svm_params = {
    'kernel': 'rbf',
    'C': 1.0,
    'gamma': 'scale',
    'probability': True,
}
# fit() returns the estimator itself, so the fitted model is bound directly.
svm_model = SVC(**svm_params).fit(X_train, y_train)

# =========================================
# Step 6: Evaluate Model
# =========================================
# Predict labels for the held-out rows and compare them with the truth.
y_pred = svm_model.predict(X_test)

# Overall fraction of correct predictions
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1
report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

# Rows = true class, columns = predicted class
matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", matrix)

# =========================================
# Step 7: Predict survival for a new sample passenger
# =========================================
# Categorical fields are given as raw text ('male'/'female', 'C'/'Q'/'S');
# they are converted below with the encoders fitted during preprocessing.
# Key order must match the feature order used to fit the scaler.
passenger_record = {
    'Pclass': 3,
    'Sex': 'male',
    'Age': 25,
    'SibSp': 0,
    'Parch': 0,
    'Fare': 7.25,
    'Embarked': 'S',
}
# One-row frame: each value becomes a single-element column.
sample_passenger = pd.DataFrame(
    {column: [value] for column, value in passenger_record.items()}
)

# Apply the training-time label encodings to the text columns
sample_passenger['Sex'] = sex_encoder.transform(sample_passenger['Sex'])
sample_passenger['Embarked'] = embarked_encoder.transform(sample_passenger['Embarked'])

# Standardize with the already-fitted scaler, then classify
sample_scaled = scaler.transform(sample_passenger)
predicted_class = svm_model.predict(sample_scaled)

if predicted_class[0] == 1:
    outcome = "Survived"
else:
    outcome = "Did not survive"
print("Predicted Class for Sample Passenger:", outcome)


Accuracy: 0.8156424581005587

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.89      0.85       105
           1       0.82      0.72      0.76        74

    accuracy                           0.82       179
   macro avg       0.82      0.80      0.81       179
weighted avg       0.82      0.82      0.81       179

Confusion Matrix:
 [[93 12]
 [21 53]]
Predicted Class for Sample Passenger: Did not survive
