In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


Loading and Preprocessing

In [3]:
df = pd.read_csv('titanic.csv')
print(df.head())
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df['Age'].fillna(df['Age'].median(), inplace=True)

X = df[['Pclass', 'Age', 'Sex']]
y = df['Survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)


Training the model

In [4]:
# Initialize and train the SVM classifier
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
#METRICS
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
print(f"SVM - Accuracy: {accuracy_svm:.2f}")
print(f"SVM - Precision: {precision_svm:.2f}")
print(f"SVM - Recall: {recall_svm:.2f}")
'''#k means --
df = pd.read_csv('titanic.csv')
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df['Age'].fillna(df['Age'].median(), inplace=True)
# Select relevant features for clustering
X = df[['Pclass', 'Age', 'Sex']]
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_scaled)
df['Cluster'] = kmeans.labels_
plt.figure(figsize=(10, 6))
sns.scatterplot(x=X['Age'], y=X['Pclass'], 
        hue=df['Cluster'], palette='viridis', style=df['Sex'])
plt.title('k-Means Clustering Results')
plt.xlabel('Age')
plt.ylabel('Pclass')
plt.show()'''

SVM - Accuracy: 0.78
SVM - Precision: 0.75
SVM - Recall: 0.70


"#k means --\ndf = pd.read_csv('titanic.csv')\ndf['Sex'] = df['Sex'].map({'male': 0, 'female': 1})\ndf['Age'].fillna(df['Age'].median(), inplace=True)\n# Select relevant features for clustering\nX = df[['Pclass', 'Age', 'Sex']]\nfrom sklearn.preprocessing import StandardScaler\nscaler = StandardScaler()\nX_scaled = scaler.fit_transform(X)\nkmeans = KMeans(n_clusters=3, random_state=42)\nkmeans.fit(X_scaled)\ndf['Cluster'] = kmeans.labels_\nplt.figure(figsize=(10, 6))\nsns.scatterplot(x=X['Age'], y=X['Pclass'], \n        hue=df['Cluster'], palette='viridis', style=df['Sex'])\nplt.title('k-Means Clustering Results')\nplt.xlabel('Age')\nplt.ylabel('Pclass')\nplt.show()"

In [5]:
# Make predictions
y_pred_svm = svm_model.predict(X_test)


In [6]:
# Calculate metrics
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)

print(f"SVM - Accuracy: {accuracy_svm:.2f}")
print(f"SVM - Precision: {precision_svm:.2f}")
print(f"SVM - Recall: {recall_svm:.2f}")


SVM - Accuracy: 0.78
SVM - Precision: 0.75
SVM - Recall: 0.70
