In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the passenger table. The file carries an ".xls" extension but is read
# here as delimited text via read_csv.
data = pd.read_csv("Titanic-Dataset.xls")
# Work on an independent copy so that the in-place column edits applied to
# `df` further down do not silently rewrite the raw frame held in `data`.
df = data.copy()
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

In [4]:
# Label column and the predictor columns handed to the classifier.
target = 'Survived'
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']

# Swap the string categories in 'Sex' for integer codes so the column can
# take part in the numeric computations below.
df['Sex'] = LabelEncoder().fit(df['Sex']).transform(df['Sex'])

In [5]:
# Fill the gaps in each numeric column with that column's own median.
for col in ('Age', 'Fare'):
    df[col] = df[col].fillna(df[col].median())


In [6]:
# Pull the label vector and the design matrix out of the frame as NumPy arrays.
y = df[target].to_numpy()
X = df[features].to_numpy()

In [7]:
# m = number of rows, n = number of feature columns.
m, n = X.shape

# Sorted distinct labels and how many of them there are.
Classes = np.unique(y)
k = Classes.size

In [8]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Class priors: fraction of training rows carrying each label, ordered like
# `Classes`.
phi = np.array([np.mean(y_train == label) for label in Classes])

# Per-class feature means, one row per entry of `Classes`.
mu = np.array([x_train[y_train == label].mean(axis=0) for label in Classes])

# Subtract from every training row the mean of its own class. The mean is
# looked up by the class's position in `Classes`, not by the label value, so
# label sets other than exactly 0..K-1 are handled correctly.
X_centred = np.vstack([
    x_train[y_train == label] - mu[idx] for idx, label in enumerate(Classes)
])

# Shared covariance via SVD: with X_centred = U diag(S) Vt, the scatter matrix
# X_centred.T @ X_centred equals Vt.T diag(S**2) Vt. Dividing by the number of
# training rows turns the scatter into the covariance estimate.
U, S, Vt = np.linalg.svd(X_centred, full_matrices=False)
Sigma = (Vt.T * (S**2 / len(x_train))) @ Vt

print(Sigma.shape)

(6, 6)


In [9]:
def predict(x_new, means=None, cov=None, priors=None, classes=None):
    """Assign each row of ``x_new`` to the class with the highest LDA score.

    The score of class ``c`` for a sample ``x`` is
    ``-0.5 * (x - mean_c)^T cov^{-1} (x - mean_c) + log(prior_c)``.

    Parameters
    ----------
    x_new : array-like of shape (n_samples, n_features)
        Samples to classify.
    means, cov, priors, classes : array-like, optional
        Fitted parameters. Any argument left as ``None`` is read from the
        notebook-level ``mu``, ``Sigma``, ``phi`` and ``Classes`` respectively.
        ``means`` and ``priors`` must be ordered like ``classes``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Predicted class labels, i.e. entries of ``classes``.

    Raises
    ------
    ValueError
        If ``x_new`` is non-empty and not two-dimensional.
    numpy.linalg.LinAlgError
        If ``cov`` is singular.
    """
    # Fall back to the notebook globals only for parameters not supplied.
    means = np.asarray(mu if means is None else means, dtype=float)
    cov = np.asarray(Sigma if cov is None else cov, dtype=float)
    priors = np.asarray(phi if priors is None else priors, dtype=float)
    classes = np.asarray(Classes if classes is None else classes)

    x_new = np.asarray(x_new, dtype=float)
    if x_new.size == 0:
        # Nothing to classify.
        return np.empty(0, dtype=classes.dtype)
    if x_new.ndim != 2:
        raise ValueError("x_new must be 2-D with shape (n_samples, n_features)")

    inv_sigma = np.linalg.inv(cov)
    # diffs[i, c, :] = x_new[i] - means[c]
    diffs = x_new[:, None, :] - means[None, :, :]
    # Squared Mahalanobis distance of every sample to every class mean.
    maha = np.einsum("nkd,de,nke->nk", diffs, inv_sigma, diffs)
    scores = -0.5 * maha + np.log(priors)
    # argmax yields a position in `classes`; map it back to the actual label.
    return classes[np.argmax(scores, axis=1)]

In [10]:
# --- Step 5: Evaluate ---
# Accuracy is the fraction of rows whose predicted label equals the true one.
y_train_pred = predict(x_train)
train_acc = np.mean(y_train_pred == y_train)

y_test_pred = predict(x_test)
test_acc = np.mean(y_test_pred == y_test)

print(f"Training Accuracy: {train_acc:.3f}")
print(f"Test Accuracy: {test_acc:.3f}")

Training Accuracy: 0.798
Test Accuracy: 0.787


In [12]:
# Display the held-out feature matrix produced by train_test_split.
x_test

array([[ 3.    ,  1.    , 28.    ,  1.    ,  1.    , 15.2458],
       [ 2.    ,  1.    , 31.    ,  0.    ,  0.    , 10.5   ],
       [ 3.    ,  1.    , 20.    ,  0.    ,  0.    ,  7.925 ],
       ...,
       [ 1.    ,  1.    , 65.    ,  0.    ,  0.    , 26.55  ],
       [ 3.    ,  1.    , 17.    ,  0.    ,  0.    ,  8.6625],
       [ 1.    ,  1.    , 28.    ,  0.    ,  0.    , 26.55  ]],
      shape=(268, 6))

In [11]:
# Display the labels predicted for the held-out rows.
y_test_pred

array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0])