In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [2]:
df = pd.read_csv("../dataset/train.csv")

cols = ["Survived","Pclass","Sex","Age","SibSp","Parch","Fare","Embarked"]
df = df[cols].copy()
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [3]:
num_cols = ["Age","SibSp","Parch","Fare"]
cat_cols = ["Pclass","Sex","Embarked"]

for c in num_cols:
    if c in df.columns:
        df[c] = df[c].fillna(df[c].median())

for c in cat_cols:
    if c in df.columns:
        df[c] = df[c].fillna(df[c].mode()[0])

df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [4]:
df = pd.get_dummies(df, columns=["Pclass","Sex","Embarked"], drop_first=True)
df.head()

Unnamed: 0,Survived,Age,SibSp,Parch,Fare,Pclass_2,Pclass_3,Sex_male,Embarked_Q,Embarked_S
0,0,22.0,1,0,7.25,False,True,True,False,True
1,1,38.0,1,0,71.2833,False,False,False,False,False
2,1,26.0,0,0,7.925,False,True,False,False,True
3,1,35.0,1,0,53.1,False,False,False,False,True
4,0,35.0,0,0,8.05,False,True,True,False,True


In [5]:
y = df["Survived"].astype(int)
X = df.drop(columns=["Survived"])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [6]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", cm)


Accuracy Score: 0.804
Confusion Matrix:
 [[98 12]
 [23 46]]


In [8]:
joblib.dump((model, scaler, X.columns.tolist()), "../model/titanic_model.pkl")
print("Model saved as titanic_model.pkl")


Model saved as titanic_model.pkl
