In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib

# Load the raw passenger table.
df = pd.read_csv("Titanic-Dataset.csv")

# Drop the columns that are not used as features; everything that remains,
# apart from "survived", is fed to the model.
df = df.drop(columns=["passenger_id", "name", "ticket", "cabin"])

# Fill missing values: median for "age" and "fare", most frequent value for
# "embarked".
# NOTE(review): these statistics are computed on the whole table, i.e. before
# the train/test split below, so held-out rows influence the fill values.
df["age"] = df["age"].fillna(df["age"].median())
df["embarked"] = df["embarked"].fillna(df["embarked"].mode()[0])
df["fare"] = df["fare"].fillna(df["fare"].median())

# Encode the categorical columns as integer codes.
# Each column gets its own fitted LabelEncoder, kept in `encoders`, so the
# code <-> label mapping of every column stays available afterwards
# (e.g. encoders["sex"].classes_). Refitting a single shared instance would
# keep only the mapping of the last column encoded.
encoders = {}
for column in ("sex", "embarked"):
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    encoders[column] = le
# `le` remains bound to the "embarked" encoder here.
# NOTE(review): the encoders are not written to disk with the model; whoever
# loads titanic_model.pkl must apply the same integer codes to new rows.

# Define X and y: "survived" is the target, all other columns are features.
X = df.drop(columns="survived")
y = df["survived"]

# Train the model on 80% of the rows; random_state fixes the shuffle so the
# split is reproducible.
# NOTE(review): X_test / y_test are held out but never scored in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Save the fitted model.
joblib.dump(model, "titanic_model.pkl")
print("Model saved successfully as titanic_model.pkl")

Model saved successfully as titanic_model.pkl
