# Titanic Survival Prediction (Super Simple Model)

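This notebook trains a very simple logistic regression model on the Titanic dataset.
It uses only a few short steps, so it is easy to follow: load the data, fill in missing values, encode the text columns, fit the model, check validation accuracy, and write the test-set predictions to `submission.csv`.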

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [None]:
# Read the training and test CSV files into DataFrames
train_path, test_path = "data/train.csv", "data/test.csv"
train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

# Preview the first rows of the training data (rendered as the cell output)
train.head()


In [None]:
# Fill missing values with simple strategies: median for the numeric columns,
# most frequent value for the port of embarkation.
#
# The filled frames are assigned back instead of calling
# `frame[col].fillna(..., inplace=True)`: that chained in-place call operates on
# an intermediate Series, which pandas 2.x warns about and which leaves the
# frame unchanged when Copy-on-Write is active.
#
# The fill values are computed once from `train` and reused for `test`, so both
# frames are imputed with the same values the model is trained on rather than
# with statistics taken from the test data.
fill_values = {
    "Age": train["Age"].median(),
    "Fare": train["Fare"].median(),
    "Embarked": train["Embarked"].mode()[0],
}

# With a dict, only the listed columns are filled; other columns are untouched.
train = train.fillna(value=fill_values)
test = test.fillna(value=fill_values)


In [None]:
# Encode the text columns as numeric codes.
# Note: `.map` turns any value that is not a key of the mapping into NaN, so
# this cell should be run once on freshly loaded frames (re-running it on
# already-encoded columns would blank them out).
sex_map = {"male": 0, "female": 1}
embarked_map = {"S": 0, "C": 1, "Q": 2}

for frame in (train, test):
    frame["Sex"] = frame["Sex"].map(sex_map)
    frame["Embarked"] = frame["Embarked"].map(embarked_map)


In [None]:
# Predictor columns fed to the model; the same list is reused for the test set
features = [
    "Pclass",
    "Sex",
    "Age",
    "SibSp",
    "Parch",
    "Fare",
    "Embarked",
]

# X holds the predictors, y the label column to learn
X, y = train[features], train["Survived"]


In [None]:
# Hold out 20% of the rows for validation; the fixed random_state makes the
# split reproducible across runs
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = split_parts


In [None]:
# Build the logistic regression classifier (capped at 200 solver iterations)
# and fit it on the training split; `fit` returns the estimator itself
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Show the fitted estimator as the cell output
model


In [None]:
# Score the fitted model on the held-out validation rows
y_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", val_accuracy)


In [None]:
# Predict survival for every row of the test set, using the training columns
test_features = test[features]
test_pred = model.predict(test_features)

# Pair each PassengerId with its prediction and write the result to
# submission.csv (without the DataFrame index)
submission = test[["PassengerId"]].assign(Survived=test_pred)
submission.to_csv("submission.csv", index=False)

print("Submission file 'submission.csv' created!")
