In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression

# Titanic survival pipeline: load the train/test CSVs, impute and encode the
# features, fit a logistic regression on the training set, and write the test
# set with a predicted "Survived" column to "test_with_survived.csv".

# 1️⃣ Load datasets
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# 2️⃣ Handle missing values
# Imputation statistics are computed once, from the training set only, and
# then applied to BOTH frames so the test set never influences preprocessing.
fill_values = {
    "Age": train["Age"].median(),
    "Fare": train["Fare"].median(),
    "Embarked": train["Embarked"].mode()[0],
}

# Columns that are not used as model features.
# NOTE(review): PassengerId is dropped from `test` as well, so rows of the
# saved file can only be matched back to test.csv by position.
unused_columns = ["Cabin", "Ticket", "Name", "PassengerId"]
train = train.drop(columns=unused_columns)
test = test.drop(columns=unused_columns, errors="ignore")

# Fill Age, Fare and Embarked in both frames; a gap left in any of them would
# reach the model as NaN.
for frame in (train, test):
    for column, value in fill_values.items():
        frame[column] = frame[column].fillna(value)

# 3️⃣ Encode categorical features
sex_map = {'male': 1, 'female': 0}
embarked_map = {'S': 0, 'C': 1, 'Q': 2}

for frame_name, frame in (("train", train), ("test", test)):
    for column, mapping in (("Sex", sex_map), ("Embarked", embarked_map)):
        encoded = frame[column].map(mapping)
        # Series.map turns every value absent from the mapping into NaN.
        # Fail here, naming the column and the offending values, instead of
        # letting the NaN surface later inside the model.
        unmapped = encoded.isna()
        if unmapped.any():
            offending = frame.loc[unmapped, column].unique().tolist()
            raise ValueError(
                f"{frame_name}[{column!r}] contains missing or unrecognized "
                f"values {offending!r}; expected one of {list(mapping)!r}"
            )
        frame[column] = encoded

# 4️⃣ Split features and target
X_train = train.drop("Survived", axis=1)
y_train = train["Survived"]

# Select the test features by the training column names so both matrices have
# the same columns in the same order, regardless of the CSV layout.
X_test = test[X_train.columns].copy()

# 5️⃣ Train Logistic Regression
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# 6️⃣ Predict survivors for test set
test["Survived"] = model.predict(X_test)

# 7️⃣ Save the updated test set with predictions
test.to_csv("test_with_survived.csv", index=False)
