# Titanic Survival Prediction
**Data Science Internship Task - CODSOFT**

This notebook predicts survival of Titanic passengers using Logistic Regression.

In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Step 2: Load the dataset
# Read the passenger records from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="Titanic-Dataset.csv")
# Preview the first five rows (the cell's last expression is displayed).
data.head(n=5)

In [None]:
# Step 3: Drop unimportant columns
# These three columns are not used as model features in this notebook.
data.drop(columns=["Cabin", "Name", "Ticket"], inplace=True)

# Fill missing values
# Assign the filled column back instead of calling fillna(inplace=True) on
# data["col"]: that form is chained assignment, which pandas warns about and
# which no longer updates `data` once Copy-on-Write is active.
data["Age"] = data["Age"].fillna(data["Age"].mean())
data["Embarked"] = data["Embarked"].fillna(data["Embarked"].mode()[0])

# Encode categorical columns
# LabelEncoder numbers classes in sorted order, so for the labels
# "female"/"male" the result is female = 0, male = 1.
# NOTE: `le` is refit for each column, so after this cell it only holds the
# classes of "Embarked" (the last column it was fitted on).
le = LabelEncoder()
data["Sex"] = le.fit_transform(data["Sex"])
data["Embarked"] = le.fit_transform(data["Embarked"])

data.head()

In [None]:
# Step 4: Split the dataset
# Target vector: the "Survived" column; feature matrix: every other column.
y = data["Survived"]
X = data.drop(columns="Survived")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Step 5: Train the model
# fit() returns the estimator itself, so construction and training can be
# chained; max_iter=200 caps the number of solver iterations.
model = LogisticRegression(max_iter=200).fit(X_train, y_train)
# Leave the fitted estimator as the cell's last expression so it is displayed.
model

In [None]:
# Step 6: Predictions and accuracy
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true labels.
cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print("Model Accuracy:", acc)
print("Confusion Matrix:\n", cm)

# Side-by-side look at the first five test rows: true label vs. prediction
print("\nActual values  :", y_test.iloc[:5].values)
print("Predicted values:", y_pred[:5])

In [None]:
# Optional: Confusion matrix heatmap
# Draw the matrix with integer cell annotations on a blue colour scale.
sns.heatmap(
    data=cm,
    cmap="Blues",
    annot=True,
    fmt="d",
)
# Label the figure: rows are the true classes, columns the predicted ones.
plt.title("Confusion Matrix")
plt.ylabel("Actual")
plt.xlabel("Predicted")
plt.show()