# 🚢 Titanic Survival Prediction — Data Science Internship Project  
**Author:** Krishiv Lillad  
**Date:** 06 November 2025  

This project predicts whether a passenger survived the Titanic disaster using machine learning (Logistic Regression).  
The dataset includes age, gender, passenger class, fare, and more.  

We will perform:
- Data loading  
- Data cleaning & preprocessing  
- Exploratory Data Analysis (EDA)  
- Feature encoding  
- Model training  
- Performance evaluation  

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Apply seaborn's "darkgrid" theme to every plot drawn later in the notebook.
# `sns.set` is only a legacy alias of `set_theme` and may be removed in a
# future seaborn release, so call the preferred name directly.
sns.set_theme(style="darkgrid")

In [None]:
# Read the passenger records from the CSV file (path is relative to the
# working directory) and preview the first rows.
csv_path = "titanicdataset.csv"
df = pd.read_csv(csv_path)

df.head()

In [None]:
# Number of missing entries in each column, before any cleaning.
missing_per_column = df.isnull().sum()
missing_per_column

In [None]:
# Fill the gaps in the columns used later and drop the Cabin column.
#
# The results are assigned back to the column instead of calling
# `fillna(..., inplace=True)` on `df[col]`: that form is chained assignment on
# an intermediate Series. pandas 2.x warns about it, and with Copy-on-Write
# (the default from pandas 3.0) it no longer modifies `df` at all.

# Missing ages -> median age of the passengers.
df["Age"] = df["Age"].fillna(df["Age"].median())
# Missing embarkation ports -> the most frequent port.
df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])
# errors="ignore" keeps the cell re-runnable once Cabin is already gone.
df.drop(columns=["Cabin"], inplace=True, errors="ignore")

# Re-check the per-column missing counts after cleaning.
df.isnull().sum()

In [None]:
# Bar chart: number of passengers in each Survived class.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x="Survived", ax=ax)
ax.set_title("Survival Count")
plt.show()

In [None]:
# Bar chart: Survived counts, split into one bar per value of the Sex column.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x="Survived", hue="Sex", ax=ax)
ax.set_title("Survival by Gender")
plt.show()

In [None]:
# Replace the text labels of the two categorical columns with integer codes.
# Values that are not listed in a mapping are left untouched by `replace`.
# NOTE(review): Embarked is a nominal category; the 0/1/2 codes impose an
# order on it — consider one-hot encoding if that matters for the model.
category_codes = {
    "Sex": {"male": 0, "female": 1},
    "Embarked": {"S": 0, "C": 1, "Q": 2},
}
df.replace(category_codes, inplace=True)

df.head()

In [None]:
# Split the frame into the model inputs (X) and the label to predict (y).
feature_columns = ["Pclass", "Sex", "Age", "Fare", "Embarked"]
target_column = "Survived"

X = df[feature_columns]
y = df[target_column]

X.head()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Fit a logistic-regression classifier on the training split. `fit` returns
# the estimator itself, so construction and training can be chained.
# max_iter=300 raises the solver's iteration limit.
model = LogisticRegression(max_iter=300).fit(X_train, y_train)
model

In [None]:
# Predict the held-out rows and report the fraction classified correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Model Accuracy:", accuracy)

In [None]:
# Confusion matrix of the test-set predictions, drawn as an annotated heatmap
# (rows = actual class, columns = predicted class).
cm = confusion_matrix(y_test, y_pred)

ax = sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
ax.set(title="Confusion Matrix", xlabel="Predicted", ylabel="Actual")
plt.show()

In [None]:
# Text summary of the classification metrics on the test set.
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Score one hand-written passenger. The values use the integer codes applied
# to Sex and Embarked earlier in the notebook (Sex 1 = "female",
# Embarked 0 = "S"); the columns are in the same order as the training features.
passenger = {"Pclass": 3, "Sex": 1, "Age": 22, "Fare": 7.25, "Embarked": 0}
sample = pd.DataFrame([passenger])

model.predict(sample)