# In [None]:  (cell prompt left over from the notebook export)

# TITANIC SURVIVAL PREDICTION PROJECT

#  Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


#  Load Dataset


# Read the passenger table. The path is relative, so the CSV must sit in
# the directory the script is run from.
df = pd.read_csv("Titanic-Dataset.csv")
print("First 5 rows:")
print(df.head())

print("\nDataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the summary.
df.info()


#  Data Cleaning & Preprocessing


# Drop the columns that are not used as model features.
df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)

# Fill missing Age with the column median.
# The result is assigned back rather than calling fillna(..., inplace=True)
# on df['Age']: that chained form acts on an intermediate Series, triggers a
# chained-assignment warning on recent pandas 2.x, and leaves df untouched
# once Copy-on-Write is active -- NaNs would then reach the model.
df['Age'] = df['Age'].fillna(df['Age'].median())

# Fill missing Embarked with the most frequent value (first entry of mode()).
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

# Encode the categorical columns as integer codes. fit_transform() refits
# the encoder on every call, so the same instance is reused for both columns;
# afterwards `le` holds the mapping learned from Embarked only.
le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])
df['Embarked'] = le.fit_transform(df['Embarked'])


#  Define Features and Target


# Target is the Survived column; every remaining column is a predictor.
y = df['Survived']
X = df.drop(columns=['Survived'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


#  Train Model (Logistic Regression)


# Build the logistic-regression classifier (solver iteration limit set to
# 1000) and fit it on the training split; fit() hands back the estimator.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predicted labels for the held-out test rows.
y_pred = model.predict(X_test)


#  Model Evaluation


# Overall fraction of test rows predicted correctly.
accuracy = accuracy_score(y_test, y_pred)
print("\nModel Accuracy:", accuracy)

# Per-class metrics report as formatted text.
print("\nClassification Report:\n")
report = classification_report(y_test, y_pred)
print(report)

# Confusion matrix of true vs. predicted labels.
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)


#  Visualization (Single Plots Only)


# Bar chart of how many passengers fall into each Survived class.
survival_counts = df['Survived'].value_counts()
plt.figure()
survival_counts.plot(kind='bar')
plt.title("Survival Distribution (0 = No, 1 = Yes)")
plt.xlabel("Survived")
plt.ylabel("Count")
plt.show()

# Age and Fare histograms: one separate figure per column, each drawn with
# the same sequence of calls, so they share a single loop.
for column in ("Age", "Fare"):
    plt.figure()
    df[column].hist()
    plt.title(f"{column} Distribution")
    plt.xlabel(column)
    plt.ylabel("Frequency")
    plt.show()

