# 🚢 Logistic Regression on Titanic Dataset
This notebook demonstrates how to build a **Logistic Regression model** for predicting Titanic survival using Python and scikit-learn.

Steps covered:
1. Load dataset
2. Preprocess data
3. Train-test split
4. Train Logistic Regression model
5. Evaluate performance
6. Visualize results (ROC curve and AUC)


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc

## Step 1: Load Titanic Dataset

In [None]:
# Pull the "titanic" example table through seaborn's dataset loader and preview it.
df = sns.load_dataset("titanic")
df.head()

## Step 2: Data Preprocessing

In [None]:
# Columns used as model inputs; 'survived' is the prediction target.
features = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']

# Keep only the modelling columns, discard every row that has a missing value,
# then one-hot encode the two text columns. drop_first=True omits the first
# level of each encoded column so the dummy columns are not redundant.
df_model = pd.get_dummies(
    df[[*features, 'survived']].dropna(),
    columns=['sex', 'embarked'],
    drop_first=True,
)
df_model.head()

## Step 3: Train-test Split

In [None]:
# Target vector is the 'survived' column; every remaining column is a predictor.
y = df_model['survived']
X = df_model.drop(columns='survived')

# Hold out 20% of the rows for testing. stratify=y splits in a stratified
# fashion on the target, and random_state=42 makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## Step 4: Train Logistic Regression Model

In [None]:
# Build and fit the classifier in one step (fit returns the estimator itself).
# max_iter=1000 raises the solver's iteration cap.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Hard class labels, plus column 1 of predict_proba (the probability of the
# second entry in model.classes_) for threshold-based evaluation.
y_pred = model.predict(X_test)
y_pred_prob = model.predict_proba(X_test)[:, 1]

## Step 5: Evaluate Model

In [None]:
# Headline metrics computed from the hard predictions on the held-out split.
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print(f"✅ Accuracy: {acc:.2f}")
# sep="\n" puts the matrix on the line after its heading.
print("\nConfusion Matrix:", cm, sep="\n")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

## Step 6: ROC Curve

In [None]:
# ROC points over the decision thresholds, computed from the predicted
# probabilities in y_pred_prob, and the area under that curve.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(fpr, tpr, label=f'AUC = {roc_auc:.2f}')
ax.plot([0, 1], [0, 1], 'k--')  # dashed diagonal as a reference line
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve - Logistic Regression')
ax.legend()
plt.show()