# 🧪 Day 1: Exploratory Data Analysis (EDA)
## 🚢 Titanic Dataset

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Fetch the Titanic example dataset through seaborn's dataset loader;
# the rest of the notebook works on this object as `df`.
df = sns.load_dataset(name='titanic')

In [None]:
# Basic info: dimensions, column names, and per-column dtypes
n_dims = df.shape
column_names = df.columns.tolist()
column_types = df.dtypes

print('Shape:', n_dims)
print('\nColumns:', column_names)
print('\nData Types:\n', column_types)
print('\nFirst 5 rows:\n')
# Kept as the last expression of the cell so the notebook renders the preview
df.head()

In [None]:
# Missing values: per-column counts first, then a row-by-column map of the gaps
null_mask = df.isnull()
print('Missing Values:\n', null_mask.sum())

# Each highlighted cell in the heatmap is a missing entry; row labels are
# suppressed because there is one row per passenger record.
sns.heatmap(null_mask, yticklabels=False, cbar=False, cmap='viridis')
plt.title('Missing Values Heatmap')
plt.show()

In [None]:
# Descriptive statistics
# Numeric columns: default describe() output (count, mean, std, quartiles, ...)
print('Numerical Summary:\n', df.describe())

# Non-numeric columns: count / unique / top / freq.
# 'category' is listed alongside 'object' because pandas categorical columns
# are not object dtype and would otherwise be left out of this summary.
print('\nCategorical Summary:\n', df.describe(include=['object', 'category']))

In [None]:
# Univariate analysis
# Age: histogram with a KDE overlay, computed on the non-missing ages only
known_ages = df['age'].dropna()
age_ax = sns.histplot(known_ages, bins=20, kde=True)
age_ax.set_title('Age Distribution')
plt.show()

# Passenger class: number of rows per class
class_ax = sns.countplot(x='pclass', data=df)
class_ax.set_title('Passenger Class Count')
plt.show()

In [None]:
# Bivariate analysis
# One bar chart per grouping column; bar height is the mean of `survived`
# within each group, i.e. the group's survival rate.
for group_col, chart_title in (
    ('sex', 'Survival Rate by Gender'),
    ('pclass', 'Survival Rate by Passenger Class'),
):
    sns.barplot(data=df, x=group_col, y='survived')
    plt.title(chart_title)
    plt.show()

In [None]:
# Correlation matrix
# Restrict to numeric columns before computing pairwise correlations
numeric_df = df.select_dtypes(include=[np.number])
corr = numeric_df.corr()

# Pin the colour scale to the full correlation range [-1, 1] so the neutral
# midpoint of the diverging 'coolwarm' map sits at zero; without this the
# scale is inferred from the data and the colours no longer indicate sign.
# fmt='.2f' keeps the cell annotations at a uniform two decimals.
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
plt.show()

## 🔍 Key Insights
- Around 20% of age data is missing.
- Female passengers had a higher survival rate.
- Passengers in class 1 had better survival chances than classes 2 and 3.
- Fare and age are not strongly correlated with each other.