# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load dataset
file_path = "/mnt/data/Titanic.csv"
df = pd.read_csv(file_path)

# Display basic info
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() only rendered a stray "None" afterwards.
df.info()
display(df.head())

def data_cleaning(df):
    """Clean and preprocess the Titanic dataset.

    The frame is modified in place and the same object is returned.

    Steps:
      * missing ``age`` values are filled with the column median;
      * missing ``embarked`` values are filled with the most frequent value
        (left untouched when the column has no non-missing value at all);
      * ``sex`` is mapped ``'male' -> 0`` / ``'female' -> 1``;
      * ``alone`` is cast to ``int``.

    Args:
        df: DataFrame containing ``age``, ``embarked``, ``sex`` and ``alone``
            columns.

    Returns:
        The same DataFrame object, after cleaning.
    """
    # Handling missing values.
    # Assign the filled column back rather than calling
    # ``df[col].fillna(..., inplace=True)``: the in-place form operates on an
    # intermediate object, is deprecated in recent pandas and does not update
    # ``df`` when copy-on-write is enabled.
    df['age'] = df['age'].fillna(df['age'].median())

    # ``mode()`` is empty when every value is missing; indexing it would raise.
    embarked_modes = df['embarked'].mode()
    if not embarked_modes.empty:
        df['embarked'] = df['embarked'].fillna(embarked_modes.iloc[0])

    # Convert categorical features
    df['sex'] = df['sex'].map({'male': 0, 'female': 1})
    df['alone'] = df['alone'].astype(int)

    return df

df = data_cleaning(df)

# Exploratory Data Analysis (EDA)

# Number of passengers per value of 'survived'.
plt.figure(figsize=(8, 5))
sns.countplot(x='survived', data=df, palette='coolwarm')
plt.title("Survival Count")
plt.show()

# Age distribution for each value of 'survived'.
plt.figure(figsize=(8, 5))
sns.boxplot(x='survived', y='age', data=df, palette='coolwarm')
plt.title("Age Distribution by Survival")
plt.show()

# Histogram of fares with a kernel density estimate overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(df['fare'], bins=30, kde=True, color='purple')
plt.title('Distribution of Fare Prices')
plt.show()

# Passenger counts per class, split by 'survived'.
plt.figure(figsize=(8, 5))
sns.countplot(x='class', hue='survived', data=df, palette='coolwarm')
plt.title("Survival by Passenger Class")
plt.show()

# Correlation heatmap.
# Only numeric/boolean columns are correlated: the frame still holds
# non-numeric columns after cleaning (e.g. 'embarked', 'class'), and
# DataFrame.corr() raises on those in pandas >= 2.0 instead of silently
# dropping them.
plt.figure(figsize=(8, 5))
sns.heatmap(
    df.select_dtypes(include=['number', 'bool']).corr(),
    annot=True,
    cmap='coolwarm',
    fmt='.2f',
)
plt.title("Correlation Heatmap")
plt.show()

print("Data Analysis Completed Successfully!")
