In [None]:
#--- Libraries ---#
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#--- Style Configuration ---#
# These calls configure plotting defaults for every figure drawn below.
# They are applied in sequence, so keep the order when editing.
sns.set_style("whitegrid")
# NOTE(review): set_palette() below runs after this style sheet is applied, so
# any colors the sheet defines are presumably replaced by "pastel" -- confirm
# whether this line is still needed.
plt.style.use('seaborn-v0_8-deep')
sns.set_palette("pastel")
sns.set_context("paper")

In [None]:
#--- Data load ---#
# Path is relative to the notebook's working directory.
file_path = '../datasets/titanic/train.csv'
df = pd.read_csv(file_path)

In [None]:
#--- Preview the first 5 rows ---#
df.head(n=5)

In [None]:
#--- Technical Summary ---#
# Prints the column names, dtypes and non-null counts; use it to spot columns
# with missing values before plotting.
df.info()

In [None]:
#--- Statistical Summary ---#
# Descriptive statistics (count, mean, std, min, quartiles, max). With default
# arguments pandas restricts this to the numeric columns when any are present.
df.describe()

In [None]:
#--- Distribution of the target variable (Survived) ---#
plt.figure(figsize=(8, 6))
# No palette is passed here, so the bars use the globally configured colors.
survival_ax = sns.countplot(data=df, x='Survived')
survival_ax.set_title('Survival Distribution (0 = No, 1 = Yes)', fontsize=14)
survival_ax.set_xlabel('Status', fontsize=12)
survival_ax.set_ylabel('Number of Passengers', fontsize=12)
# Replace the raw 0/1 tick labels with readable names.
survival_ax.set_xticks([0, 1])
survival_ax.set_xticklabels(['Did not Survive', 'Survived'])
plt.show()

In [None]:
#--- Survival counts within each passenger class ---#
plt.figure(figsize=(10,6))
class_ax = sns.countplot(
    x='Pclass',
    data=df,
    hue='Survived',
    palette='viridis',
)

#--- Titles and labels for clarity ---#
plt.title('Survival Count by Passenger Class', fontsize=14)
plt.xlabel('Passenger Class', fontsize=12)
plt.ylabel('Number of Passengers', fontsize=12)

# Relabel the legend using the handles seaborn registered for the hue levels.
# Passing only `labels=` makes matplotlib pick handles implicitly by artist
# order, which its documentation discourages because labels and colors can
# silently get mixed up.
# NOTE(review): assumes the hue levels come back ordered 0 then 1 -- confirm.
class_handles, _ = class_ax.get_legend_handles_labels()
class_ax.legend(
    handles=class_handles,
    labels=['Not Survived', 'Survived'],
    title='Status',
)
plt.show()

In [None]:
#--- Survival counts for men and women ---#
plt.figure(figsize=(10,6))
gender_ax = sns.countplot(
    x='Sex',
    hue='Survived',
    palette='plasma',
    data=df,
)

#--- Titles and labels for clarity ---#
plt.title('Survival Count by Gender', fontsize=14)
plt.xlabel('Gender', fontsize=12)
plt.ylabel('Number of Passengers', fontsize=12)

# Relabel the legend using the handles seaborn registered for the hue levels.
# Passing only `labels=` makes matplotlib pick handles implicitly by artist
# order, which its documentation discourages because labels and colors can
# silently get mixed up.
# NOTE(review): assumes the hue levels come back ordered 0 then 1 -- confirm.
gender_handles, _ = gender_ax.get_legend_handles_labels()
gender_ax.legend(
    handles=gender_handles,
    labels=['Not Survived', 'Survived'],
    title='Status',
)
# Render the figure explicitly, like every other plotting cell; without this
# the cell's last expression is the Legend object and its repr is echoed.
plt.show()



In [None]:
#--- Age histograms, one panel per value of Survived ---#
g = sns.FacetGrid(df, col='Survived', height=6)
g.map(sns.histplot, 'Age', bins=30, kde=True)
g.set_axis_labels("Age", "Number of Passengers")

# Override the default panel titles with readable ones.
# NOTE(review): titles are assigned by column position, which assumes the
# Survived == 0 panel comes first -- confirm.
for panel, panel_title in zip(g.axes.flat, ('Did not Survive', 'Survived')):
    panel.set_title(panel_title)

plt.show()

In [None]:
#--- Relationship between the ticket price (Fare) and survival ---#
plt.figure(figsize=(10,6))

# Color the boxes through `hue`: passing `palette` without `hue` is deprecated
# in seaborn 0.13 (FutureWarning) and scheduled for removal. `dodge=False`
# keeps each box centered on its x position at full width.
fare_ax = sns.boxplot(
    x='Survived',
    y='Fare',
    hue='Survived',
    data=df,
    palette='viridis',
    dodge=False,
)
# The hue duplicates the x axis, so a legend would be redundant; drop it if
# seaborn created one (whether it does depends on the seaborn version).
fare_legend = fare_ax.get_legend()
if fare_legend is not None:
    fare_legend.remove()

#--- Add titles and labels ---#

plt.title('Fare Distribution by Survival Status', fontsize=14)
plt.xlabel('Survival Status', fontsize=12)
plt.ylabel('Fare', fontsize=12)
plt.xticks([0,1],['Not Survived', 'Survived'])
# The y-axis is capped at 300 to keep the boxes readable; any fares above that
# value are not visible in this figure.
plt.ylim(0,300)
plt.show()