In [1]:
# Import essential data analysis and visualization libraries
import pandas as pd  # For data manipulation and analysis
import numpy as np   # For numerical computations
import matplotlib.pyplot as plt  # For creating static, animated, and interactive visualizations
import seaborn as sns  # For statistical data visualization built on top of matplotlib

In [None]:
# Fetch seaborn's "titanic" example dataset as a pandas DataFrame.
# The cells below plot its 'fare', 'age' and 'sex' columns.
dataset = sns.load_dataset(name='titanic')
# Preview the first five rows to check what was loaded.
dataset.head(n=5)

In [None]:
# Plot the distribution of fare prices: a histogram with a kernel density
# estimate (KDE) curve drawn on top of it.
# sns.distplot is deprecated (seaborn 0.11+) and scheduled for removal, so its
# axes-level replacement sns.histplot is used here instead.
# kde=True adds the smooth density curve; stat='density' normalizes the bars so
# the histogram and the KDE share one scale, as distplot did when it drew both.
sns.histplot(data=dataset, x='fare', kde=True, stat='density')

In [None]:
# Plot the fare prices as a plain histogram, without a KDE curve.
# sns.distplot(..., kde=False) is deprecated (seaborn 0.11+); sns.histplot draws
# the same count histogram and omits the KDE overlay unless kde=True is passed.
sns.histplot(data=dataset, x='fare')

In [None]:
# Draw a joint plot of passenger age (x-axis) against fare (y-axis).
# With the default kind, jointplot() shows a scatter plot of the two variables
# in the centre and the distribution of each variable on its own in the margins.
# This makes it possible to inspect the age/fare relationship and both
# individual distributions in a single figure.
sns.jointplot(data=dataset, x='age', y='fare')


In [None]:
# Plot the fare prices as a histogram with exactly 10 bins and no KDE curve.
# bins=10 splits the range of fares into 10 equal-width intervals; more bins
# show finer detail but look noisier, fewer bins give a coarser overview.
# sns.distplot(..., kde=False, bins=10) is deprecated (seaborn 0.11+), so the
# equivalent sns.histplot call is used; it draws no KDE unless kde=True is set.
sns.histplot(data=dataset, x='fare', bins=10)

In [None]:
# Joint plot of age (x-axis) against fare (y-axis), drawn as a hexbin plot.
# kind='hex' groups the points into hexagonal cells and shades each cell by
# the number of passengers that fall inside it, so crowded regions stand out
# where an ordinary scatter plot would suffer from overlapping markers.
sns.jointplot(data=dataset, x='age', y='fare', kind='hex')

In [None]:
# Keep only the rows that have no missing value in any column.
# NOTE: this rebinds `dataset`, so every cell executed after this one works on
# the reduced, NaN-free frame rather than on the data as originally loaded.
dataset = dataset.dropna(axis=0, how='any')
# pairplot() draws a grid of pairwise plots for the numeric columns: the
# off-diagonal panels show one variable against another, and the diagonal
# panels show the distribution of each variable on its own.
# hue='sex' colours the marks by the 'sex' column so the groups can be compared.
sns.pairplot(data=dataset, hue='sex')

In [None]:
# Rug plot of the fare column: one small tick on the x-axis per fare value in
# the current `dataset`, which makes clusters and gaps in the values visible.
# Rug plots are commonly layered under a histogram or KDE to add this
# point-level detail.
sns.rugplot(dataset.loc[:, 'fare'])