In [1]:
# Import necessary libraries for data analysis and visualization
import numpy as np  # For numerical operations and array handling
import pandas as pd  # For data manipulation and analysis
import matplotlib.pyplot as plt  # For creating static, animated, and interactive visualizations
import seaborn as sns  # For statistical data visualization
from seaborn import load_dataset  # For loading example datasets from seaborn

In [2]:
# Read the Titanic passenger records from a CSV file located in the working directory.
# The resulting DataFrame is the input for every Titanic plot and table in this notebook;
# those cells read the columns Survived, Sex, Age, Pclass and Parch.
data = pd.read_csv(filepath_or_buffer="titanic_Dataset.csv")

In [3]:
# Fetch seaborn's bundled "tips" example dataset (restaurant bills and tips).
# Each row records the total bill, the tip, the payer's sex, smoker status,
# day of the week, time of day and party size.
# It is used in this notebook to study how the tip relates to the bill amount.
tips = sns.load_dataset("tips")

In [None]:
# Count plot of survival status (0 = Not Survived, 1 = Survived).
# Shows how many passengers fall into each survival class.
# The column is selected by name via `x=` together with `data=`: in current seaborn
# releases the first positional argument of countplot is `data`, so passing the Series
# positionally is read as wide-form data instead of the variable to count.
sns.countplot(x='Survived', data=data)
plt.title("Passenger Count by Survival Status")
plt.show()

In [None]:
# Pie chart of the passenger split by gender.
# value_counts() tallies the rows per 'Sex' category; each wedge is one category.
# autopct="%.2f" prints the share of each wedge with two decimal places.
data['Sex'].value_counts().plot.pie(autopct="%.2f")
plt.show()

In [None]:
# Histogram of passenger ages, split into 5 equal-width bins (bins=5).
# Missing ages are dropped explicitly before plotting so the binning never depends on
# how the installed matplotlib/numpy version treats NaN values.
plt.hist(data['Age'].dropna(), bins=5)
# Label the figure so it can be read on its own.
plt.xlabel("Age")
plt.ylabel("Number of passengers")
plt.title("Age Distribution of Passengers")
plt.show()

In [None]:
# Distribution of passenger ages: a histogram overlaid with a kernel density estimate (KDE).
# sns.distplot is deprecated, so the equivalent histplot call is used instead:
#   kde=True          -> draw the smoothed density curve on top of the bars
#   stat="density"    -> normalise the bars to a density so they share the KDE's scale
#   kde_kws={"cut":3} -> let the curve extend past the data range, as distplot did
sns.histplot(data['Age'], kde=True, stat="density", kde_kws={"cut": 3})
plt.title("Age Distribution of Passengers")
plt.show()

In [None]:
# Scatter plot of tip against total bill, one point per row of `tips`.
# Points are coloured by the payer's sex (hue="sex") so any difference in tipping
# behaviour between the groups is visible alongside the bill/tip relationship.
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="sex").set_title(
    "Scatter plot of Total Bill vs Tip"
)
plt.show()


In [None]:
# Bar plot of the average passenger age per passenger class (Pclass), split by gender (Sex).
# x-axis: passenger class; y-axis: average age (barplot aggregates Age within each group).
# hue='Sex' draws separate, differently coloured bars for male and female passengers,
# which makes age differences across classes and genders easy to compare.
sns.barplot(x='Pclass', y='Age', hue='Sex', data=data)
# Title previously read "AverageAge" (missing space).
plt.title("Average Age by Class and Gender")
plt.show()


In [None]:
# Box plot comparing the age distribution of male and female passengers.
# x-axis: 'Sex' category; y-axis: 'Age'.
# Each box shows the median and quartiles for one gender, with points beyond the
# whiskers drawn as potential outliers.
sns.boxplot(data=data, x='Sex', y='Age').set_title("Age Distribution by Sex")
plt.show()


In [None]:
# Box plot of age by gender, further split by survival status.
# x-axis: 'Sex' category; y-axis: 'Age'.
# hue='Survived' draws one box per survival outcome (0 = did not survive, 1 = survived)
# inside each gender, so survivors and non-survivors can be compared side by side.
sns.boxplot(data=data, x='Sex', y='Age', hue='Survived').set_title(
    "Age distribution by Sex and Survival"
)
plt.show()


In [None]:
# Overlay the age density curves of non-survivors and survivors on one set of axes.
# sns.distplot(..., hist=False) is deprecated; sns.kdeplot draws the same
# density-only curve (no histogram bars).
#   blue   -> passengers with Survived == 0 (did not survive)
#   orange -> passengers with Survived == 1 (survived)
# Each curve gets a label so the legend identifies which group it belongs to.
sns.kdeplot(data.loc[data['Survived'] == 0, 'Age'], color="blue", label="Did not survive")
sns.kdeplot(data.loc[data['Survived'] == 1, 'Age'], color="orange", label="Survived")
plt.title("Age Density by Survival Status")
plt.legend()
plt.show()

In [None]:
# Contingency table of passenger class against survival status.
# Rows: one per Pclass value; columns: one per Survived value (0 = did not survive, 1 = survived).
# Each cell holds the number of passengers with that class/outcome combination.
# Left as the last expression so the notebook renders the table.
pd.crosstab(index=data['Pclass'], columns=data['Survived'])

In [None]:
# Heatmap of the passenger class vs. survival status cross-tabulation.
# Colour intensity encodes the number of passengers in each class/outcome cell.
# cmap="Blues" is set explicitly so that darker cells mean higher counts
# (seaborn's default sequential colormap runs the other way: dark = low).
# annot=True with fmt="d" prints the integer count inside every cell.
sns.heatmap(
    pd.crosstab(data['Pclass'], data['Survived']),
    annot=True,
    fmt="d",
    cmap="Blues",
)
plt.title("Passenger Count by Class and Survival")
# Show the figure explicitly, like the other plotting cells, instead of echoing the Axes repr.
plt.show()

In [None]:
# Clustered heatmap of the Parch vs. Survived cross-tabulation.
# Parch is the number of parents/children a passenger had aboard; each cell of the
# table counts the passengers with that Parch value and survival outcome.
# clustermap hierarchically clusters the rows and columns and reorders them so that
# similar count patterns end up next to each other.
sns.clustermap(
    data=pd.crosstab(index=data['Parch'], columns=data['Survived'])
)
plt.show()