Titanic Data Visualizations
This notebook builds several visualizations of the Titanic dataset with Matplotlib and Seaborn and saves each figure as a PNG file.

In [10]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [11]:
# Read the passenger records into a DataFrame. Point the path at your own copy
# of the data if it is not named 'titanic.csv' in the working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which suggests
# the CSV was written with its index included -- confirm, and consider
# reading it with index_col=0.
df = pd.read_csv(filepath_or_buffer='titanic.csv')
df.head(n=5)  # first five rows as a quick sanity check

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


Histogram for Age


In [12]:
# Age histogram (30 bins) with a kernel-density overlay, written to disk.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(df['Age'], bins=30, kde=True, ax=ax)
ax.set_title('Distribution of Age')
ax.set_xlabel('Age')
ax.set_ylabel('Frequency')
fig.savefig('histogram_age.png')  # PNG output
plt.close(fig)  # free the figure once it has been written

Histogram for Fare

In [13]:
# Fare histogram (30 bins) with a kernel-density overlay, written to disk.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(df['Fare'], bins=30, kde=True, ax=ax)
ax.set(title='Distribution of Fare', xlabel='Fare', ylabel='Frequency')
fig.savefig('histogram_fare.png')  # PNG output
plt.close(fig)  # free the figure once it has been written

Boxplot: Age by Survival Status

In [14]:
# Box plot of Age, one box per value of the Survived column.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='Survived', y='Age', ax=ax)
ax.set_title('Age Distribution by Survival Status')
ax.set_xlabel('Survived (0 = No, 1 = Yes)')
ax.set_ylabel('Age')
fig.savefig('boxplot_age_survival.png')
plt.close(fig)  # free the figure once it has been written

Boxplot: Fare by Survival Status

In [15]:
# Box plot of Fare, one box per value of the Survived column.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='Survived', y='Fare', ax=ax)
ax.set(
    title='Fare Distribution by Survival Status',
    xlabel='Survived (0 = No, 1 = Yes)',
    ylabel='Fare',
)
fig.savefig('boxplot_fare_survival.png')
plt.close(fig)  # free the figure once it has been written

Scatterplot: Age vs Fare by Survival Status

In [16]:
# Age against Fare, with points coloured by the Survived column.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='Age',
    y='Fare',
    hue='Survived',
    palette='coolwarm',
    alpha=0.7,  # partial transparency so overlapping points stay visible
    ax=ax,
)
ax.set_title('Age vs. Fare by Survival Status')
ax.set_xlabel('Age')
ax.set_ylabel('Fare')
fig.savefig('scatterplot_age_fare.png')
plt.close(fig)  # free the figure once it has been written

Pairplot: Age, Fare, Pclass, SibSp by Survival Status

In [17]:
# Pairwise relationships between Age, Fare, Pclass and SibSp, coloured by the
# Survived column. sns.pairplot creates its own figure, which becomes the
# current pyplot figure, so the pyplot calls below act on it.
sns.pairplot(
    df[['Age', 'Fare', 'Pclass', 'SibSp', 'Survived']],
    hue='Survived',
    palette='coolwarm',
)
# y=1.02 places the title just above the top edge of the figure canvas.
plt.suptitle('Pairwise Relationships by Survival Status', y=1.02)
# bbox_inches='tight' grows the saved area to include artists outside the
# canvas; without it the title above would be cropped out of the PNG.
plt.savefig('pairplot_relationships.png', bbox_inches='tight')
plt.close()  # free the figure once it has been written

Heatmap: Correlation between Selected Numeric Columns

In [18]:
# Pairwise correlations (DataFrame.corr defaults) between the chosen columns,
# drawn as an annotated heatmap and written to disk.
corr_matrix = df.loc[:, ['Age', 'Fare', 'SibSp', 'Parch', 'Survived']].corr()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,      # print the coefficient in every cell
    fmt='.2f',       # two decimal places
    cmap='coolwarm',
    linewidths=0.5,
    ax=ax,
)
ax.set_title('Correlation Heatmap')
fig.savefig('correlation_heatmap.png')
plt.close(fig)  # free the figure once it has been written