In [1]:
import pandas as pd
import seaborn as sns

# --- Part 1: load the Titanic data and count missing ages ---
# Fetch seaborn's example dataset registered under the name "titanic".
df = sns.load_dataset("titanic")

# Count the rows whose 'age' entry is null. The column name is lowercase in
# this dataset, although the printed label below spells it 'Age'.
age_is_null = df["age"].isnull()
missing_age = age_is_null.sum()
print("Missing values in 'Age' column: {}".format(missing_age))

# --- Part 2: duplicate rows ---
# Boolean mask over the rows of `df`: True where a row repeats an earlier row
# in every column (with pandas' defaults the first occurrence stays False).
is_repeat = df.duplicated()
duplicate_rows = df.loc[is_repeat]

# Heading followed by the flagged rows; an empty frame is shown if none exist.
print("\nDuplicate Rows in the Titanic Dataset:", duplicate_rows, sep="\n")

# --- Part 3: data quality report ---
# Compute the three ingredients up front:
#   * null count per column,
#   * how many rows are flagged as duplicates,
#   * describe() summary statistics.
missing_values = df.isnull().sum()
duplicate_count = df.duplicated().sum()
numerical_stats = df.describe()

# Bundle everything under human-readable keys. Columns with zero nulls are
# filtered out so the "Missing Values" entry lists only affected columns.
columns_with_nulls = missing_values.loc[missing_values > 0]
report = dict(
    [
        ("Missing Values", columns_with_nulls),
        ("Duplicate Rows", duplicate_count),
        ("Numerical Statistics", numerical_stats),
    ]
)

# Render the report: title, rule, then one section per dict entry.
print(
    "\nData Quality Report for Titanic Dataset:",
    "-------------------------------------------------",
    "Missing Values:",
    report["Missing Values"],
    sep="\n",
)
print("\nNumber of Duplicate Rows:", report["Duplicate Rows"])
print(
    "\nBasic Statistics for Numerical Columns:",
    report["Numerical Statistics"],
    sep="\n",
)


Missing values in 'Age' column: 177

Duplicate Rows in the Titanic Dataset:
     survived  pclass     sex   age  sibsp  parch     fare embarked   class  \
47          1       3  female   NaN      0      0   7.7500        Q   Third   
76          0       3    male   NaN      0      0   7.8958        S   Third   
77          0       3    male   NaN      0      0   8.0500        S   Third   
87          0       3    male   NaN      0      0   8.0500        S   Third   
95          0       3    male   NaN      0      0   8.0500        S   Third   
..        ...     ...     ...   ...    ...    ...      ...      ...     ...   
870         0       3    male  26.0      0      0   7.8958        S   Third   
877         0       3    male  19.0      0      0   7.8958        S   Third   
878         0       3    male   NaN      0      0   7.8958        S   Third   
884         0       3    male  25.0      0      0   7.0500        S   Third   
886         0       2    male  27.0      0      0  13.0