# **FINDING MISSING VALUES**

#### IMPORT REQUIRED LIBRARIES

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

#### LOAD THE DATASET

In [None]:
# Location of the remote CSV (survey-data.csv) used by the rest of the notebook.
file_path = (
    "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/n01PQ9pSmiRX6520flujwQ/survey-data.csv"
)

# pandas reads directly from the URL and returns the data as a DataFrame.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Preview the first five rows to sanity-check the load and see the columns.
df.head(n=5)

#### BASIC SUMMARY OF DATASET

In [None]:
# Summary statistics per column. NOTE: with pandas' defaults, a frame that
# mixes numeric and non-numeric columns only has its numeric columns summarized.
df.describe()

#### FINDING MISSING VALUES

In [None]:
# --- Missing values across every column ---
# Boolean mask with the same shape as the frame: True wherever a cell is missing.
na_mask = df.isna()

# The same mask as integers (1 = missing, 0 = present).
missing_value = na_mask.astype(int)

# Summing the boolean mask column-wise gives the number of missing cells per column.
print(f"Number of missing values:\n{na_mask.sum()}")

# Last expression of the cell, so the notebook renders the 0/1 frame.
missing_value

In [None]:
# --- Visualize missing data ---
# Static view (seaborn): heatmap of the boolean missing-value mask, drawn
# without a colour bar and without row tick labels.
plt.figure(figsize=(12, 8))
sns.heatmap(data=df.isna(), cmap='viridis', cbar=False, yticklabels=False)
plt.show()

# Interactive view (plotly) built from the 0/1 `missing_value` frame.
import plotly.express as px

# Two-stop colour scale: 0 maps to dark grey, 1 (missing) maps to yellow.
missing_colorscale = [[0, 'rgb(50,50,50)'], [1, 'yellow']]
axis_titles = {"x": "Columns", "y": "Row Index", "color": "Missing?"}

fig = px.imshow(
    img=missing_value,
    x=missing_value.columns,
    labels=axis_titles,
    color_continuous_scale=missing_colorscale,
    title="Heatmap of Missing Values (Interactive)",
)

# Hide the y-axis tick labels, matching the seaborn plot above.
fig.update_layout(yaxis_showticklabels=False)
fig.show()

In [None]:
# How many rows have no value in the 'Employment' column?
employment_missing = df['Employment'].isna().sum()
print(f"Number of missing employment data: {employment_missing}")

#### IMPUTING MISSING VALUES

In [None]:
# Most frequent value in the "Employment" column.
# mode() returns a Series (several values can tie), so take its first entry.
employment_modes = df['Employment'].mode()
emp_mode = employment_modes[0]
print(f"The most frequent employment data: {emp_mode}")

In [None]:
# Replace every missing 'Employment' entry with the column's mode.
employment_filled = df['Employment'].fillna(value=emp_mode)
df['Employment'] = employment_filled

# Re-count the missing entries to check the result of the imputation.
print(f"Number of missing value after imputation: {df['Employment'].isna().sum()}")

In [None]:
# Plot the ten most common 'Employment' values after imputation.
freq_emp = df['Employment'].value_counts().nlargest(10)
bar_labels = freq_emp.index
bar_counts = freq_emp.values

plt.figure(figsize=(12, 8))
# Horizontal bars: categories on the y axis, their counts on the x axis.
# NOTE(review): seaborn >= 0.13 warns when `palette` is given without `hue` --
# confirm the installed version before changing this call.
sns.barplot(x=bar_counts, y=bar_labels, palette='viridis')
plt.xlabel("Count")
plt.ylabel("Employment Status")
plt.show()