In [1]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
# Display configuration: set both options to None in a single call
# (per the pandas docs, None lifts the column cap; width falls back to auto-detection).
pd.set_option(
    'display.max_columns', None,
    'display.width', None,
)

In [2]:
# Read the H2 bookings CSV (path is relative to this notebook's directory)
# into the working DataFrame used by the cells below.
df = pd.read_csv(filepath_or_buffer='../../data/H2.csv')

We saw some NULL values in the columns `Agent` and `Company`.<br>However, these *NULL* values are presented as one of the categories. They should not be considered missing values, but rather “not applicable”.<br>**For example,** if a booking's “Agent” is “NULL”, it means that the booking did not come from a travel agent.

In [4]:
# Missing-value analysis: null count per column and its share of all rows.
missing_values = df.isna().sum()
missing_percentage = missing_values.div(len(df)).mul(100)

# Assemble both measures into one report, columns with the most nulls first.
missing_report = pd.concat(
    [
        missing_values.rename("Missing Values"),
        missing_percentage.rename("Percentage (%)"),
    ],
    axis=1,
).sort_values(by="Missing Values", ascending=False)

# Display only the columns that actually contain nulls.
missing_report.loc[missing_report["Missing Values"].gt(0)]

Unnamed: 0,Missing Values,Percentage (%)
Country,24,0.030253
Children,4,0.005042


- `Children`: We can fill in missing values with 0 (no children).
- `Country`: An "Unknown" category can be added for missing country information.

In [9]:
# Handle missing values.
# Fill through the DataFrame instead of `df[col].fillna(..., inplace=True)`:
# that chained in-place form raises a FutureWarning on pandas 2.x and no longer
# updates `df` once Copy-on-Write is the default (pandas 3.0). Assigning the
# result also keeps this cell safe to re-run.
df = df.fillna({
    'Children': 0,         # missing 'Children' -> 0 (no children)
    'Country': 'Unknown',  # missing 'Country' -> explicit 'Unknown' category
})

# Verify there are no missing values left (an empty Series is the expected output)
missing_values_after = df.isnull().sum()
missing_values_after[missing_values_after > 0]


Series([], dtype: int64)

For a room to be reserved, at least one of `Adults`, `Children`, or `Babies` must be different from 0.
- If a row has 0 for all three, let's delete it.

In [13]:
# Adults, babies and children can't all be zero at the same time.
# Build the mask once so the reported count and the filter below cannot drift apart.
no_guests = df[['Adults', 'Babies', 'Children']].eq(0).all(axis=1)
print(int(no_guests.sum()))  # number of bookings with no guests at all

# Drop those rows; .copy() makes df_clean an independent frame, so later edits
# to it neither touch `df` nor trigger SettingWithCopyWarning.
df_clean = df[~no_guests].copy()

167
