# Encoding Categorical Variables - Hotel Reservations

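Applies label encoding and one-hot encoding to the categorical features of the Hotel Reservations dataset, binary-encodes the `booking_status` target, and saves both encoded datasets to CSV.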

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the raw reservations table and report its dimensions.
df = pd.read_csv("Hotel Reservations.csv")
print(f"Shape: {df.shape}")

# Working copy that receives the encoded columns in the cells below,
# so the raw frame `df` is not modified by them.
df_encoded = df.copy()

# Preview the first rows (last expression -> rich display).
df.head()

Shape: (36275, 19)


Unnamed: 0,Booking_ID,no_of_adults,no_of_children,no_of_weekend_nights,no_of_week_nights,type_of_meal_plan,required_car_parking_space,room_type_reserved,lead_time,arrival_year,arrival_month,arrival_date,market_segment_type,repeated_guest,no_of_previous_cancellations,no_of_previous_bookings_not_canceled,avg_price_per_room,no_of_special_requests,booking_status
0,INN00001,2,0,1,2,Meal Plan 1,0,Room_Type 1,224,2017,10,2,Offline,0,0,0,65.0,0,Not_Canceled
1,INN00002,2,0,2,3,Not Selected,0,Room_Type 1,5,2018,11,6,Online,0,0,0,106.68,1,Not_Canceled
2,INN00003,1,0,2,1,Meal Plan 1,0,Room_Type 1,1,2018,2,28,Online,0,0,0,60.0,0,Canceled
3,INN00004,2,0,0,2,Meal Plan 1,0,Room_Type 1,211,2018,5,20,Online,0,0,0,100.0,0,Canceled
4,INN00005,2,0,1,1,Not Selected,0,Room_Type 1,48,2018,4,11,Online,0,0,0,94.5,0,Canceled


In [3]:
# Identify categorical features
target = 'booking_status'
categorical_cols = [
    'type_of_meal_plan',
    'room_type_reserved',
    'market_segment_type',
]

# List the distinct levels of every categorical feature (in order of first
# appearance, as returned by Series.unique()).
for col in categorical_cols:
    levels = df[col].unique()
    print(f"{col}: {levels}")

type_of_meal_plan: ['Meal Plan 1' 'Not Selected' 'Meal Plan 2' 'Meal Plan 3']
room_type_reserved: ['Room_Type 1' 'Room_Type 4' 'Room_Type 2' 'Room_Type 6' 'Room_Type 5'
 'Room_Type 7' 'Room_Type 3']
market_segment_type: ['Offline' 'Online' 'Corporate' 'Aviation' 'Complementary']


In [4]:
# Encode target
# Use an explicit label -> code mapping. A bare `== 'Canceled'` comparison
# would silently encode any unexpected or missing label as 0 (Not_Canceled);
# here such values are detected and reported instead.
status_codes = {'Not_Canceled': 0, 'Canceled': 1}
status_encoded = df[target].map(status_codes)

# Labels absent from the mapping (including NaN) come back from .map() as NaN.
unmapped = df.loc[status_encoded.isna(), target].unique()
if len(unmapped) > 0:
    raise ValueError(f"Unexpected {target} values: {unmapped.tolist()}")

df_encoded['booking_status_encoded'] = status_encoded.astype(int)
print("Target encoding: Not_Canceled=0, Canceled=1")
print(df_encoded['booking_status_encoded'].value_counts())

Target encoding: Not_Canceled=0, Canceled=1
booking_status_encoded
0    24390
1    11885
Name: count, dtype: int64


In [5]:
# Label Encoding
# NOTE(review): integer codes impose an arbitrary (alphabetical) order on
# these nominal features; that suits tree-based models but can mislead linear
# or distance-based ones — confirm the intended downstream model.

# Fitted encoders are kept (column name -> LabelEncoder) so the mapping can
# later be inverted with inverse_transform() or re-applied to new data.
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df_encoded[f'{col}_label'] = le.fit_transform(df[col])
    label_encoders[col] = le

    print(f"\n{col}:")
    # classes_ is sorted and each class's code is its index in classes_,
    # so enumerate() gives the mapping without a second transform() call.
    for code, cat in enumerate(le.classes_):
        print(f"  {cat} → {code}")


type_of_meal_plan:
  Meal Plan 1 → 0
  Meal Plan 2 → 1
  Meal Plan 3 → 2
  Not Selected → 3

room_type_reserved:
  Room_Type 1 → 0
  Room_Type 2 → 1
  Room_Type 3 → 2
  Room_Type 4 → 3
  Room_Type 5 → 4
  Room_Type 6 → 5
  Room_Type 7 → 6

market_segment_type:
  Aviation → 0
  Complementary → 1
  Corporate → 2
  Offline → 3
  Online → 4


In [6]:
# One-Hot Encoding
# Build one indicator frame per categorical feature, then attach them all to
# the label-encoded frame in a single concat (yields a new frame; df_encoded
# itself is left unchanged).
# NOTE(review): the result keeps the raw string columns and the *_label
# columns next to the indicator columns — confirm downstream consumers want
# all three representations.
dummy_frames = [
    pd.get_dummies(df_encoded[col], prefix=col)
    for col in categorical_cols
]
df_onehot = pd.concat([df_encoded, *dummy_frames], axis=1)

print(f"Label encoded shape: {df_encoded.shape}")
print(f"One-hot encoded shape: {df_onehot.shape}")

Label encoded shape: (36275, 23)
One-hot encoded shape: (36275, 39)


In [7]:
# Save datasets
# Output filename -> frame to write; the row index is not written to the CSV.
outputs = {
    'Hotel_Reservations_Label_Encoded.csv': df_encoded,
    'Hotel_Reservations_OneHot_Encoded.csv': df_onehot,
}
for filename, frame in outputs.items():
    frame.to_csv(filename, index=False)

print("✓ Datasets saved")

✓ Datasets saved
