In [2]:
# booking_patterns.py
# Requires: pandas, matplotlib
# Run: python booking_patterns.py

import pandas as pd
import matplotlib.pyplot as plt
import sys
from pathlib import Path

# ---------- CONFIG ----------
file_path = 'Hotel_bookings_final.csv'  # change if required
save_plots = False   # set True to save PNGs to disk
out_dir = Path('/mnt/data/plots')
out_dir.mkdir(parents=True, exist_ok=True)
plt.rcParams.update({
    'figure.figsize': (8, 6),
    'axes.titlesize': 16,
    'axes.labelsize': 12,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11
})

# ---------- LOAD ----------
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"File not found: {file_path}", file=sys.stderr)
    raise

# Ensure required columns exist
required_cols = {'booking_status', 'channel_of_booking', 'room_type', 'star_rating'}
missing = required_cols - set(df.columns)
if missing:
    raise KeyError(f"Missing required columns: {missing}")

# Filter to Confirmed bookings
confirmed = df[df['booking_status'] == 'Confirmed'].copy()

# Helper to compute percentage distribution (0-100)
def percent_series(series, order=None):
    s = (series.value_counts(normalize=True) * 100)
    if order:
        # preserve order; fill missing with 0
        return pd.Series({k: s.get(k, 0.0) for k in order})
    return s.sort_values(ascending=False)

# ---------- CALCULATIONS ----------
# By channel (Web, Android, iOS expected — but handle any categories)
channel_order = ['Web', 'Android', 'iOS']  # you can change order if needed
channel_pct = percent_series(confirmed['channel_of_booking'], order=channel_order)

# By room type (Standard, Deluxe, Suite expected)
room_order = ['Standard', 'Deluxe', 'Suite']
room_pct = percent_series(confirmed['room_type'], order=room_order)

# By star rating (2,3,4,5 maybe present) — sort numeric
# Convert star_rating to int (if possible)
try:
    confirmed['star_rating'] = confirmed['star_rating'].astype(int)
except Exception:
    # if already int or non-numeric, ignore
    pass
star_pct = percent_series(confirmed['star_rating'])
star_pct = star_pct.sort_index()  # ascending by star rating

# Print numeric summaries
print("\n=== Confirmed bookings (percent) ===")
print("By channel:\n", channel_pct.round(2).to_dict())
print("By room type:\n", room_pct.round(2).to_dict())
print("By star rating:\n", star_pct.round(2).to_dict())

# ---------- PLOTS ----------
# Individual charts
# 1) Channel
plt.figure()
channel_pct.plot(kind='bar')
plt.title('Confirmed Bookings by Channel')
plt.ylabel('Percentage (%)')
plt.xlabel('Channel')
plt.ylim(0, max(channel_pct.max()*1.15, 10))
plt.grid(axis='y', linestyle='--', alpha=0.4)
plt.tight_layout()
if save_plots:
    plt.savefig(out_dir / 'confirmed_by_channel.png', dpi=150)
plt.show()

# 2) Room Type
plt.figure()
room_pct.plot(kind='bar')
plt.title('Confirmed Bookings by Room Type')
plt.ylabel('Percentage (%)')
plt.xlabel('Room Type')
plt.ylim(0, max(room_pct.max()*1.15, 10))
plt.grid(axis='y', linestyle='--', alpha=0.4)
plt.tight_layout()
if save_plots:
    plt.savefig(out_dir / 'confirmed_by_room_type.png', dpi=150)
plt.show()

# 3) Star Rating
plt.figure()
star_pct.plot(kind='bar')
plt.title('Confirmed Bookings by Star Rating')
plt.ylabel('Percentage (%)')
plt.xlabel('Star Rating')
plt.ylim(0, max(star_pct.max()*1.15, 10))
plt.grid(axis='y', linestyle='--', alpha=0.4)
plt.tight_layout()
if save_plots:
    plt.savefig(out_dir / 'confirmed_by_star_rating.png', dpi=150)
plt.show()

# 4) Combined side-by-side for quick comparison
fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=True)
# Channel
channel_pct.plot(kind='bar', ax=axes[0])
axes[0].set_title('By Channel')
axes[0].set_xlabel('')
axes[0].set_ylabel('Percentage (%)')
axes[0].grid(axis='y', linestyle='--', alpha=0.4)
# Room
room_pct.plot(kind='bar', ax=axes[1])
axes[1].set_title('By Room Type')
axes[1].set_xlabel('')
axes[1].grid(axis='y', linestyle='--', alpha=0.4)
# Star
star_pct.plot(kind='bar', ax=axes[2])
axes[2].set_title('By Star Rating')
axes[2].set_xlabel('')
axes[2].grid(axis='y', linestyle='--', alpha=0.4)

plt.suptitle('Confirmed Booking Patterns (Channels | Room Type | Star Rating)', fontsize=18)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
if save_plots:
    plt.savefig(out_dir / 'confirmed_patterns_combined.png', dpi=180)
plt.show()

# ---------- OPTIONAL: return dataframes if used in notebook ----------
# If you import this script into a notebook, you can use the computed series:
# channel_pct, room_pct, star_pct


OSError: [Errno 30] Read-only file system: '/mnt'