In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the base style first: plt.style.use('default') resets rcParams to the
# library defaults, so overrides set before it would be discarded.
plt.style.use('default')
plt.rcParams.update({'figure.facecolor': 'white', 'axes.facecolor': 'white'})

# Do not truncate columns when a frame is printed.
pd.set_option('display.max_columns', None)

# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
data = pd.read_csv(r"H:\Hazoom\Videos\Courses\ML - Cellula\Week 1\Task\first inten project.csv")
# Drop the Booking_ID column by reassignment rather than in place.
data = data.drop(columns="Booking_ID")
print(data)

In [None]:
# Show the available columns, then the distinct values of the two columns
# that the later pivots and plots are keyed on.
print(data.columns)
for column_name in ('market segment type', 'booking status'):
    print(data[column_name].unique())

In [None]:
# Constant helper column: it is the `values` column of the pivot tables built
# later in this notebook (aggregated with 'count'), so each row adds one to
# the cell it falls in.
data['count'] = 1

In [None]:
# Total stay length = week nights + weekend nights.
data['total number of nights'] = data['number of week nights'] + data['number of weekend nights']
print(data['total number of nights'].max())

# Bucket the stay length into labelled ranges. pd.cut builds right-closed
# intervals by default, so without include_lowest=True the first bin would be
# (0, 2] and a stay of 0 nights would become NaN despite the '0-2 nights' label.
data['nights_group'] = pd.cut(
    data['total number of nights'],
    bins=[0, 2, 5, 100],
    labels=['0-2 nights', '3-5 nights', '6 or more nights'],
    include_lowest=True,
)

In [None]:
# Party size is the number of adults plus the number of children.
guest_totals = data['number of adults'] + data['number of children']
data['total number of guests'] = guest_totals
print(data['total number of guests'].max())

# Right-closed bins: (0, 1] -> Single, (1, 3] -> small family, (3, 100] -> big family.
# A total of 0 lies outside every bin and is therefore left as NaN.
family_bins = [0, 1, 3, 100]
family_labels = ['Single', 'Small Family (2-3)', 'Big Family (4+ members)']
data['family_group'] = pd.cut(
    data['total number of guests'],
    bins=family_bins,
    labels=family_labels,
)

In [None]:
# Truncate the price to a whole number. The source column name carries a
# trailing space ('average price '); the cleaned copy written here does not.
# astype is the vectorised equivalent of applying int() row by row.
data['average price'] = data['average price '].astype('int64')

# Bucket the truncated price into labelled ranges. pd.cut builds right-closed
# intervals by default, so include_lowest=True is needed for a price of 0 to
# land in 'Low (0-50$)' instead of becoming NaN.
data['price ranges'] = pd.cut(
    data['average price'],
    bins=[0, 50, 100, 200, 1000],
    labels=['Low (0-50$)', 'Intermediate (51-100$)', 'Higher (101-200$)', 'Very High (+200$)'],
    include_lowest=True,
)

In [None]:
# Bucket the lead time into labelled ranges. pd.cut builds right-closed
# intervals by default, so include_lowest=True is needed for a lead time of 0
# to land in 'less than one week' instead of becoming NaN.
data['days before arrival'] = pd.cut(
    data['lead time'],
    bins=[0, 6, 13, 29, 59, 89, 179, 269, 364, 1000],
    labels=[
        'less than one week',
        'less than two weeks',
        'less than one month',
        'less than two months',
        'less than three months',
        'less than six months',
        'less than nine months',
        'less than one year',
        'more than a year',
    ],
    include_lowest=True,
)

In [None]:
# Take labels and heights from the same Series so they cannot be misaligned:
# unique() returns values in order of first appearance, while value_counts()
# sorts by frequency, so pairing the two can attach a count to the wrong label.
y = data['booking status'].value_counts()
x = y.index
plt.bar(x, y.values)
plt.title("How many Cancelled")
plt.xlabel("Booking Status")
plt.ylabel("Numbers")
plt.show()

In [None]:
# '2018-2-29' is not a real calendar date (2018 was not a leap year); it is
# mapped to 28 February before parsing.
reservation_dates = data['date of reservation'].replace('2018-2-29', '2/28/2018')

# Values that still cannot be parsed become NaT instead of raising.
data['date of reservation'] = pd.to_datetime(reservation_dates, errors='coerce')
# Human-readable "Month YYYY" label used for the monthly groupings.
data['month_year'] = data['date of reservation'].dt.strftime('%B %Y')
data = data.sort_values('date of reservation')

# Sanity check: first and last ten labels after sorting by date.
for preview in (data['month_year'].head(10), data['month_year'].tail(10)):
    print(preview)

In [None]:
# Parse the "Month YYYY" label back into a datetime so months can be ordered
# chronologically rather than alphabetically.
data['month_year_dt'] = pd.to_datetime(data['month_year'], format='%B %Y')

# Count rows per month, order by the datetime key, then drop that key so only
# the readable label remains as the index.
monthly_sizes = data.groupby(['month_year_dt', 'month_year']).size()
monthly_sizes = monthly_sizes.sort_index(level='month_year_dt')
reservations_per_month = monthly_sizes.reset_index(level=0, drop=True)

plt.figure(figsize=(12, 6), facecolor = 'white')
month_axes = reservations_per_month.plot(kind='bar', color='steelblue')
month_axes.set_title("Reservations per Month")
month_axes.set_xlabel("Month and Year")
month_axes.set_ylabel("Number of Reservations")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Rows per (month, booking status), one column per status. fill_value=0 makes
# a month with no bookings of a given status count as 0 instead of NaN, which
# would otherwise be missing from that status's line.
monthly_counts = (
    data.groupby(['month_year', 'booking status'])
    .size()
    .unstack(fill_value=0)
)

# Sort chronologically via a datetime index, then switch to short labels
# ("Jan 2018") for the x axis.
monthly_counts.index  = pd.to_datetime(monthly_counts.index, format = '%B %Y')
monthly_counts = monthly_counts.sort_index()
monthly_counts.index = monthly_counts.index.strftime('%b %Y')

plt.figure(figsize = (12,6))

# (column in monthly_counts, legend label, marker, colour, marker size)
status_line_specs = [
    ('Not_Canceled', 'Confirmed', '*', 'green', 10),
    ('Canceled', 'Canceled', 'o', 'red', 8),
]
for status_column, legend_label, marker_shape, line_color, marker_size in status_line_specs:
    sns.lineplot(
        data = monthly_counts[status_column],
        label = legend_label,
        marker = marker_shape,
        color = line_color,
        markersize = marker_size,
        linewidth = 3
    )

plt.title('Canceled and Not-Canceled Count for Each Month', fontsize=14, fontweight='bold', pad=20)
plt.xlabel('Month and Year',fontsize=12, labelpad=10)
plt.ylabel('Count',fontsize=12, labelpad=10)
plt.xticks(rotation = 45, ha = 'right')

plt.legend(title='Booking Status',
           frameon=True,
           shadow=True,
           title_fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
# Bookings per market segment (rows) and booking status (columns); empty
# combinations are shown as 0.
pt_1 = data.pivot_table(
    values="count",
    index="market segment type",
    columns="booking status",
    aggfunc='count',
    fill_value=0,
)
print(pt_1)

In [None]:
# Grouped (non-stacked) bars: one cluster per market segment, one bar per booking status.
ptPlot_1 = pt_1.plot(kind="bar", stacked=False, figsize=(10, 6))
ptPlot_1.set_xlabel("Market Segment Type")
ptPlot_1.set_ylabel("Count of Confirmed vs Cancelled")
ptPlot_1.set_title("Booking Status for Each Segment Type")
plt.xticks(rotation=0, ha='center')
# Fixed y ticks every 1000 from 0 to 14000.
plt.yticks(range(0, 14000 + 1000, 1000))
plt.legend(title='Booking Status', bbox_to_anchor=(1.05, 1))

# Label every bar with its height, centred 5 points above the bar's top edge.
for bar in ptPlot_1.patches:
    bar_height = bar.get_height()
    bar_center_x = bar.get_x() + bar.get_width() / 2.
    ptPlot_1.annotate(
        f"{bar_height:.0f}",
        (bar_center_x, bar_height),
        ha='center', va='center',
        xytext=(0, 5),
        textcoords='offset points',
    )

plt.tight_layout()
plt.show()


In [None]:
# Bookings per stay-length group (rows) and booking status (columns).
# observed=True keeps only the categories that actually occur in the data.
pt_2 = data.pivot_table(
    values="count",
    index="nights_group",
    columns="booking status",
    aggfunc='count',
    fill_value=0,
    observed=True,
)
print(pt_2)

In [None]:
# Grouped (non-stacked) bars: one cluster per stay-length group, one bar per booking status.
ptPlot_2 = pt_2.plot(kind="bar", stacked=False, figsize=(10, 6))
ptPlot_2.set_xlabel("Duration of Staying")
ptPlot_2.set_ylabel("Count of Confirmed vs Cancelled")
ptPlot_2.set_title("Booking Status for Different Durations")
plt.xticks(rotation=0, ha='center')
# Fixed y ticks every 1000 from 0 to 14000.
plt.yticks(range(0, 14000 + 1000, 1000))
plt.legend(title='Booking Status', bbox_to_anchor=(1, 1))

# Label every bar with its height, centred 5 points above the bar's top edge.
for bar in ptPlot_2.patches:
    bar_height = bar.get_height()
    bar_center_x = bar.get_x() + bar.get_width() / 2.
    ptPlot_2.annotate(
        f"{bar_height:.0f}",
        (bar_center_x, bar_height),
        ha='center', va='center',
        xytext=(0, 5),
        textcoords='offset points',
    )

plt.tight_layout()
plt.show()

In [None]:
# Bookings per party-size group (rows) and booking status (columns).
# observed=True keeps only the categories that actually occur in the data.
pt_3 = data.pivot_table(
    values="count",
    index="family_group",
    columns="booking status",
    aggfunc='count',
    fill_value=0,
    observed=True,
)
print(pt_3)

In [None]:
# Grouped (non-stacked) bars: one cluster per party-size group, one bar per booking status.
ptPlot_3 = pt_3.plot(kind="bar", stacked=False, figsize=(10, 6))
ptPlot_3.set_xlabel("Number of Guests")
ptPlot_3.set_ylabel("Count of Confirmed vs Cancelled")
ptPlot_3.set_title("Booking Status for Different Guest Numbers")
plt.xticks(rotation=0, ha='center')
# Fixed y ticks every 1500 from 0 to 15000.
plt.yticks(range(0, 14000 + 1500, 1500))
plt.legend(title='Booking Status', bbox_to_anchor=(1, 1))

# Label every bar with its height, centred 5 points above the bar's top edge.
for bar in ptPlot_3.patches:
    bar_height = bar.get_height()
    bar_center_x = bar.get_x() + bar.get_width() / 2.
    ptPlot_3.annotate(
        f"{bar_height:.0f}",
        (bar_center_x, bar_height),
        ha='center', va='center',
        xytext=(0, 5),
        textcoords='offset points',
    )

plt.tight_layout()
plt.show()

In [None]:
# Bookings per lead-time bucket (rows) and booking status (columns).
# observed=True keeps only the categories that actually occur in the data.
pt_4 = data.pivot_table(
    values="count",
    index="days before arrival",
    columns="booking status",
    aggfunc='count',
    fill_value=0,
    observed=True,
)
print(pt_4)


In [None]:
plt.figure(figsize=(12, 6))

# One line per booking status, drawn across the lead-time buckets of pt_4.
# (column in pt_4, legend label, marker, colour, marker size)
lead_time_line_specs = [
    ('Not_Canceled', 'Confirmed', '*', 'green', 10),
    ('Canceled', 'Canceled', 'o', 'red', 8),
]
for status_column, legend_label, marker_shape, line_color, marker_size in lead_time_line_specs:
    sns.lineplot(
        data=pt_4[status_column],
        label=legend_label,
        marker=marker_shape,
        color=line_color,
        markersize=marker_size,
        linewidth=3,
    )

plt.title('Booking Status Based on Lead Time', fontsize=14, fontweight='bold', pad=20)
plt.xlabel('Amount of Time Between Reservation and Actual Staying', fontsize=12, labelpad=10)
plt.ylabel('Count', fontsize=12, labelpad=10)
# Tilt the bucket labels so they do not run into each other.
plt.xticks(rotation=30, ha='right')

plt.legend(
    title='Booking Status',
    frameon=True,
    shadow=True,
    title_fontsize=12,
)

plt.tight_layout()
plt.show()

In [None]:
# Bookings per price range (rows) and booking status (columns).
# observed=True keeps only the categories that actually occur in the data.
pt_5 = data.pivot_table(
    values="count",
    index="price ranges",
    columns="booking status",
    aggfunc='count',
    fill_value=0,
    observed=True,
)
print(pt_5)

In [None]:
# Grouped (non-stacked) bars: one cluster per price range, one bar per booking status.
ptPlot_5 = pt_5.plot(kind="bar", stacked=False, figsize=(10, 6))
ptPlot_5.set_xlabel("Price Ranges")
ptPlot_5.set_ylabel("Count of Confirmed vs Cancelled")
ptPlot_5.set_title("Booking Status for Different Price Ranges")
plt.xticks(rotation=0, ha='center')
# Fixed y ticks every 1000 from 0 to 14000.
plt.yticks(range(0, 14000 + 1000, 1000))
plt.legend(title='Booking Status', bbox_to_anchor=(1, 1))

# Label every bar with its height, centred 5 points above the bar's top edge.
for bar in ptPlot_5.patches:
    bar_height = bar.get_height()
    bar_center_x = bar.get_x() + bar.get_width() / 2.
    ptPlot_5.annotate(
        f"{bar_height:.0f}",
        (bar_center_x, bar_height),
        ha='center', va='center',
        xytext=(0, 5),
        textcoords='offset points',
    )

plt.tight_layout()
plt.show()