In [47]:
import requests
import time

---
### **Testing Analytics Endpoint**

In [50]:
# Time one round trip to the analytics endpoint.
# perf_counter() is the clock intended for measuring short durations; unlike
# time.time() it cannot jump if the system clock is adjusted mid-request.
s = time.perf_counter()
# Without a timeout, requests waits indefinitely when the server stalls.
# 60 s leaves generous headroom for the plot generation on the server side.
response = requests.post("http://127.0.0.1:5000/analytics", timeout=60)
e = time.perf_counter()
print(f"Time to get response: {e-s}")
# Surface 4xx/5xx replies here rather than as a confusing JSON error later.
response.raise_for_status()

Time to get response: 4.0937652587890625


In [51]:
# Parse the JSON body of the analytics response into a Python object.
# save_plots() below reads this variable and treats it as a mapping of
# plot name -> base64-encoded image data.
data = response.json()

In [52]:
# Display the parsed payload.
# NOTE(review): this renders every base64 string in full, which bloats the
# notebook; `list(data)` would show just the plot names.
data

{'arrival_distribution': 'iVBORw0KGgoAAAANSUhEUgAAA+gAAAH0CAYAAACuKActAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAACCk0lEQVR4nOzdd1yV9f//8ecBARURRQLEXZkLt+XMlaK4s1LDMEvR0hyp1ceGaUPL0XJl7m2Ze4Qjd84sKkdq5UxQUwRFBYT37w9/XF+O4CrhHPFxv93O7ca5rve5rtd13pzxPO9r2IwxRgAAAAAAwKFcHF0AAAAAAAAgoAMAAAAA4BQI6AAAAAAAOAECOgAAAAAAToCADgAAAACAEyCgAwAAAADgBAjoAAAAAAA4AQI6AAAAAABOgIAOAAAAAIATIKADAAAAAOAECOgAAAAAADgBAjoAAAAAAE6AgA4AAAAAgBMgoAMAAAAA4AQI6AAAAAAAOAECOgAAAAAAToCADgAAAACAEyCgAwAAAADgBAjoAAAAAAA4AQI6AAAAAABOgIAOAAAAAIATIKADAAAAAOAECOgAAAAAADgBAjoAAAAAAE6AgA4AAAAAgBMgoAMAAAAA4AQI6AAAAAAAOAECOgAAAAAAToCADgAAAACAEyCgAwAAAADgBAjoAAAAAAA4AQI6AAAAAABOgIAOAAAAAIATIKADAAAAAOAECOgAAAAAADgBAjoAAAAAAE6AgA4AAAAAgBMgoAMAAAAA4AQI6AAAAAAAOAECOgA40BdffCGbzaagoKC7utzBgwfLZrPd1WVer3PnzipevPhttbPZbNbN09NTxYsXV6tWrTR16lQlJCSke0z9+vVVv379O6pn3759Gjx4sI4cOXJHj7t+XUeOHJHNZtPIkSPvaDm3MnToUC1evDjd9A0bNshms2nDhg13dX23o379+nf9fy8jxYsXt/rfxcVF3t7eKl

In [53]:
import base64

In [54]:
def save_plots(save_path: str = "tests/analytics_plots/", plots=None):
    """Decode base64-encoded plots and write each one as ``<save_path>/<key>.png``.

    Args:
        save_path: Directory that receives the PNG files. It is created,
            including missing parents, when it does not exist yet.
        plots: Mapping of plot name to base64-encoded image data. When
            omitted, the notebook-level ``data`` variable is used, which
            keeps the original zero-argument call working.

    Raises:
        binascii.Error: May be raised by ``base64.b64decode`` when a value is
            not correctly padded base64.
    """
    # Local import so the cell works without touching the notebook's import cell.
    from pathlib import Path

    if plots is None:
        # Fall back to the global for backward compatibility. Passing `plots`
        # explicitly is safer because `data` is rebound later in the notebook.
        plots = data

    # Path handles a trailing slash in save_path, so no "dir//file.png" paths.
    target_dir = Path(save_path)
    target_dir.mkdir(parents=True, exist_ok=True)

    for key, value in plots.items():
        (target_dir / f"{key}.png").write_bytes(base64.b64decode(value))
        print(f"Saved {key}.png")

In [55]:
# Write every plot held in `data` to the default tests/analytics_plots/ directory.
save_plots()

Saved arrival_distribution.png
Saved booking_lead_time.png
Saved booking_trends_by_month.png
Saved cancellation_by_customer_type.png
Saved cancellation_rate.png
Saved cancellation_rate_by_segment.png
Saved cancellation_rate_vs_lead_time.png
Saved geographical_distribution.png
Saved holiday_vs_non_holiday.png
Saved market_segment_distribution.png
Saved revenue_by_channel.png
Saved revenue_trends.png
Saved room_type_distribution.png
Saved special_requests_vs_cancellation.png
Saved weekend_vs_weekday.png


The plots are saved in the `tests/analytics_plots/` directory.

---
### **Testing Ask Endpoint**

**Sample Queries**

- What was the total revenue for hotel type 'A' in March 2016?
- What's the average number of waiting-list days for May 2016?
- What was the overall cancellation rate?
- which country has more than 200 cancellations?
- What was the average stay duration?

In [56]:
# The three questions sent to the /ask endpoint below.
q1, q2, q3 = (
    "What was the overall cancellation rate?",
    "which country has more than 200 cancellations?",
    "What was the average stay duration?",
)

In [58]:
# Send the three questions to the /ask endpoint.
# Only the first request is timed, so the printed figure is the latency of a
# single query, not of all three.
# perf_counter() is the clock intended for measuring short durations.
s = time.perf_counter()
# A timeout keeps the cell from hanging forever if the server stalls.
a1 = requests.post("http://127.0.0.1:5000/ask", json={"query": q1}, timeout=60)
e = time.perf_counter()
print(f"Time to get response from RAG: {e-s}")

a2 = requests.post("http://127.0.0.1:5000/ask", json={"query": q2}, timeout=60)
a3 = requests.post("http://127.0.0.1:5000/ask", json={"query": q3}, timeout=60)

# Fail on any 4xx/5xx reply now instead of on a missing key further down.
a1.raise_for_status()
a2.raise_for_status()
a3.raise_for_status()

Time to get response from RAG: 0.6309547424316406


In [59]:
# Decode the JSON body of each /ask reply, in the order the queries were sent.
r1, r2, r3 = a1.json(), a2.json(), a3.json()

In [60]:
# Show each question next to the answer it received, separated by blank lines.
print(
    "\n\n".join(
        f"Query: {question}\nAnswer: {reply['response']}"
        for question, reply in ((q1, r1), (q2, r2), (q3, r3))
    )
)

Query: What was the overall cancellation rate?
Answer: The overall cancellation rate for hotel bookings was 37.04%.

Query: which country has more than 200 cancellations?
Answer: According to the provided data, the country AGO has a total of 205 cancellations, which is more than 200.

Query: What was the average stay duration?
Answer: The average stay duration for hotel bookings was 3.43 days.


**Let's verify with the original dataset**

In [76]:
import pandas as pd 

In [None]:
# Load the raw bookings CSV that the answers above are checked against.
# NOTE(review): this rebinds `data`, which until now held the parsed analytics
# JSON. save_plots() reads the global `data` by default, so calling it after
# this cell would operate on the DataFrame. A distinct name would avoid that.
data = pd.read_csv("data/raw/hotel_bookings.csv")

In [82]:
# Inspect the `is_canceled` column that the Q1 and Q2 checks below rely on.
data.is_canceled

0         0
1         0
2         0
3         0
4         0
         ..
119385    0
119386    0
119387    0
119388    0
119389    0
Name: is_canceled, Length: 119390, dtype: int64

In [85]:
## Q1 check
# `is_canceled` looks like a 0/1 flag (int64 in the preview above), so its
# mean is the fraction of bookings that were cancelled.
data["is_canceled"].mean()

0.37041628277075134

In [93]:
## Q2 check
# Total cancellations per country, then keep only countries above 200.
cancellations_per_country = data.groupby("country")["is_canceled"].sum()
cp = cancellations_per_country.loc[cancellations_per_country.gt(200)]
# First label of the filtered result. This confirms that one country
# qualifies; it does not show whether other countries qualify as well.
cp.index[0]

'AGO'

In [96]:
## Q3 check
# Total stay = weekend nights + week nights, stored as a new column on `data`.
data["total_stay"] = data["stays_in_weekend_nights"].add(data["stays_in_week_nights"])
# Average stay length across all bookings.
data["total_stay"].mean()

3.4279001591423066