In [1]:
!pip install pandas
!pip install faiss-cpu
!pip install numpy
!pip install sentence-transformers
!pip install matplotlib
!pip install seaborn



In [2]:
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

In [4]:
# Read the bookings table from a CSV located in the current working directory.
df = pd.read_csv(filepath_or_buffer="hotel_bookings.csv")

In [5]:
# Columns this notebook actually reads. Only a missing value in one of these
# should disqualify a booking; a blanket dropna() would also throw away every
# row that has a gap in any unrelated column.
REQUIRED_COLUMNS = [
    "hotel",
    "is_canceled",
    "lead_time",
    "country",
    "adr",
    "arrival_date_year",
    "arrival_date_month",
    "arrival_date_day_of_month",
    "stays_in_weekend_nights",
    "stays_in_week_nights",
]

# Build the cleaned frame as a new object (no in-place mutation), then derive:
#   arrival_date  - timestamp assembled from the year / month / day columns
#                   (the "Y-M-D" string is parsed by pandas' format inference)
#   total_revenue - average daily rate times total nights stayed
df = df.dropna(subset=REQUIRED_COLUMNS).assign(
    arrival_date=lambda bookings: pd.to_datetime(
        bookings["arrival_date_year"].astype(str) + "-" +
        bookings["arrival_date_month"] + "-" +
        bookings["arrival_date_day_of_month"].astype(str)
    ),
    total_revenue=lambda bookings: bookings["adr"] * (
        bookings["stays_in_weekend_nights"] + bookings["stays_in_week_nights"]
    ),
)

In [6]:
# Guard: stop with a clear error when the "hotel" column is absent.
hotel_column_present = "hotel" in df.columns
if not hotel_column_present:
    raise KeyError("Column 'hotel' not found in dataset")

In [7]:
# Total revenue per arrival month (calendar-month periods), as a two-column frame.
arrival_month = df["arrival_date"].dt.to_period("M")
revenue_trend = df.groupby(arrival_month)["total_revenue"].sum().reset_index()

# Same data re-keyed by month so it can be flattened to {period: revenue}.
revenue_by_month = revenue_trend.set_index("arrival_date")["total_revenue"]

# Summary statistics bundled into one dictionary:
#   revenue_trend                  - month period -> summed total_revenue
#   cancellation_rate              - mean of is_canceled, scaled to a percentage
#   geographical_distribution      - country value -> number of rows
#   booking_lead_time_distribution - describe() statistics of lead_time
analytics = dict(
    revenue_trend=revenue_by_month.to_dict(),
    cancellation_rate=df["is_canceled"].mean() * 100,
    geographical_distribution=df["country"].value_counts().to_dict(),
    booking_lead_time_distribution=df["lead_time"].describe().to_dict(),
)

In [8]:
# Write the analytics dictionary to "analytics.pkl" using pickle.
# NOTE(review): pickle files must only be loaded from trusted sources.
with open("analytics.pkl", "wb") as analytics_file:
    pickle.dump(analytics, analytics_file)

In [9]:
# Load the sentence encoder by its model identifier.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# One text per row: the hotel value and the arrival date, separated by a space.
hotel_text = df["hotel"].astype(str)
arrival_text = df["arrival_date"].astype(str)
data_texts = hotel_text + ' ' + arrival_text

# Encode every row text into an embedding vector.
data_embeddings = model.encode(data_texts.tolist())

# Flat L2 index sized to the embedding width, filled with all row vectors.
embedding_dim = data_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(np.array(data_embeddings))

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
# Persist the FAISS index to "faiss_index.bin" in the current working directory.
faiss.write_index(index, "faiss_index.bin")

In [11]:
# Serialize the whole SentenceTransformer object to "sentence_transformer.pkl".
# NOTE(review): unpickling executes arbitrary code and presumably needs library
# versions compatible with the ones used here - load only from a trusted source
# and confirm whether the library's own save/load mechanism would suit consumers.
with open("sentence_transformer.pkl", "wb") as model_file:
    pickle.dump(model, model_file)