In [None]:
import os
# List every file found under the Kaggle input directory, one full path per line.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the hotel reservations table and preview its first five rows.
DATA_PATH = "/kaggle/input/hotel-reservations-classification-dataset/Hotel Reservations.csv"
ds = pd.read_csv(filepath_or_buffer = DATA_PATH)

ds.head(n = 5)

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) of the loaded data.
ds.info()

In [None]:
# Remove the row identifier so it does not end up among the model features.
# Rebinding `ds` (rather than dropping in place) together with
# `errors = "ignore"` keeps the cell idempotent: running it a second time is a
# no-op instead of a KeyError for the already-missing column.
ds = ds.drop(columns = ["Booking_ID"], errors = "ignore")

In [None]:
# Display the frame as it stands once the Booking_ID column has been removed.
ds

In [None]:
# Number of missing values in each column.
ds.isnull().sum()

In [None]:
# Summary statistics, transposed so that each row describes one column.
ds.describe().T

In [None]:
# Violin plot of every int64/float64 column on a two-column grid.
# The grid is sized from the number of columns, so no column is silently
# dropped and no empty panel is shown when the column count changes.
numeric_cols = list(ds.select_dtypes(['int64', 'float64']).columns)
n_grid_cols = 2
n_grid_rows = max(1, -(-len(numeric_cols) // n_grid_cols))  # ceiling division
# 60 / 7 inches per row reproduces the 30 x 60 figure for the 14-column case.
fig, axes = plt.subplots(n_grid_rows, n_grid_cols, figsize = (30, 60 * n_grid_rows / 7), squeeze = False)

# `axes` (the grid) and `axis` (one panel) are distinct names, so the loop does
# not rebind the array it is iterating over.
for col, axis in zip(numeric_cols, axes.flat):
    sns.violinplot(x = ds[col], ax = axis)

# Hide grid cells that have no column to show.
for axis in axes.flat[len(numeric_cols):]:
    axis.set_visible(False)

In [None]:
# Count plot of every object (string) column, split by booking status.
# The grid is sized from the number of columns, so extra categorical columns
# are not silently skipped and unused panels are hidden.
categorical_cols = list(ds.select_dtypes(['object']).columns)
n_grid_cols = 2
n_grid_rows = max(1, -(-len(categorical_cols) // n_grid_cols))  # ceiling division
# 10 inches per row reproduces the 30 x 20 figure for the four-column case.
fig, axes = plt.subplots(n_grid_rows, n_grid_cols, figsize = (30, 10 * n_grid_rows), squeeze = False)

# `hue` is given as a column name because `data = ds` is already supplied.
for col, axis in zip(categorical_cols, axes.flat):
    sns.countplot(data = ds, x = col, ax = axis, hue = 'booking_status')

# Hide grid cells that have no column to show.
for axis in axes.flat[len(categorical_cols):]:
    axis.set_visible(False)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object (string) column of `ds` in place.
# The fitted encoders are kept in `label_encoders`, keyed by column name, so the
# integer codes shown in later reports can be translated back to the original
# category names (via `.classes_` or `.inverse_transform`).
# Note: re-running this cell after the columns are already encoded finds no
# object columns and leaves `label_encoders` empty; reload the data first.
label_encoders = {}
for col in ds.select_dtypes("object"):
    encoder = LabelEncoder()
    ds[col] = encoder.fit_transform(ds[col])
    label_encoders[col] = encoder

ds.info()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The last column is the target; every
# other column is a feature.
# A fixed seed makes the split (and every score derived from it) reproducible,
# and stratifying keeps the class balance the same in both partitions.
SPLIT_SEED = 42
features = ds.iloc[:, :-1]
target = ds.iloc[:, -1]

x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size = .3,
    random_state = SPLIT_SEED,
    stratify = target,
)

x_train.shape, x_test.shape

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score

import warnings
# Suppress every warning from this point on, regardless of category or library.
# NOTE(review): a blanket "ignore" would also hide things like solver
# convergence warnings from the models fitted below — consider restricting the
# filter to specific warning categories.
warnings.filterwarnings("ignore")

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Seed shared by every estimator that has a source of randomness, so repeated
# runs of the notebook produce the same scores.
MODEL_SEED = 42

# Candidate classifiers, keyed by the label used in the printed scores and plots.
# Logistic regression, k-nearest neighbours and the SVM are sensitive to feature
# scale, so each is wrapped in a pipeline that standardises the features first
# (the scaler is fit on the training data only, inside `fit`). Logistic
# regression also gets a higher iteration cap so the solver can converge.
models = {
    "Logistic": make_pipeline(StandardScaler(), LogisticRegression(max_iter = 1000)),
    "rfClassifier": RandomForestClassifier(random_state = MODEL_SEED),
    "tree": DecisionTreeClassifier(max_depth = 5, criterion = "gini", random_state = MODEL_SEED),
    "knClassifier": make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors = 5)),
    "gBoost": GradientBoostingClassifier(random_state = MODEL_SEED),
    "Ada Boost": AdaBoostClassifier(n_estimators = 150, random_state = MODEL_SEED),
    "Bagging": BaggingClassifier(n_estimators = 150, random_state = MODEL_SEED),
    "xgBoost": XGBClassifier(random_state = MODEL_SEED),
    "catBoost": CatBoostClassifier(logging_level = "Silent", random_seed = MODEL_SEED),
    "lightGBM": LGBMClassifier(random_state = MODEL_SEED),
    "svm": make_pipeline(StandardScaler(), SVC()),
}

# Fit every candidate model on the training split and score it on the test split.
# `accuracy_scores` (percentages) and `predicted` (test-set predictions) are
# lists in the same order as the keys of `models`; later cells rely on that.
accuracy_scores = []
predicted = []

for name, estimator in models.items():
    estimator.fit(x_train, y_train)
    y_pred = estimator.predict(x_test)
    # Round to two decimals instead of truncating with int(): truncation
    # under-reports each score by up to a full point and hides small differences
    # between models. Arguments follow scikit-learn's (y_true, y_pred) order.
    accuracy_scores.append(round(float(accuracy_score(y_test, y_pred)) * 100, 2))
    predicted.append(y_pred)

for name, score in zip(models, accuracy_scores):
    print (' ', name, ' accuracy : ', score, ' %  ')

In [None]:
# Bar chart of the test accuracy of each model, with the value written above
# every bar.
model_names = list(models.keys())

plt.figure(figsize = (25, 8))
ax = sns.barplot(x = model_names, y = accuracy_scores)
for bar in ax.patches:
    bar_x, bar_y = bar.get_xy()
    bar_width = bar.get_width()
    bar_height = bar.get_height()
    label_position = (bar_x + bar_width/2, bar_y + bar_height*1.02)
    ax.annotate(f'{round(bar_height,2)}%', label_position, ha='center')

# Report the best-scoring model; on a tie the first one in `models` is chosen.
best_score = max(accuracy_scores)
best_name = model_names[accuracy_scores.index(best_score)]
print (best_name, " : " , best_score, " %")

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Classification report and confusion matrix for every fitted model.
# scikit-learn's metrics take (y_true, y_pred) in that order: with the arguments
# swapped, precision and recall trade places in the report and the confusion
# matrix comes out transposed.
for name, y_hat in zip(models, predicted):
    print (' \n \n ', name, ' : \n \n', classification_report(y_test, y_hat))
    confusion = confusion_matrix(y_test, y_hat)
    # The diagonal holds the correctly classified samples; everything else is an
    # error. Using the trace works for any number of classes.
    n_correct = confusion.trace()
    n_wrong = confusion.sum() - n_correct
    print (confusion, '\n \n Total : ', y_test.shape[0], '\n Truth : ', n_correct, '\n Error : ', n_wrong)

In [None]:
# Import `Sequential` directly instead of the `models` submodule: importing
# `models` would rebind the name that earlier cells use for the dictionary of
# scikit-learn estimators, breaking those cells if they are re-run afterwards.
from tensorflow.keras import Sequential, layers
from tensorflow.keras.optimizers import Adam

# Fully connected binary classifier: a sigmoid layer as wide as the feature
# count, three ReLU hidden layers, and a single sigmoid output unit.
model = Sequential([
    layers.Dense(x_train.shape[1], activation = "sigmoid"),
    layers.Dense(256, activation = "relu"),
    layers.Dense(512, activation = "relu"),
    layers.Dense(256, activation = "relu"),
    layers.Dense(1, activation = "sigmoid"),
])

# `metrics` is passed as a list, which is the form Keras documents; a bare
# string is not accepted by every Keras release.
model.compile(optimizer = Adam(learning_rate = 0.0001), loss = 'binary_crossentropy', metrics = ["accuracy"])

model.fit(x_train, y_train, epochs = 10)

# Last expression of the cell: loss and accuracy on the held-out split.
model.evaluate(x_test, y_test)

In [None]:
# Run the Keras network on the test features. This rebinds `y_pred`, the same
# name the model-comparison loop used for its per-model predictions.
y_pred = model.predict(x_test)

In [None]:
# Convert the network outputs into hard 0/1 labels: values strictly above 0.5
# become 1, everything else 0.
y_pred = (np.ravel(y_pred) > 0.5).astype(int)

In [None]:
# Count how many predictions fall into each distinct value of `y_pred`.
pd.Series(y_pred).value_counts()

In [None]:
# Classification report and confusion matrix for the thresholded network
# predictions, followed by the totals of correct and incorrect predictions.
report = classification_report(y_test, y_pred)
print (report)

print ("\n")

confusion = confusion_matrix(y_test, y_pred)
n_total = y_test.shape[0]
n_correct = confusion[0, 0] + confusion[1, 1]   # diagonal of the 2x2 matrix
n_wrong = confusion[0, 1] + confusion[1, 0]     # off-diagonal of the 2x2 matrix
print (confusion, '\n \n Total : ', n_total, '\n Truth : ', n_correct, '\n Error : ', n_wrong)