In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import applications

import warnings
warnings.filterwarnings("ignore")
warnings.warn("this will not show")

# Datayi Kaggle'den cekiyoruz
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



/kaggle/input/time-spent-bill-amount-data-of-restaurants/restaurant_data.csv


# Read and Look at the Datase

In [64]:
df = pd.read_csv("/kaggle/input/time-spent-bill-amount-data-of-restaurants/restaurant_data.csv") 
df.head(3)

Unnamed: 0,Day,Entry Time,Exit Time,Meal Type,Number of People,Time Spent (minutes),Bill Amount ($),Gender,Table Location,Reservation,Customer Satisfaction,Live Music,Age Group,Weather Condition
0,Saturday,17:14,18:56,Dinner,4,102,117.08,Female,Window,False,4,False,18-25,Cloudy
1,Friday,22:13,23:15,Dinner,4,62,45.8,Female,Window,True,2,False,26-35,Snowy
2,Sunday,13:02,13:42,Lunch,3,40,34.64,Male,Center,False,1,True,36-45,Sunny


## 1.1. Drop Ediyoruz

In [4]:
df.drop(['Entry Time', 'Exit Time'], axis=1, inplace=True)

In [None]:
df.head(2)

# EDA
## 1.1. Info

In [None]:
df.info()

In [None]:
df.head(2)

## 1.2. Null Check

In [None]:
df.isna().sum()

## 1.3. Shape

In [None]:
df.shape

## 1.4. Unique Values

In [65]:
def get_unique_values(df):
    
    output_data = []

    for col in df.columns:

        # If the number of unique values in the column is less than or equal to 10
        if df.loc[:, col].nunique() <= 10:
            # Get the unique values in the column
            unique_values = df.loc[:, col].unique()
            # Append the column name, number of unique values, unique values, and data type to the output data
            output_data.append([col, df.loc[:, col].nunique(), unique_values, df.loc[:, col].dtype])
        else:
            # Otherwise, append only the column name, number of unique values, and data type to the output data
            output_data.append([col, df.loc[:, col].nunique(),"-", df.loc[:, col].dtype])

    output_df = pd.DataFrame(output_data, columns=['Column Name', 'Number of Unique Values', ' Unique Values ', 'Data Type'])

    return output_df

get_unique_values(df)

Unnamed: 0,Column Name,Number of Unique Values,Unique Values,Data Type
0,Day,7,"[Saturday, Friday, Sunday, Tuesday, Monday, We...",object
1,Entry Time,784,-,object
2,Exit Time,838,-,object
3,Meal Type,3,"[Dinner, Lunch, Breakfast]",object
4,Number of People,6,"[4, 3, 5, 6, 2, 1]",int64
5,Time Spent (minutes),151,-,int64
6,Bill Amount ($),1849,-,float64
7,Gender,3,"[Female, Male, Other]",object
8,Table Location,3,"[Window, Center, Patio]",object
9,Reservation,2,"[False, True]",bool


## 1.5. Column Names

In [None]:
df.columns

## 1.6. Descriptive Statistics

In [None]:
df.describe().T

## 1.7. Correlation

In [None]:
# Sadece numeric kolonlari sectim ve onlar arasindaki korealsyona bakiyorum
df.select_dtypes(include=[np.number]).corr()

In [None]:
sns.heatmap(df.select_dtypes(include=[np.number]).corr(), annot = True);

## 1.8. Pairplot

In [None]:
sns.pairplot(df, kind = "reg", diag_kind = "kde", diag_kws={"color":"red"}, plot_kws={"line_kws":{"color":"red"}});

## 1.9. Outliers Check

In [None]:
# Let's draw boxplots and histplots for checking distributions of features;
index=0
for feature in df.select_dtypes('number').columns:
    index+=1
    plt.figure(figsize=(40,40))
    plt.subplot((len(df.columns)),2,index)
    sns.boxplot(x=feature,data=df,whis=3) 
        
    plt.tight_layout()
    
    plt.show()

## 1.10. Bar Charts

In [None]:
# Kategorik verilere gore odenen hesap miktarlari 
# Kategorik sütunların listesi
categorical_columns = ['Day', 'Meal Type', 'Gender', 'Table Location', 'Reservation', 'Customer Satisfaction', 'Live Music', 'Age Group', 'Weather Condition']

# Her kategorik sütun için ortalama 'Bill Amount ($)' gösteren bar chart oluşturma
plt.figure(figsize=(20, 20))
for i, column in enumerate(categorical_columns, 1):
    plt.subplot(3, 3, i)  # 3x3 grid layout
    barplot = sns.barplot(x=column, y='Bill Amount ($)', data=df, ci=None)  # Confidence interval kaldırıldı
    plt.title(f'Average Bill Amount by {column}')
    plt.xticks(rotation=45)
    
    # Bar üstlerine değerleri yazdırma
    for p in barplot.patches:
        barplot.annotate(format(p.get_height(), '.2f'), 
                         (p.get_x() + p.get_width() / 2., p.get_height()), 
                         ha = 'center', va = 'center', 
                         xytext = (0, 10), 
                         textcoords = 'offset points')

plt.tight_layout()
plt.show()

In [None]:
# Kategorik verilere gore harcanan zaman miktarlari
# Kategorik sütunların listesi
categorical_columns = ['Day', 'Meal Type', 'Gender', 'Table Location', 'Reservation', 'Customer Satisfaction', 'Live Music', 'Age Group', 'Weather Condition']

# Her kategorik sütun için ortalama 'Bill Amount ($)' gösteren bar chart oluşturma
plt.figure(figsize=(20, 20))
for i, column in enumerate(categorical_columns, 1):
    plt.subplot(3, 3, i)  # 3x3 grid layout
    barplot = sns.barplot(x=column, y='Time Spent (minutes)', data=df, ci=None)  # Confidence interval kaldırıldı
    plt.title(f'Time Spent (minutes) by {column}')
    plt.xticks(rotation=45)
    
    # Bar üstlerine değerleri yazdırma
    for p in barplot.patches:
        barplot.annotate(format(p.get_height(), '.2f'), 
                         (p.get_x() + p.get_width() / 2., p.get_height()), 
                         ha = 'center', va = 'center', 
                         xytext = (0, 10), 
                         textcoords = 'offset points')

plt.tight_layout()
plt.show()

## 1.11. Scatterplot 

In [None]:
sns.scatterplot(x="Time Spent (minutes)", y= "Bill Amount ($)", data = df, hue = "Live Music");

**Insight:** Live music oldgu gunlerde kisilerin odedikleri hesap daha fazladir.   

In [None]:
# Time Spent (minutes) ile Bill Amount ($) arasındaki ilişkiyi scatter plot ile inceleme ve eğilim çizgisi ekleme
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Time Spent (minutes)', y='Bill Amount ($)', data=df)
sns.regplot(x='Time Spent (minutes)', y='Bill Amount ($)', data=df, scatter=False, color='red')  # Kırmızı eğilim çizgisi
plt.title('Relationship between Time Spent and Bill Amount')
plt.show()

**Insight:** Musterilerin restoranda harcadiklari sure arttikca odedikleri hesap miktari da artmistir.

## 1.12. Drop

In [None]:
"""df.drop(['Entry Time', 'Exit Time'], axis=1, inplace=True)"""

# 2. DEEP LEARNING

## 2.1. Encoding

In [5]:
df.head(2)

Unnamed: 0,Day,Meal Type,Number of People,Time Spent (minutes),Bill Amount ($),Gender,Table Location,Reservation,Customer Satisfaction,Live Music,Age Group,Weather Condition
0,Saturday,Dinner,4,102,117.08,Female,Window,False,4,False,18-25,Cloudy
1,Friday,Dinner,4,62,45.8,Female,Window,True,2,False,26-35,Snowy


In [None]:
df.columns

In [6]:
# Siralamanin onemli olmadigini dusundugumuz verilere "One-Hot Encoding" uygulayalim.

df = pd.get_dummies(df, columns=['Live Music', 'Reservation', 'Meal Type', 'Day', 'Gender', 'Table Location', 'Age Group', 'Weather Condition'], drop_first=True)

In [7]:
df.head(2)

Unnamed: 0,Number of People,Time Spent (minutes),Bill Amount ($),Customer Satisfaction,Live Music_True,Reservation_True,Meal Type_Dinner,Meal Type_Lunch,Day_Monday,Day_Saturday,...,Table Location_Patio,Table Location_Window,Age Group_26-35,Age Group_36-45,Age Group_46-55,Age Group_56-65,Age Group_65+,Weather Condition_Rainy,Weather Condition_Snowy,Weather Condition_Sunny
0,4,102,117.08,4,False,False,True,False,False,True,...,False,True,False,False,False,False,False,False,False,False
1,4,62,45.8,2,False,True,True,False,False,False,...,False,True,True,False,False,False,False,False,True,False


In [8]:
# Dönüştürülen sütunların veri tipini int'e çevirme
for column in df.columns:
    if df[column].dtype == 'bool':
        df[column] = df[column].astype(int)

In [9]:
df.head(2)

Unnamed: 0,Number of People,Time Spent (minutes),Bill Amount ($),Customer Satisfaction,Live Music_True,Reservation_True,Meal Type_Dinner,Meal Type_Lunch,Day_Monday,Day_Saturday,...,Table Location_Patio,Table Location_Window,Age Group_26-35,Age Group_36-45,Age Group_46-55,Age Group_56-65,Age Group_65+,Weather Condition_Rainy,Weather Condition_Snowy,Weather Condition_Sunny
0,4,102,117.08,4,0,0,1,0,0,1,...,0,1,0,0,0,0,0,0,0,0
1,4,62,45.8,2,0,1,1,0,0,0,...,0,1,1,0,0,0,0,0,1,0


## Save the Encoding

In [10]:
# Encode edilmis data setini pickle olarak kaydetmek 
# Bunu basta 1 defa yapip, encoding islemini disa aktardiktan sonra datayi 
# bastan okutup, burayi atlayarak calistirdim ki df i butun olarak koruyabileyim. 
# Bunu kaydetmemin nedeni; predictipn asamasinda yeni gelen veriye de encoding yapabilmek. 
# Yeni gelen datada "Bill Amount ($)" verisi yer almayacagi icin onu drop ederek yaptim 
# encoding islemini ve bu halini kaydsettim. 
"""df.drop(["Bill Amount ($)"], axis = 1).to_pickle('encoded_data.pkl')"""
df.to_pickle('encoded_data.pkl')

## 2.1. Labelling

In [11]:
X = df.drop(["Bill Amount ($)"], axis = 1)
y = df["Bill Amount ($)"]

## 2.3. Split Train & Test

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size = 0.2,
                                                    random_state = 42)

## 2.4. Scaling the Data

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
scaler = MinMaxScaler()

In [None]:
X_train= scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### 2.4.1. Save the Scaling 

In [None]:
# Scaler'i  disa kaydedelim
import pickle
pickle.dump(scaler, open("scaler_saved", 'wb')) #write binary

## 2.5. Modelling

In [None]:
X_train.shape

In [None]:
# DL modelini kuruyoruz;
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout

seed = 101

tf.random.set_seed(seed) #random sayi uretirken belli bir cekirdek etrafinda dolanmasini ve boylece herkeste ayni sonucun cikmasini saglar.

model = Sequential() # katmanli, ard arda yapi kullanacagiz.

# 1. Hidden Layer
model.add(Dense(32, input_dim=X_train.shape[1], activation = 'relu')) # X_train.shape[1]; Kullanilan feature sayisini belirtir. 
#model.add(Dropout(0.2)) # %20sini sondur / sifirla   daha iyi bir ogrenme gerceklessin diye


# 2. Hidden Layer
model.add(Dense(32, activation = 'relu')) # Hidden layerdeki noron 32 tane olacak. 
#model.add(Dropout(0.2))   # Bunu kullaninca skorlar kotulasti. 


# 3. Hidden Layer
model.add(Dense(16, activation = 'relu')) # Aktivasyon fonksiyonu ara basamaklarda relu kullaniliyor 
#model.add(Dropout(0.2))


# 4. Hidden Layer
model.add(Dense(8, activation = 'relu'))  # Sona dogru olan hidden layerlerde noron sayisinin azaltilarak verilmesi "best practice"
#model.add(Dropout(0.2))

# Cikis Katmani
model.add(Dense(1)) # Rgresyon oldugu icin Dense 1 olmali!!!     

# Optimizer'i tanimlayalim 
optimizer = Adam(lr = 0.003) # default learning rate value is 0.001. Bunu 0.002 / 0.003 yapabiliriz. Adam optimizer#i kullaniyoruz burada. Gredient descent de kullanilabilir burada. 

#Modeli compile edelim
model.compile(optimizer = 'adam', loss = 'mse') # Bu satiri dogru ver! Regresyon analizlerinde loss = 'mse' olmasi onemli. 

# Early Stop
early_stop = EarlyStopping(monitor = "val_loss", mode = "auto", verbose = 1, patience = 25) # "patience=25" epoch boyunca sabret, ondan daha iyi bir skor gelmezse stop et demek. 
# patiance genelde 15, 20, 25 gibi verilir. 
# mode = "auto" Loss degeri dustugu zaman bizim icin daha iyi demek

#Model Summary
model.summary()

### 2.5.1. Fit the model 

In [None]:
model.fit(x = X_train, y = y_train, validation_split = 0.15, batch_size = 128, epochs = 100) # epoch 1000 olursa 20-25 dk suruyor
# validation_split = 0.15   train datasi icindeki datanin ne kadarini validation olarak degerlenduirdigimizi gosteriyor. Cross-Valuiidation orani

### 2.5.2. Model History

In [None]:
pd.DataFrame(model.history.history)

### 2.5.3. Model Evaluation

In [None]:
loss_df = pd.DataFrame(model.history.history)# Bu iki egri birbirine yakin olmalidir. 
loss_df.plot();

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score

def eval_metric(actual, pred):
    mae = mean_absolute_error(actual, pred)
    mse = mean_squared_error(actual, pred)
    rmse = np.sqrt(mean_squared_error(actual, pred))
    score = r2_score(actual, pred)
    return print("r2_score:", score, "\nmae:", mae, "\nmse:", mse, "\nrmse:", rmse)

In [None]:
model.evaluate(X_test, y_test, verbose=0) # test datasindaki loss miktari

In [None]:
y_pred = model.predict(X_test)

In [None]:
eval_metric(y_test, y_pred) # hic bir degisiklik yapmadan tekrar calsitir modeli, bunun sonucu iyilesebilir
# Bunun sebebi; agirlik ve bias'in random olarak seciliyor olmasi. 
# ANN sacmalarsa buna "Halusulasyon" denir, modeli tekrar calistir, duzelebilir. 
# Hala duzelmiyorsa Epoch artirilabilir, diger parametreler degistirilebilir. 

**Sonuc:** ANN sonucu istedgimiz gibi degil. ML yapacagiz. 

# 3. MACHINE LEARNING 

Burada 12 tane ML yontemi bir arada calistirilacaktir.

In [17]:
from sklearn.linear_model import Ridge, RidgeCV, Lasso, LassoCV
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor

In [None]:
ridge=Ridge().fit(X_train, y_train)
lasso=Lasso().fit(X_train, y_train)
enet=ElasticNet().fit(X_train, y_train)
knn=KNeighborsRegressor().fit(X_train, y_train)
ada=AdaBoostRegressor().fit(X_train, y_train)
svm=SVR().fit(X_train, y_train)
dtc=DecisionTreeRegressor().fit(X_train, y_train)
rf=RandomForestRegressor().fit(X_train, y_train)
xgb=XGBRegressor().fit(X_train, y_train)
gbm=GradientBoostingRegressor().fit(X_train, y_train)
lgb=LGBMRegressor().fit(X_train, y_train) # LightGBM
catbost=CatBoostRegressor().fit(X_train, y_train)

## 3.1. Train datasi icin skorlari alalim

In [None]:
models=[ridge,lasso,enet,knn,ada,svm,dtc,rf,xgb,gbm,lgb,catbost]

def ML(y,models):
    r2_score=models.score(X_train, y_train)
    return r2_score

In [None]:
for i in models:
     print(i,"Algorithm succed rate :", ML("Bill Amount ($)",i))

## 3.2. Teste ait skorlari alalim

In [None]:
# Test datasi icin skorlari alacagiz;
def ML(y,models):
    r2_score=models.score(X_test, y_test)
    return r2_score

In [None]:
for i in models:
     print(i,"Algorithm succed rate :",ML("Bill Amount ($)",i))

**Yorum:**  "DecisionTreeRegressor", "XGBRegressor" ve "GradientBoostingRegressor" yontemlerinin train datasinda basarilari ci=ok yuksek iken testte dusuyor. Bu, overfittin'e isaret eder. Bunu asabilmek icin GridrsearchCV yapalim.   

## 3.3. GridSearchCV ile Optimum Hyper Parametreleri Belirleyelim

### 3.3.1. DecisionTreeRegressor icin GridSearchCV Yapalim 

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

# Parametre gridini ayarlayalım
param_grid = {
    "max_depth": [3, 5, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "max_features": ["auto", "sqrt", "log2", None]}

# DecisionTreeRegressor modelini oluşturalım
dt_model = DecisionTreeRegressor(random_state=101)

# GridSearchCV nesnesini oluşturalım
grid_dt_model = GridSearchCV(estimator=dt_model, param_grid=param_grid, cv=10, n_jobs=-1)

# Modeli eğitelim
grid_dt_model.fit(X_train, y_train)

# En iyi skoru ve parametreleri yazdıralım
print("En İyi Skor:", grid_dt_model.best_score_)
print("En İyi Parametreler:", grid_dt_model.best_params_)


In [None]:
best_grid_model = grid_dt_model.best_estimator_

En İyi Skor: 0.77081479548352

En İyi Parametreler: {'max_depth': 5, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2}

### Save the Model 

In [None]:
# Final modeli kaydedelim 
import pickle
pickle.dump(best_grid_model, open("best_grid_model", 'wb'))

### 3.3.2. XGBoostRegressor icin GridSearchCV Yapalim

In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {"n_estimators":[100, 300, 500],'max_depth':[3,5,6,7], "learning_rate": [0.05, 0.1, 0.2],
             "subsample":[0.5, 1], "colsample_bytree":[0.5, 1]}

xgb_model = XGBRegressor(booster='gblinear', random_state=101, silent=True, objective="reg:squarederror")
grid_xgb_model = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=10, n_jobs = -1)

# Modeli egitelim
grid_xgb_model.fit(X_train, y_train)

# En iyi skoru ve parametreleri yazdıralım
print("En İyi Skor:", grid_xgb_model.best_score_)
print("En İyi Parametreler:", grid_xgb_model.best_params_)

En İyi Skor: 0.7696271078817388

En İyi Parametreler: {'colsample_bytree': 0.5, 'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 500, 'subsample': 0.5}

In [None]:
best_XGB_grid_model=grid_xgb_model.best_estimator_

In [None]:
# Final modeli kaydedelim 
import pickle
pickle.dump(best_XGB_grid_model, open("best_XGB_grid_model", 'wb'))

### 3.3.3. GradientBoostRegressor icin GridSearchCV Yapalim

Bu coook uzun surdu ve sonunda hata verdi. Calistirma

In [None]:
"""from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

# Parametre gridini ayarlayalım
param_grid = {
    "n_estimators": [100, 300, 500],
    "max_depth": [2, 3, 5, 6, 7],
    "learning_rate": [0.001, 0.01, 0.05, 0.1, 0.2, 0.5],
    "subsample": [0.5, 1],
    "max_features": ["auto", "sqrt", "log2", None, 2, 3, 4]
}

# GradientBoostingRegressor modelini oluşturalım
gb_model = GradientBoostingRegressor(random_state=101)

# GridSearchCV nesnesini oluşturalım
grid_gb_model = GridSearchCV(estimator=gb_model, param_grid=param_grid, cv=10, n_jobs=-1)

# Modeli eğitelim
grid_gb_model.fit(X_train, y_train)

# En iyi skoru ve parametreleri yazdıralım
print("En İyi Skor:", grid_gb_model.best_score_)
print("En İyi Parametreler:", grid_gb_model.best_params_)"""


# 4. FINAL MODEL

Final modeli tum verileri kullanarak kuracagiz

## 4.1. Labelling

In [12]:
X = df.drop(["Bill Amount ($)"], axis = 1)
y = df["Bill Amount ($)"]

## 4.2. Scaling

Daha once scaling islemini X_train uzerinden yapmistik, simdi tum X uzerinden yapacagiz. 

In [13]:
# Scaling isleminde kullanacagimiz kutuphaneleri cagiralim;

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
scaler = MinMaxScaler() 

In [14]:
X= scaler.fit_transform(X)

In [15]:
# Scaler'i  disa kaydedelim
import pickle
pickle.dump(scaler, open("final_scaler_saved", 'wb')) #write binary

## 4.3. Final DT Model

In [18]:
# Yukarida belirledigimiz optimum hyper parametrelerio kullanarak final DT modelini tahmin edelim;
final_DT_model=DecisionTreeRegressor(max_depth=5, max_features='auto', min_samples_leaf=1, min_samples_split=2).fit(X, y)

In [19]:
# Final DT modeli kaydedelim 
import pickle
pickle.dump(final_DT_model, open("final_DT_model", 'wb'))

## 4.4. Final XGB Model

In [20]:
# Yukarida belirledigimiz optimum hyper parametrelerio kullanarak final XGB modelini tahmin edelim;
final_XGB_model=XGBRegressor(colsample_bytree= 0.5, learning_rate= 0.2, max_depth= 3, n_estimators= 500, subsample=0.5).fit(X, y)

In [21]:
# Final XGB modeli kaydedelim 
import pickle
pickle.dump(final_XGB_model, open("final_XGB_model", 'wb'))

## Final ANN Model

In [22]:
# DL modelini kuruyoruz;
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout

seed = 101

tf.random.set_seed(seed) #random sayi uretirken belli bir cekirdek etrafinda dolanmasini ve boylece herkeste ayni sonucun cikmasini saglar.

model = Sequential() # katmanli, ard arda yapi kullanacagiz.

# 1. Hidden Layer
model.add(Dense(32, input_dim=X.shape[1], activation = 'relu')) # X.shape[1]; Kullanilan feature sayisini belirtir. 
#model.add(Dropout(0.2)) # %20sini sondur / sifirla   daha iyi bir ogrenme gerceklessin diye


# 2. Hidden Layer
model.add(Dense(32, activation = 'relu')) # Hidden layerdeki noron 32 tane olacak. 
#model.add(Dropout(0.2))   # Bunu kullaninca skorlar kotulasti. 


# 3. Hidden Layer
model.add(Dense(16, activation = 'relu')) # Aktivasyon fonksiyonu ara basamaklarda relu kullaniliyor 
#model.add(Dropout(0.2))


# 4. Hidden Layer
model.add(Dense(8, activation = 'relu'))  # Sona dogru olan hidden layerlerde noron sayisinin azaltilarak verilmesi "best practice"
#model.add(Dropout(0.2))

# Cikis Katmani
model.add(Dense(1)) # Rgresyon oldugu icin Dense 1 olmali!!!     

# Optimizer'i tanimlayalim 
optimizer = Adam(lr = 0.003) # default learning rate value is 0.001. Bunu 0.002 / 0.003 yapabiliriz. Adam optimizer#i kullaniyoruz burada. Gredient descent de kullanilabilir burada. 

#Modeli compile edelim
model.compile(optimizer = 'adam', loss = 'mse') # Bu satiri dogru ver! Regresyon analizlerinde loss = 'mse' olmasi onemli. 

# Early Stop
early_stop = EarlyStopping(monitor = "val_loss", mode = "auto", verbose = 1, patience = 25) # "patience=25" epoch boyunca sabret, ondan daha iyi bir skor gelmezse stop et demek. 
# patiance genelde 15, 20, 25 gibi verilir. 
# mode = "auto" Loss degeri dustugu zaman bizim icin daha iyi demek

#Model Summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                832       
                                                                 
 dense_1 (Dense)             (None, 32)                1056      
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dense_3 (Dense)             (None, 8)                 136       
                                                                 
 dense_4 (Dense)             (None, 1)                 9         
                                                                 
Total params: 2561 (10.00 KB)
Trainable params: 2561 (10.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
model.fit(x = X, y = y, batch_size = 128, epochs = 1500)

Epoch 1/1500
Epoch 2/1500
Epoch 3/1500
Epoch 4/1500
Epoch 5/1500
Epoch 6/1500
Epoch 7/1500
Epoch 8/1500
Epoch 9/1500
Epoch 10/1500
Epoch 11/1500
Epoch 12/1500
Epoch 13/1500
Epoch 14/1500
Epoch 15/1500
Epoch 16/1500
Epoch 17/1500
Epoch 18/1500
Epoch 19/1500
Epoch 20/1500
Epoch 21/1500
Epoch 22/1500
Epoch 23/1500
Epoch 24/1500
Epoch 25/1500
Epoch 26/1500
Epoch 27/1500
Epoch 28/1500
Epoch 29/1500
Epoch 30/1500
Epoch 31/1500
Epoch 32/1500
Epoch 33/1500
Epoch 34/1500
Epoch 35/1500
Epoch 36/1500
Epoch 37/1500
Epoch 38/1500
Epoch 39/1500
Epoch 40/1500
Epoch 41/1500
Epoch 42/1500
Epoch 43/1500
Epoch 44/1500
Epoch 45/1500
Epoch 46/1500
Epoch 47/1500
Epoch 48/1500
Epoch 49/1500
Epoch 50/1500
Epoch 51/1500
Epoch 52/1500
Epoch 53/1500
Epoch 54/1500
Epoch 55/1500
Epoch 56/1500
Epoch 57/1500
Epoch 58/1500
Epoch 59/1500
Epoch 60/1500
Epoch 61/1500
Epoch 62/1500
Epoch 63/1500
Epoch 64/1500
Epoch 65/1500
Epoch 66/1500
Epoch 67/1500
Epoch 68/1500
Epoch 69/1500
Epoch 70/1500
Epoch 71/1500
Epoch 72/1500
E

<keras.src.callbacks.History at 0x7bebdc70b9d0>

In [24]:
# Final ANN modeli kaydedelim 
import pickle
pickle.dump(model, open("final_ANN_model", 'wb'))

## Eval Metric'leri Alalim

In [25]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
def eval_metric(actual, pred):
    mae = mean_absolute_error(actual, pred)
    mse = mean_squared_error(actual, pred)
    rmse = np.sqrt(mean_squared_error(actual, pred))
    score = r2_score(actual, pred)
    return print("r2_score:", score, "\nmae:", mae, "\nmse:", mse, "\nrmse:", rmse)

In [26]:
# DT icin skorlar
y_pred = final_DT_model.predict(X)
eval_metric(y, y_pred)


r2_score: 0.8035408652187307 
mae: 12.139308422925085 
mse: 211.51521555218025 
rmse: 14.543562684300579


In [28]:
# XGB icin skorlar
y_pred = final_XGB_model.predict(X)
eval_metric(y, y_pred)

r2_score: 0.9036024153683421 
mae: 8.338005879974364 
mse: 103.78522696221627 
rmse: 10.187503470537632


In [29]:
# ANN icin skorlar
y_pred = model.predict(X)
eval_metric(y, y_pred)

r2_score: 0.9257202035682293 
mae: 7.006456500091553 
mse: 79.97239309299863 
rmse: 8.942728503817984


# 5. PREDICTION

## 5.1. Loading Model and Scaler

In [33]:
from tensorflow.keras.models import load_model
import pickle

final_scaler = pickle.load(open("final_scaler_saved", "rb"))
DT_model = pickle.load(open('final_DT_model', "rb"))
XGB_model = pickle.load(open('final_XGB_model', "rb"))
ANN_model = pickle.load(open('final_ANN_model', "rb"))                      

## Prediction

In [32]:
df.head(1)

Unnamed: 0,Number of People,Time Spent (minutes),Bill Amount ($),Customer Satisfaction,Live Music_True,Reservation_True,Meal Type_Dinner,Meal Type_Lunch,Day_Monday,Day_Saturday,...,Table Location_Patio,Table Location_Window,Age Group_26-35,Age Group_36-45,Age Group_46-55,Age Group_56-65,Age Group_65+,Weather Condition_Rainy,Weather Condition_Snowy,Weather Condition_Sunny
0,4,102,117.08,4,0,0,1,0,0,1,...,0,1,0,0,0,0,0,0,0,0


In [31]:
# Elimizdeki mevcut data setinden bir gozlem cekelim;

Customer_A = df.drop('Bill Amount ($)', axis = 1).iloc[0:1, :]
Customer_A

Unnamed: 0,Number of People,Time Spent (minutes),Customer Satisfaction,Live Music_True,Reservation_True,Meal Type_Dinner,Meal Type_Lunch,Day_Monday,Day_Saturday,Day_Sunday,...,Table Location_Patio,Table Location_Window,Age Group_26-35,Age Group_36-45,Age Group_46-55,Age Group_56-65,Age Group_65+,Weather Condition_Rainy,Weather Condition_Snowy,Weather Condition_Sunny
0,4,102,4,0,0,1,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0


In [34]:
# Bu dataya scaling uygulayalim;
Customer_A_Scaled = final_scaler.transform(Customer_A)
Customer_A_Scaled

array([[0.6 , 0.48, 0.75, 0.  , 0.  , 1.  , 0.  , 0.  , 1.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  ]])

### DT ile Predict Alalim

In [35]:
DT_model.predict(Customer_A_Scaled)

array([99.91333333])

In [36]:
# Gercekte odenen hesap
df.iloc[0][2]

117.08

**Yorum:** 0 indexe sahip musteri gercekte 117.08 USD'lik odeme yapmis. DT model bunu 99.91 USD olarak tahmin etti. 


### XGB ile Predict Alalim

In [37]:
XGB_model.predict(Customer_A_Scaled)

array([107.661964], dtype=float32)

**Yorum:** 0 indexe sahip musteri gercekte 117.66 USD'lik odeme yapmis. XGB model bunu 107.66 USD olarak tahmin etti. 

### ANN ile Predict Alalim

In [38]:
ANN_model.predict(Customer_A_Scaled)



array([[112.997406]], dtype=float32)

**Yorum:** 0 indexe sahip musteri gercekte 117.08 USD'lik odeme yapmis. ANN model bunu 112.99 USD olarak tahmin etti. 

### Yeni Bir Gozleme Ait Degerleri Kullanarak Predict Yapalim

In [39]:
df.columns

Index(['Number of People', 'Time Spent (minutes)', 'Bill Amount ($)',
       'Customer Satisfaction', 'Live Music_True', 'Reservation_True',
       'Meal Type_Dinner', 'Meal Type_Lunch', 'Day_Monday', 'Day_Saturday',
       'Day_Sunday', 'Day_Thursday', 'Day_Tuesday', 'Day_Wednesday',
       'Gender_Male', 'Gender_Other', 'Table Location_Patio',
       'Table Location_Window', 'Age Group_26-35', 'Age Group_36-45',
       'Age Group_46-55', 'Age Group_56-65', 'Age Group_65+',
       'Weather Condition_Rainy', 'Weather Condition_Snowy',
       'Weather Condition_Sunny'],
      dtype='object')

In [57]:
data = {
    "Day": ['Monday'],
    "Meal Type": ['Dinner'],
    "Number of People": [2],
    "Time Spent (minutes)": [125],
    "Gender": ['Male'],
    "Table Location": ['Window'],
    "Reservation": [1],
    "Customer Satisfaction": [4],
    "Live Music": [1],
    "Age Group": ['18-25'],
    "Weather Condition": ['Cloudy']
}

# Yeni DataFrame oluşturma
df_new = pd.DataFrame(data)
df_new

Unnamed: 0,Day,Meal Type,Number of People,Time Spent (minutes),Gender,Table Location,Reservation,Customer Satisfaction,Live Music,Age Group,Weather Condition
0,Monday,Dinner,2,125,Male,Window,1,4,1,18-25,Cloudy


In [58]:
encoded_data = pd.read_pickle('encoded_data.pkl')
# Assuming df_new is your new dataset
# Apply the same encoding to df_new
df_new_encoded = pd.get_dummies(df_new, columns=['Live Music', 'Reservation', 'Meal Type', 'Day', 'Gender', 'Table Location', 'Age Group', 'Weather Condition'], drop_first=True)
# Dönüştürülen sütunların veri tipini int'e çevirme
for column in df_new_encoded.columns:
    if df_new_encoded[column].dtype == 'bool':
        df_new_encoded[column] = df_new_encoded[column].astype(int)
# Ensure that the columns in df_new_encoded are the same as in the original encoding
# This is important to make sure the order and presence of columns are consistent

df_new_encoded = df_new_encoded.reindex(columns=encoded_data.columns, fill_value=0)
df_new_encoded

Unnamed: 0,Number of People,Time Spent (minutes),Bill Amount ($),Customer Satisfaction,Live Music_True,Reservation_True,Meal Type_Dinner,Meal Type_Lunch,Day_Monday,Day_Saturday,...,Table Location_Patio,Table Location_Window,Age Group_26-35,Age Group_36-45,Age Group_46-55,Age Group_56-65,Age Group_65+,Weather Condition_Rainy,Weather Condition_Snowy,Weather Condition_Sunny
0,2,125,0,4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [59]:
df_new_encoded.drop(['Bill Amount ($)'], axis=1, inplace=True)
df_new_encoded

Unnamed: 0,Number of People,Time Spent (minutes),Customer Satisfaction,Live Music_True,Reservation_True,Meal Type_Dinner,Meal Type_Lunch,Day_Monday,Day_Saturday,Day_Sunday,...,Table Location_Patio,Table Location_Window,Age Group_26-35,Age Group_36-45,Age Group_46-55,Age Group_56-65,Age Group_65+,Weather Condition_Rainy,Weather Condition_Snowy,Weather Condition_Sunny
0,2,125,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
# Scaling Yapalim;

df_new_encoded_Scaled = final_scaler.transform(df_new_encoded)
df_new_encoded_Scaled

array([[0.2       , 0.63333333, 0.75      , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ]])

In [61]:
DT_model.predict(df_new_encoded_Scaled)

array([114.30901235])

In [62]:
XGB_model.predict(df_new_encoded_Scaled)

array([107.310646], dtype=float32)

In [63]:
ANN_model.predict(df_new_encoded_Scaled)



array([[90.015144]], dtype=float32)

In [None]:
df_new.columns

In [None]:
# One-Hot Encoding yapalim;
df_new = pd.get_dummies(df_new, columns=['Live Music', 'Reservation', 'Meal Type', 'Day', 'Gender', 'Table Location', 'Age Group', 'Weather Condition'], drop_first=True)
df_new