In [6]:
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

data = pd.read_csv('data.csv')
data['Date'] = pd.to_datetime(data['Date'],dayfirst = True)
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day_of_week'] = data['Date'].dt.dayofweek
data['Day'] = data['Date'].dt.day
public_holidays_df = data[data['Public Holiday'].notna()]
public_holidays_df['Date'] = public_holidays_df['Date'].dt.strftime('%d/%m/%Y')
data['Public Holiday'] = data['Public Holiday'].fillna(0)
data['Public Holiday'] = data['Public Holiday'].apply(lambda x: 1 if x else 0)
data['Event'] = data['Event'].apply(lambda x: 1 if x == 'TRUE' else 0)

X = data[['Day_of_week', 'Day', 'Month', 'Year','Event', 'Public Holiday',
          'India_Reservation']]
y = data[['Customers_Chinese', 'Customers_India']]

busy_threshold = 100
def is_busy(customers_prediction):
    return customers_prediction > busy_threshold

X_train,X_test,y_train, y_test = train_test_split(X,y,test_size = 7)

model = DecisionTreeRegressor(random_state=42)
model.fit(X_train,y_train)

predictions = pd.DataFrame(model.predict(X_test), columns=['Predicted_Customers_Chinese', 'Predicted_Customers_India'])
X_test.reset_index(drop=True, inplace=True)
date = X_test.apply(lambda row: f"{row['Day']}/{row['Month']}/{row['Year']}", axis=1)
predictions['Date'] = pd.to_datetime(date).dt.strftime('%d/%m/%Y')
predictions['Day'] = pd.to_datetime(date).dt.day_name()

def get_public_holiday(date):
    if date in public_holidays_df['Date'].values:
        return public_holidays_df.loc[public_holidays_df['Date'] == date, 'Public Holiday'].iloc[0]
    else:
        return ''
predictions['Public Holiday'] = predictions['Date'].apply(get_public_holiday)
predictions['India_Reservation'] = X_test['India_Reservation']
predictions['Chinese_Buffet_Busy'] = predictions['Predicted_Customers_Chinese'].apply(is_busy)
predictions['Indian_Buffet_Busy'] = predictions['Predicted_Customers_India'].apply(is_busy)
predictions = predictions[['Date', 'Day', 'Public Holiday', 'Predicted_Customers_Chinese', 'Chinese_Buffet_Busy', 'India_Reservation','Predicted_Customers_India', 'Indian_Buffet_Busy']]

print(predictions)


         Date       Day Public Holiday  Predicted_Customers_Chinese  \
0  20/10/2023    Friday                                       117.0   
1  21/01/2022    Friday                                       142.0   
2  10/06/2021  Thursday                                        96.0   
3  23/10/2022    Sunday                                       134.0   
4  16/06/2023    Friday                                       139.0   
5  26/04/2021    Monday                                       118.0   
6  15/07/2023  Saturday                                       175.0   

   Chinese_Buffet_Busy  India_Reservation  Predicted_Customers_India  \
0                 True               True                       84.0   
1                 True               True                       95.0   
2                False               True                       88.0   
3                 True              False                        0.0   
4                 True              False                        0.0   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  public_holidays_df['Date'] = public_holidays_df['Date'].dt.strftime('%d/%m/%Y')
  predictions['Date'] = pd.to_datetime(date).dt.strftime('%d/%m/%Y')
  predictions['Day'] = pd.to_datetime(date).dt.day_name()
