In [155]:
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

data = pd.read_csv('data.csv')
data['Date'] = pd.to_datetime(data['Date'],dayfirst = True)
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day_of_week'] = data['Date'].dt.dayofweek
data['Day'] = data['Date'].dt.day
public_holidays_df = data[data['Public Holiday'].notna()]
public_holidays_df['Date'] = public_holidays_df['Date'].dt.strftime('%d/%m/%Y')
data['Public Holiday'] = data['Public Holiday'].fillna(0)
data['Public Holiday'] = data['Public Holiday'].apply(lambda x: 1 if x else 0)
data['Event'] = data['Event'].apply(lambda x: 1 if x == 'TRUE' else 0)

X = data[['Day_of_week', 'Day', 'Month', 'Year','Event', 'Public Holiday',
          'India_Reservation']]
y = data[['Customers_Chinese', 'Customers_India']]

busy_threshold = 100
def is_busy(customers_prediction):
    return customers_prediction > busy_threshold

X_train,X_test,y_train, y_test = train_test_split(X,y,test_size = 7)

model = DecisionTreeRegressor(random_state=42)
model.fit(X_train,y_train)

predictions = pd.DataFrame(model.predict(X_test), columns=['Predicted_Customers_Chinese', 'Predicted_Customers_India'])
X_test.reset_index(drop=True, inplace=True)
date = X_test.apply(lambda row: f"{row['Day']}/{row['Month']}/{row['Year']}", axis=1)
predictions['Date'] = pd.to_datetime(date).dt.strftime('%d/%m/%Y')
predictions['Day'] = pd.to_datetime(date).dt.day_name()

def get_public_holiday(date):
    if date in public_holidays_df['Date'].values:
        return public_holidays_df.loc[public_holidays_df['Date'] == date, 'Public Holiday'].iloc[0]
    else:
        return ''
predictions['Public Holiday'] = predictions['Date'].apply(get_public_holiday)
predictions['India_Reservation'] = X_test['India_Reservation']
predictions['Chinese_Buffet_Busy'] = predictions['Predicted_Customers_Chinese'].apply(is_busy)
predictions['Indian_Buffet_Busy'] = predictions['Predicted_Customers_India'].apply(is_busy)
predictions = predictions[['Date', 'Day', 'Public Holiday', 'Predicted_Customers_Chinese', 'Chinese_Buffet_Busy', 'India_Reservation','Predicted_Customers_India', 'Indian_Buffet_Busy']]

print(predictions)


         Date        Day Public Holiday  Predicted_Customers_Chinese  \
0  29/05/2023     Monday                                        92.0   
1  09/03/2022  Wednesday                                       147.0   
2  27/08/2022   Saturday                                       142.0   
3  27/10/2023     Friday                                       119.0   
4  15/06/2022  Wednesday                                       108.0   
5  29/11/2023  Wednesday                                        82.0   
6  18/01/2021     Monday                                       109.0   

   Chinese_Buffet_Busy  India_Reservation  Predicted_Customers_India  \
0                False              False                        0.0   
1                 True              False                        0.0   
2                 True               True                      135.0   
3                 True              False                        0.0   
4                 True              False                      

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  public_holidays_df['Date'] = public_holidays_df['Date'].dt.strftime('%d/%m/%Y')
  predictions['Date'] = pd.to_datetime(date).dt.strftime('%d/%m/%Y')
  predictions['Day'] = pd.to_datetime(date).dt.day_name()


In [158]:
test_2024 = pd.read_csv('test.csv')
test_2024.drop(columns = ['Unnamed: 0'], inplace = True)


In [159]:
day = test_2024['Day'].copy()
test_2024['Date'] = pd.to_datetime(test_2024['Date'], dayfirst = False)
test_2024['Year'] = test_2024['Date'].dt.year
test_2024['Month'] = test_2024['Date'].dt.month
test_2024['Day_of_week'] = test_2024['Date'].dt.dayofweek
test_2024['Day'] = test_2024['Date'].dt.day
public_holidays_df = test_2024[test_2024['Public Holiday'].notna()]
public_holidays_df['Date'] = public_holidays_df['Date'].dt.strftime('%d/%m/%Y')
test_2024['Public Holiday'] = test_2024['Public Holiday'].fillna(0)
test_2024['Public Holiday'] = test_2024['Public Holiday'].apply(lambda x: 1 if x else 0)
test_2024['Event'] = test_2024['Event'].apply(lambda x: 1 if x == 'TRUE' else 0)

test_2024_x = test_2024[['Day_of_week', 'Day', 'Month', 'Year','Event', 'Public Holiday',
          'India_Reservation']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  public_holidays_df['Date'] = public_holidays_df['Date'].dt.strftime('%d/%m/%Y')


In [160]:
predictions = pd.DataFrame(model.predict(test_2024_x), columns=['Predicted_Customers_Chinese', 'Predicted_Customers_India'])
date = test_2024_x.apply(lambda row: f"{row['Day']}/{row['Month']}/{row['Year']}", axis=1)
predictions['Date'] = pd.to_datetime(date).dt.strftime('%d/%m/%Y')
predictions['Day'] = pd.to_datetime(date).dt.day_name()
test_2024_x.reset_index(drop=True, inplace=True)

def get_public_holiday(date):
    if date in public_holidays_df['Date'].values:
        return public_holidays_df.loc[public_holidays_df['Date'] == date, 'Public Holiday'].iloc[0]
    else:
        return ''
predictions['Public Holiday'] = predictions['Date'].apply(get_public_holiday)
predictions['India_Reservation'] = test_2024_x['India_Reservation']
predictions['Chinese_Buffet_Busy'] = predictions['Predicted_Customers_Chinese'].apply(is_busy)
predictions['Indian_Buffet_Busy'] = predictions['Predicted_Customers_India'].apply(is_busy)
predictions = predictions[['Date', 'Day', 'Public Holiday', 'Predicted_Customers_Chinese', 'Chinese_Buffet_Busy', 'India_Reservation','Predicted_Customers_India', 'Indian_Buffet_Busy']]

print(predictions)

           Date        Day  Public Holiday  Predicted_Customers_Chinese  \
0    01/01/2024     Monday  New Year's Day                        177.0   
1    01/02/2024   Thursday                                        103.0   
2    01/03/2024     Friday                                         98.0   
3    01/04/2024     Monday                                         83.0   
4    01/05/2024  Wednesday      Labour Day                         97.0   
..          ...        ...             ...                          ...   
366  01/01/2025  Wednesday  New Year's Day                        177.0   
367  01/02/2025   Saturday                                         83.0   
368  01/03/2025   Saturday                                         89.0   
369  01/04/2025    Tuesday                                        168.0   
370  01/05/2025   Thursday                                        168.0   

     Chinese_Buffet_Busy  India_Reservation  Predicted_Customers_India  \
0                   True 

  predictions['Date'] = pd.to_datetime(date).dt.strftime('%d/%m/%Y')
  predictions['Day'] = pd.to_datetime(date).dt.day_name()


In [131]:
predictions['Date']

0      1/1/2024
1      2/1/2024
2      3/1/2024
3      4/1/2024
4      5/1/2024
         ...   
366    1/1/2025
367    2/1/2025
368    3/1/2025
369    4/1/2025
370    5/1/2025
Name: Date, Length: 371, dtype: object

In [117]:
predictions.to_csv('predictions.csv',index = False)