In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Load datasets
sales_df = pd.read_csv("Calgary_Restaurant_Sales.csv")
reservations_df = pd.read_csv("Calgary_Reservations_Data.csv")

# Convert 'Date' columns to datetime format
sales_df['Date'] = pd.to_datetime(sales_df['Date'])
reservations_df['Date'] = pd.to_datetime(reservations_df['Date'])

# Aggregate customer demand from reservations
reservations_df['Total_Customers'] = reservations_df['Party_Size']
daily_customers = reservations_df.groupby('Date')['Total_Customers'].sum().reset_index()

# Merge datasets
merged_df = sales_df.merge(daily_customers, on='Date', how='left')
merged_df.fillna(0, inplace=True)  # Fill missing values

# Feature selection
features = ['Total_Sales', 'Is_Weekend']  # Add more features as needed
target = 'Total_Customers'

# Ensure features exist
for col in features:
    if col not in merged_df.columns:
        raise KeyError(f"The column '{col}' was not found in the dataset.")

# Define X and y
X = merged_df[features]
y = merged_df[target]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train multiple regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Evaluate model
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")
print(f"R-squared Score: {r2}")

# Forecast next 7 days
last_known_date = merged_df['Date'].max()
future_dates = [last_known_date + pd.Timedelta(days=i) for i in range(1, 8)]
future_data = pd.DataFrame({'Date': future_dates, 'Sales': np.mean(X_train['Total_Sales']), 'Is_Weekend': [1 if d.weekday() >= 5 else 0 for d in future_dates]})

future_predictions = model.predict(future_data[features])
future_data['Predicted_Customers'] = future_predictions
print(future_data[['Date', 'Predicted_Customers']])


KeyError: "The column 'Is_Weekend' was not found in the dataset."

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Merge datasets on 'Date'
merged_df = pd.merge(reservations_df, sales_df, on='Date', how='inner')

# ✅ Add this line right after merging
merged_df['Is_Weekend'] = pd.to_datetime(merged_df['Date']).dt.weekday.isin([5, 6]).astype(int)

# Select relevant features for prediction
features = ['Party_Size', 'Previous_Visits', 'Wait_Time_Min', 'Table_Turn_Time_Min', 'Bill_Amount', 'Is_Weekend']


# Select relevant features for prediction
features = ['Party_Size', 'Previous_Visits', 'Wait_Time_Min', 'Table_Turn_Time_Min', 'Bill_Amount', 'Is_Weekend']
target = 'Guest_ID'  # Assuming 'Guest_ID' represents unique customers per day

# Ensure Guest_ID is numerical (count of guests per day)
merged_df[target] = merged_df.groupby('Date')[target].transform('count')

# Drop duplicates to keep daily totals
merged_df = merged_df.drop_duplicates(subset=['Date'])

# Define X and y
X = merged_df[features]
y = merged_df[target]

# Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

# Forecast next 7 days
future_dates = pd.date_range(start=merged_df['Date'].max() + pd.Timedelta(days=1), periods=7)
future_features = pd.DataFrame({
    'Party_Size': [X['Party_Size'].mean()] * 7,
    'Previous_Visits': [X['Previous_Visits'].mean()] * 7,
    'Wait_Time_Min': [X['Wait_Time_Min'].mean()] * 7,
    'Table_Turn_Time_Min': [X['Table_Turn_Time_Min'].mean()] * 7,
    'Bill_Amount': [X['Bill_Amount'].mean()] * 7,
    'Is_Weekend': [1 if d.weekday() in [5, 6] else 0 for d in future_dates]
})

# Predict future customer demand
future_predictions = model.predict(future_features)

# Output results
future_forecast = pd.DataFrame({'Date': future_dates, 'Predicted_Customers': future_predictions})
future_forecast


NameError: name 'pd' is not defined

In [11]:

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Load data
reservations = pd.read_csv("Calgary_Reservations_Data.csv")
sales = pd.read_csv("Calgary_Restaurant_Sales.csv")


In [13]:

# Merge on Date
merged_df = pd.merge(reservations, sales, on='Date', how='inner')

# Add weekend flag
merged_df['Is_Weekend'] = pd.to_datetime(merged_df['Date']).dt.weekday.isin([5, 6]).astype(int)

# Target: count of unique guests per day
merged_df['Guest_ID'] = merged_df.groupby('Date')['Guest_ID'].transform('count')

# Drop duplicate rows to represent daily data
merged_df = merged_df.drop_duplicates(subset=['Date'])

# Features and target
features = ['Party_Size', 'Previous_Visits', 'Wait_Time_Min', 'Table_Turn_Time_Min', 'Bill_Amount', 'Is_Weekend']
target = 'Guest_ID'

X = merged_df[features]
y = merged_df[target]


In [15]:

# Standard split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("📊 Standard Train/Test Split Performance")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²: {r2:.2f}")


📊 Standard Train/Test Split Performance
MAE: 6.24
RMSE: 7.10
R²: -0.03


In [17]:

# Time Series Split
tscv = TimeSeriesSplit(n_splits=5)
model_ts = LinearRegression()

print("\n📈 Time Series Split Performance")
for fold, (train_idx, test_idx) in enumerate(tscv.split(X), 1):
    X_train_ts, X_test_ts = X.iloc[train_idx], X.iloc[test_idx]
    y_train_ts, y_test_ts = y.iloc[train_idx], y.iloc[test_idx]

    model_ts.fit(X_train_ts, y_train_ts)
    y_pred_ts = model_ts.predict(X_test_ts)

    mae_ts = mean_absolute_error(y_test_ts, y_pred_ts)
    rmse_ts = np.sqrt(mean_squared_error(y_test_ts, y_pred_ts))
    r2_ts = r2_score(y_test_ts, y_pred_ts)

    print(f"Fold {fold}: MAE={mae_ts:.2f}, RMSE={rmse_ts:.2f}, R²={r2_ts:.2f}")



📈 Time Series Split Performance
Fold 1: MAE=6.45, RMSE=7.53, R²=-0.06
Fold 2: MAE=5.88, RMSE=6.91, R²=-0.08
Fold 3: MAE=6.07, RMSE=7.13, R²=0.00
Fold 4: MAE=6.36, RMSE=7.42, R²=-0.10
Fold 5: MAE=6.12, RMSE=7.08, R²=-0.00


In [19]:

from datetime import timedelta

# Forecast next 7 days using average values and adjusting Is_Weekend flag
last_known_date = pd.to_datetime(merged_df['Date'].max())
future_dates = pd.date_range(start=last_known_date + timedelta(days=1), periods=7)

# Use average values from training features
default_values = {col: X[col].mean() for col in X.columns}

forecast_rows = []
for d in future_dates:
    row = default_values.copy()
    if 'Is_Weekend' in row:
        row['Is_Weekend'] = 1 if d.weekday() in [5, 6] else 0
    row_df = pd.DataFrame([row])
    prediction = model.predict(row_df)[0]
    forecast_rows.append({'Date': d, 'Predicted_Customers': prediction})

future_forecast_df = pd.DataFrame(forecast_rows)
print("📅 7-Day Customer Forecast:")
print(future_forecast_df)


📅 7-Day Customer Forecast:
        Date  Predicted_Customers
0 2024-01-01            17.072677
1 2024-01-02            17.072677
2 2024-01-03            17.072677
3 2024-01-04            17.072677
4 2024-01-05            17.072677
5 2024-01-06            16.303038
6 2024-01-07            16.303038
