In [1]:
# ## 1. Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# For datetime processing
from datetime import datetime

# Display settings
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [2]:
# Load dataset (example CSV)
df = pd.read_csv("restaurant_sales_data.csv")

# Show basic info
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 730 entries, 0 to 729
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Date             730 non-null    object 
 1   Restaurant       730 non-null    object 
 2   Customers        730 non-null    int64  
 3   Average_Spend    730 non-null    float64
 4   Revenue          730 non-null    float64
 5   Cost             730 non-null    float64
 6   Profit           730 non-null    float64
 7   Profit_Margin    730 non-null    float64
 8   Location         730 non-null    object 
 9   Cuisine          730 non-null    object 
 10  Population_Area  730 non-null    int64  
dtypes: float64(5), int64(2), object(4)
memory usage: 62.9+ KB


Unnamed: 0,Date,Restaurant,Customers,Average_Spend,Revenue,Cost,Profit,Profit_Margin,Location,Cuisine,Population_Area
0,2025-01-01,CASA DE MOOR - CHORLTON,152,30.32,4608.71,2327.54,2281.17,49.496931,"Chorlton, Manchester",Mediterranean,14138
1,2025-01-01,Railway Grill,121,25.77,3118.07,1549.07,1568.99,50.319268,Manchester,Grill,15000
2,2025-01-02,CASA DE MOOR - CHORLTON,124,22.56,2797.78,1539.09,1258.69,44.988884,"Chorlton, Manchester",Mediterranean,14138
3,2025-01-02,Railway Grill,153,28.29,4327.71,1974.19,2353.51,54.382341,Manchester,Grill,15000
4,2025-01-03,CASA DE MOOR - CHORLTON,51,28.61,1458.9,1067.28,391.62,26.843512,"Chorlton, Manchester",Mediterranean,14138


In [4]:
df = df.reset_index()


In [6]:
# 1. Print columns to see the exact names (useful for debugging)
print("Columns in df:", df.columns.tolist())

# 2. Ensure the columns exist before calculating
# Tip: Check if there are spaces like ' food_cost' or if it's 'Food Cost'
try:
    # Cost % = food cost / selling price
    df['food_cost_pct'] = df['food_cost'] / df['price']

    # Average food cost percentage
    cost_pct = df['food_cost_pct'].mean() * 100
    print(f"Avg Food Cost %: {round(cost_pct, 2)}%")

except KeyError as e:
    print(f"Error: The column {e} was not found. Check for typos or capitalisation!")


Columns in df: ['index', 'Date', 'Restaurant', 'Customers', 'Average_Spend', 'Revenue', 'Cost', 'Profit', 'Profit_Margin', 'Location', 'Cuisine', 'Population_Area']
Error: The column 'food_cost' was not found. Check for typos or capitalisation!


In [9]:
import pandas as pd

# 1. Variables
total_revenue = 5000 
seats = 80
open_hours = 10

# RevPASH Calculation
revPASH = total_revenue / (seats * open_hours)
print(f"RevPASH: £{revPASH:.2f}")

# 2. Table turnover (Fixing the UserWarning)
data = {
    'start_time': ['12:00', '12:30', '13:00'],
    'end_time': ['13:15', '14:00', '14:15']
}
df = pd.DataFrame(data)

# Specify format='%H:%M' to stop the warning
df['start_time'] = pd.to_datetime(df['start_time'], format='%H:%M')
df['end_time'] = pd.to_datetime(df['end_time'], format='%H:%M')

# Calculate duration in minutes
df['duration'] = (df['end_time'] - df['start_time']).dt.total_seconds() / 60
avg_turnover = df['duration'].mean()

print(f"Average Turnover: {avg_turnover:.1f} minutes")


RevPASH: £6.25
Average Turnover: 80.0 minutes


In [11]:
staff = pd.read_csv("staff_data.csv")
staff.head()

# Productivity = Revenue / labor hours
staff['revenue_per_hour'] = total_revenue / staff['hours_worked']

# Employee turnover rate
turnover = staff[staff['status'] == 'left'].count()['employee_id']/staff.count()['employee_id']
print("Turnover Rate:", round(turnover*100,2), "%")

Turnover Rate: 40.0 %


In [15]:
print(df.columns)


Index(['start_time', 'end_time', 'duration'], dtype='object')
